benchmarks.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
"""Benchmarks for Parsimonious

Run these with ``python parsimonious/tests/benchmarks.py``. They don't run during
normal test runs because they're not tests--they don't assert anything. Also,
they're a bit slow.

These differ from the ones in test_benchmarks in that these are meant to be
compared from revision to revision of Parsimonious to make sure we're not
getting slower. test_benchmarks simply makes sure our choices among
implementation alternatives remain valid.

"""
  10. from __future__ import print_function
  11. import gc
  12. from timeit import repeat
  13. from parsimonious.grammar import Grammar
  14. def test_not_really_json_parsing():
  15. """As a baseline for speed, parse some JSON.
  16. I have no reason to believe that JSON is a particularly representative or
  17. revealing grammar to test with. Also, this is a naive, unoptimized,
  18. incorrect grammar, so don't use it as a basis for comparison with other
  19. parsers. It's just meant to compare across versions of Parsimonious.
  20. """
  21. father = """{
  22. "id" : 1,
  23. "married" : true,
  24. "name" : "Larry Lopez",
  25. "sons" : null,
  26. "daughters" : [
  27. {
  28. "age" : 26,
  29. "name" : "Sandra"
  30. },
  31. {
  32. "age" : 25,
  33. "name" : "Margaret"
  34. },
  35. {
  36. "age" : 6,
  37. "name" : "Mary"
  38. }
  39. ]
  40. }"""
  41. more_fathers = ','.join([father] * 60)
  42. json = '{"fathers" : [' + more_fathers + ']}'
  43. grammar = Grammar(r"""
  44. value = space (string / number / object / array / true_false_null)
  45. space
  46. object = "{" members "}"
  47. members = (pair ("," pair)*)?
  48. pair = string ":" value
  49. array = "[" elements "]"
  50. elements = (value ("," value)*)?
  51. true_false_null = "true" / "false" / "null"
  52. string = space "\"" chars "\"" space
  53. chars = ~"[^\"]*" # TODO implement the real thing
  54. number = (int frac exp) / (int exp) / (int frac) / int
  55. int = "-"? ((digit1to9 digits) / digit)
  56. frac = "." digits
  57. exp = e digits
  58. digits = digit+
  59. e = "e+" / "e-" / "e" / "E+" / "E-" / "E"
  60. digit1to9 = ~"[1-9]"
  61. digit = ~"[0-9]"
  62. space = ~"\s*"
  63. """)
  64. # These number and repetition values seem to keep results within 5% of the
  65. # difference between min and max. We get more consistent results running a
  66. # bunch of single-parse tests and taking the min rather than upping the
  67. # NUMBER and trying to stomp out the outliers with averaging.
  68. NUMBER = 1
  69. REPEAT = 5
  70. total_seconds = min(repeat(lambda: grammar.parse(json),
  71. lambda: gc.enable(), # so we take into account how we treat the GC
  72. repeat=REPEAT,
  73. number=NUMBER))
  74. seconds_each = total_seconds / NUMBER
  75. kb = len(json) / 1024.0
  76. print('Took %.3fs to parse %.1fKB: %.0fKB/s.' % (seconds_each,
  77. kb,
  78. kb / seconds_each))
  79. if __name__ == "__main__":
  80. test_not_really_json_parsing()