| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495 |
- """Benchmarks for Parsimonious
- Run these with ``python parsimonious/tests/benchmarks.py``. They don't run during
- normal test runs because they're not tests--they don't assert anything. Also,
- they're a bit slow.
- These differ from the ones in test_benchmarks in that these are meant to be
- compared from revision to revision of Parsimonious to make sure we're not
- getting slower. test_benchmarks simply makes sure our choices among
- implementation alternatives remain valid.
- """
- from __future__ import print_function
- import gc
- from timeit import repeat
- from parsimonious.grammar import Grammar
- def test_not_really_json_parsing():
- """As a baseline for speed, parse some JSON.
- I have no reason to believe that JSON is a particularly representative or
- revealing grammar to test with. Also, this is a naive, unoptimized,
- incorrect grammar, so don't use it as a basis for comparison with other
- parsers. It's just meant to compare across versions of Parsimonious.
- """
- father = """{
- "id" : 1,
- "married" : true,
- "name" : "Larry Lopez",
- "sons" : null,
- "daughters" : [
- {
- "age" : 26,
- "name" : "Sandra"
- },
- {
- "age" : 25,
- "name" : "Margaret"
- },
- {
- "age" : 6,
- "name" : "Mary"
- }
- ]
- }"""
- more_fathers = ','.join([father] * 60)
- json = '{"fathers" : [' + more_fathers + ']}'
- grammar = Grammar(r"""
- value = space (string / number / object / array / true_false_null)
- space
- object = "{" members "}"
- members = (pair ("," pair)*)?
- pair = string ":" value
- array = "[" elements "]"
- elements = (value ("," value)*)?
- true_false_null = "true" / "false" / "null"
- string = space "\"" chars "\"" space
- chars = ~"[^\"]*" # TODO implement the real thing
- number = (int frac exp) / (int exp) / (int frac) / int
- int = "-"? ((digit1to9 digits) / digit)
- frac = "." digits
- exp = e digits
- digits = digit+
- e = "e+" / "e-" / "e" / "E+" / "E-" / "E"
- digit1to9 = ~"[1-9]"
- digit = ~"[0-9]"
- space = ~"\s*"
- """)
- # These number and repetition values seem to keep results within 5% of the
- # difference between min and max. We get more consistent results running a
- # bunch of single-parse tests and taking the min rather than upping the
- # NUMBER and trying to stomp out the outliers with averaging.
- NUMBER = 1
- REPEAT = 5
- total_seconds = min(repeat(lambda: grammar.parse(json),
- lambda: gc.enable(), # so we take into account how we treat the GC
- repeat=REPEAT,
- number=NUMBER))
- seconds_each = total_seconds / NUMBER
- kb = len(json) / 1024.0
- print('Took %.3fs to parse %.1fKB: %.0fKB/s.' % (seconds_each,
- kb,
- kb / seconds_each))
- if __name__ == "__main__":
- test_not_really_json_parsing()
|