test_grammar.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680
  1. # coding=utf-8
  2. from sys import version_info
  3. from unittest import TestCase
  4. import pytest
  5. from parsimonious.exceptions import BadGrammar, LeftRecursionError, ParseError, UndefinedLabel, VisitationError
  6. from parsimonious.expressions import Literal, Lookahead, Regex, Sequence, TokenMatcher, is_callable
  7. from parsimonious.grammar import rule_grammar, rule_syntax, RuleVisitor, Grammar, TokenGrammar, LazyReference
  8. from parsimonious.nodes import Node
  9. from parsimonious.utils import Token
class BootstrappingGrammarTests(TestCase):
    """Tests for the expressions in the grammar that parses the grammar
    definition syntax"""

    def test_quantifier(self):
        # Each quantifier char should parse as the quantifier symbol followed
        # by the (zero-width) trailing-whitespace rule ``_``.
        text = '*'
        quantifier = rule_grammar['quantifier']
        self.assertEqual(quantifier.parse(text),
            Node(quantifier, text, 0, 1, children=[
                Node(quantifier.members[0], text, 0, 1), Node(rule_grammar['_'], text, 1, 1)]))
        text = '?'
        self.assertEqual(quantifier.parse(text),
            Node(quantifier, text, 0, 1, children=[
                Node(quantifier.members[0], text, 0, 1), Node(rule_grammar['_'], text, 1, 1)]))
        text = '+'
        self.assertEqual(quantifier.parse(text),
            Node(quantifier, text, 0, 1, children=[
                Node(quantifier.members[0], text, 0, 1), Node(rule_grammar['_'], text, 1, 1)]))

    def test_spaceless_literal(self):
        text = '"anything but quotes#$*&^"'
        spaceless_literal = rule_grammar['spaceless_literal']
        self.assertEqual(spaceless_literal.parse(text),
            Node(spaceless_literal, text, 0, len(text), children=[
                Node(spaceless_literal.members[0], text, 0, len(text))]))
        # A raw-string literal containing an escaped quote:
        text = r'''r"\""'''
        self.assertEqual(spaceless_literal.parse(text),
            Node(spaceless_literal, text, 0, 5, children=[
                Node(spaceless_literal.members[0], text, 0, 5)]))

    def test_regex(self):
        # A regex atom: tilde, quoted pattern, then trailing flags ("LI").
        text = '~"[a-zA-Z_][a-zA-Z_0-9]*"LI'
        regex = rule_grammar['regex']
        self.assertEqual(rule_grammar['regex'].parse(text),
            Node(regex, text, 0, len(text), children=[
                Node(Literal('~'), text, 0, 1),
                Node(rule_grammar['spaceless_literal'], text, 1, 25, children=[
                    Node(rule_grammar['spaceless_literal'].members[0], text, 1, 25)]),
                Node(regex.members[2], text, 25, 27),  # the "LI" flags
                Node(rule_grammar['_'], text, 27, 27)]))

    def test_successes(self):
        """Make sure the PEG recognition grammar succeeds on various inputs."""
        self.assertTrue(rule_grammar['label'].parse('_'))
        self.assertTrue(rule_grammar['label'].parse('jeff'))
        self.assertTrue(rule_grammar['label'].parse('_THIS_THING'))

        self.assertTrue(rule_grammar['atom'].parse('some_label'))
        self.assertTrue(rule_grammar['atom'].parse('"some literal"'))
        self.assertTrue(rule_grammar['atom'].parse('~"some regex"i'))

        self.assertTrue(rule_grammar['quantified'].parse('~"some regex"i*'))
        self.assertTrue(rule_grammar['quantified'].parse('thing+'))
        self.assertTrue(rule_grammar['quantified'].parse('"hi"?'))

        self.assertTrue(rule_grammar['term'].parse('this'))
        self.assertTrue(rule_grammar['term'].parse('that+'))

        self.assertTrue(rule_grammar['sequence'].parse('this that? other'))

        self.assertTrue(rule_grammar['ored'].parse('this / that+ / "other"'))

        # + is higher precedence than &, so 'anded' should match the whole
        # thing:
        self.assertTrue(rule_grammar['lookahead_term'].parse('&this+'))

        self.assertTrue(rule_grammar['expression'].parse('this'))
        self.assertTrue(rule_grammar['expression'].parse('this? that other*'))
        self.assertTrue(rule_grammar['expression'].parse('&this / that+ / "other"'))
        self.assertTrue(rule_grammar['expression'].parse('this / that? / "other"+'))
        self.assertTrue(rule_grammar['expression'].parse('this? that other*'))

        self.assertTrue(rule_grammar['rule'].parse('this = that\r'))
        self.assertTrue(rule_grammar['rule'].parse('this = the? that other* \t\r'))
        self.assertTrue(rule_grammar['rule'].parse('the=~"hi*"\n'))

        self.assertTrue(rule_grammar.parse('''
            this = the? that other*
            that = "thing"
            the=~"hi*"
            other = "ahoy hoy"
            '''))
  79. class RuleVisitorTests(TestCase):
  80. """Tests for ``RuleVisitor``
  81. As I write these, Grammar is not yet fully implemented. Normally, there'd
  82. be no reason to use ``RuleVisitor`` directly.
  83. """
  84. def test_round_trip(self):
  85. """Test a simple round trip.
  86. Parse a simple grammar, turn the parse tree into a map of expressions,
  87. and use that to parse another piece of text.
  88. Not everything was implemented yet, but it was a big milestone and a
  89. proof of concept.
  90. """
  91. tree = rule_grammar.parse('''number = ~"[0-9]+"\n''')
  92. rules, default_rule = RuleVisitor().visit(tree)
  93. text = '98'
  94. self.assertEqual(default_rule.parse(text), Node(default_rule, text, 0, 2))
  95. def test_undefined_rule(self):
  96. """Make sure we throw the right exception on undefined rules."""
  97. tree = rule_grammar.parse('boy = howdy\n')
  98. self.assertRaises(UndefinedLabel, RuleVisitor().visit, tree)
  99. def test_optional(self):
  100. tree = rule_grammar.parse('boy = "howdy"?\n')
  101. rules, default_rule = RuleVisitor().visit(tree)
  102. howdy = 'howdy'
  103. # It should turn into a Node from the Optional and another from the
  104. # Literal within.
  105. self.assertEqual(default_rule.parse(howdy), Node(default_rule, howdy, 0, 5, children=[
  106. Node(Literal("howdy"), howdy, 0, 5)]))
  107. def function_rule(text, pos):
  108. """This is an example of a grammar rule implemented as a function, and is
  109. provided as a test fixture."""
  110. token = 'function'
  111. return pos + len(token) if text[pos:].startswith(token) else None
class GrammarTests(TestCase):
    """Integration-test ``Grammar``: feed it a PEG and see if it works."""

    def method_rule(self, text, pos):
        """This is an example of a grammar rule implemented as a method, and is
        provided as a test fixture."""
        token = 'method'
        return pos + len(token) if text[pos:].startswith(token) else None

    @staticmethod
    def descriptor_rule(text, pos):
        """This is an example of a grammar rule implemented as a descriptor,
        and is provided as a test fixture."""
        token = 'descriptor'
        return pos + len(token) if text[pos:].startswith(token) else None

    # Fixture mapping used by the callability tests below.
    rules = {"descriptor_rule": descriptor_rule}

    def test_expressions_from_rules(self):
        """Test the ``Grammar`` base class's ability to compile an expression
        tree from rules.

        That the correct ``Expression`` tree is built is already tested in
        ``RuleGrammarTests``. This tests only that the ``Grammar`` base class's
        ``_expressions_from_rules`` works.
        """
        greeting_grammar = Grammar('greeting = "hi" / "howdy"')
        tree = greeting_grammar.parse('hi')
        self.assertEqual(tree, Node(greeting_grammar['greeting'], 'hi', 0, 2, children=[
            Node(Literal('hi'), 'hi', 0, 2)]))

    def test_unicode(self):
        """Assert that a ``Grammar`` can convert into a string-formatted series
        of rules."""
        grammar = Grammar(r"""
            bold_text = bold_open text bold_close
            text = ~"[A-Z 0-9]*"i
            bold_open = "(("
            bold_close = "))"
            """)
        lines = str(grammar).splitlines()
        self.assertEqual(lines[0], 'bold_text = bold_open text bold_close')
        self.assertTrue("text = ~'[A-Z 0-9]*'i%s" % ('u' if version_info >= (3,) else '')
                        in lines)
        self.assertTrue("bold_open = '(('" in lines)
        self.assertTrue("bold_close = '))'" in lines)
        self.assertEqual(len(lines), 4)

    def test_match(self):
        """Make sure partial-matching (with pos) works."""
        grammar = Grammar(r"""
            bold_text = bold_open text bold_close
            text = ~"[A-Z 0-9]*"i
            bold_open = "(("
            bold_close = "))"
            """)
        s = ' ((boo))yah'
        self.assertEqual(grammar.match(s, pos=1), Node(grammar['bold_text'], s, 1, 8, children=[
            Node(grammar['bold_open'], s, 1, 3),
            Node(grammar['text'], s, 3, 6),
            Node(grammar['bold_close'], s, 6, 8)]))

    def test_bad_grammar(self):
        """Constructing a Grammar with bad rules should raise ParseError."""
        self.assertRaises(ParseError, Grammar, 'just a bunch of junk')

    def test_comments(self):
        """Test tolerance of comments and blank lines in and around rules."""
        grammar = Grammar(r"""# This is a grammar.
                          # It sure is.
                          bold_text = stars text stars # nice
                          text = ~"[A-Z 0-9]*"i #dude
                          stars = "**"
                          # Pretty good
                          #Oh yeah.#""")  # Make sure a comment doesn't need a
                                          # \n or \r to end.
        self.assertEqual(list(sorted(str(grammar).splitlines())),
                         ['''bold_text = stars text stars''',
                          # TODO: Unicode flag is on by default in Python 3. I wonder if we
                          # should turn it on all the time in Parsimonious.
                          """stars = '**'""",
                          '''text = ~'[A-Z 0-9]*'i%s''' % ('u' if version_info >= (3,)
                                                           else '')])

    def test_multi_line(self):
        """Make sure we tolerate all sorts of crazy line breaks and comments in
        the middle of rules."""
        grammar = Grammar("""
            bold_text = bold_open # commenty comment
                        text # more comment
                        bold_close
            text = ~"[A-Z 0-9]*"i
            bold_open = "((" bold_close = "))"
            """)
        self.assertTrue(grammar.parse('((booyah))') is not None)

    def test_not(self):
        """Make sure "not" predicates get parsed and work properly."""
        grammar = Grammar(r'''not_arp = !"arp" ~"[a-z]+"''')
        self.assertRaises(ParseError, grammar.parse, 'arp')
        self.assertTrue(grammar.parse('argle') is not None)

    def test_lookahead(self):
        # ``&`` should require its target without consuming any input, hence
        # the zero-width (0, 0) child node below.
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        self.assertRaises(ParseError, grammar.parse, 'burp')

        s = 'arp'
        self.assertEqual(grammar.parse('arp'), Node(grammar['starts_with_a'], s, 0, 3, children=[
            Node(Lookahead(Literal('a')), s, 0, 0),
            Node(Regex(r'[a-z]+'), s, 0, 3)]))

    def test_parens(self):
        # Parenthesized groups should form real subtrees in the parse result.
        grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
        # Make sure it's not as if the parens aren't there:
        self.assertRaises(ParseError, grammar.parse, 'chitty bangbang')

        s = 'chitty bang bang'
        self.assertEqual(str(grammar.parse(s)),
                         """<Node called "sequence" matching "chitty bang bang">
    <Node matching "chitty">
    <Node matching " bang bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">""")

    def test_resolve_refs_order(self):
        """Smoke-test a circumstance where lazy references don't get resolved."""
        grammar = Grammar("""
            expression = "(" terms ")"
            terms = term+
            term = number
            number = ~r"[0-9]+"
            """)
        grammar.parse('(34)')

    def test_resolve_refs_completeness(self):
        """Smoke-test another circumstance where lazy references don't get resolved."""
        grammar = Grammar(r"""
            block = "{" _ item* "}" _
            # An item is an element of a block.
            item = number / word / block / paren
            # Parens are for delimiting subexpressions.
            paren = "(" _ item* ")" _
            # Words are barewords, unquoted things, other than literals, that can live
            # in lists. We may renege on some of these chars later, especially ".". We
            # may add Unicode.
            word = spaceless_word _
            spaceless_word = ~r"[-a-z`~!@#$%^&*_+=|\\;<>,.?][-a-z0-9`~!@#$%^&*_+=|\\;<>,.?]*"i
            number = ~r"[0-9]+" _ # There are decimals and strings and other stuff back on the "parsing" branch, once you get this working.
            _ = meaninglessness*
            meaninglessness = whitespace
            whitespace = ~r"\s+"
            """)
        grammar.parse('{log (add 3 to 5)}')

    def test_infinite_loop(self):
        """Smoke-test a grammar that was causing infinite loops while building.

        This was going awry because the "int" rule was never getting marked as
        resolved, so it would just keep trying to resolve it over and over.
        """
        Grammar("""
            digits = digit+
            int = digits
            digit = ~"[0-9]"
            number = int
            main = number
            """)

    def test_circular_toplevel_reference(self):
        # Grammars whose first rule can never be resolved should be rejected
        # at construction time.
        with pytest.raises(VisitationError):
            Grammar("""
                foo = bar
                bar = foo
                """)
        with pytest.raises(VisitationError):
            Grammar("""
                foo = foo
                bar = foo
                """)
        with pytest.raises(VisitationError):
            Grammar("""
                foo = bar
                bar = baz
                baz = foo
                """)

    def test_right_recursive(self):
        """Right-recursive refs should resolve."""
        grammar = Grammar("""
            digits = digit digits?
            digit = ~r"[0-9]"
            """)
        self.assertTrue(grammar.parse('12') is not None)

    def test_badly_circular(self):
        """Uselessly circular references should be detected by the grammar
        compiler."""
        self.skipTest('We have yet to make the grammar compiler detect these.')
        Grammar("""
            foo = bar
            bar = foo
            """)

    def test_parens_with_leading_whitespace(self):
        """Make sure a parenthesized expression is allowed to have leading
        whitespace when nested directly inside another."""
        Grammar("""foo = ( ("c") )""").parse('c')

    def test_single_quoted_literals(self):
        # Single-quoted literals should work, including holding a double quote.
        Grammar("""foo = 'a' '"'""").parse('a"')

    def test_simple_custom_rules(self):
        """Run 2-arg custom-coded rules through their paces."""
        grammar = Grammar("""
            bracketed_digit = start digit end
            start = '['
            end = ']'""",
            digit=lambda text, pos:
                (pos + 1) if text[pos].isdigit() else None)
        s = '[6]'
        self.assertEqual(grammar.parse(s),
                         Node(grammar['bracketed_digit'], s, 0, 3, children=[
                             Node(grammar['start'], s, 0, 1),
                             Node(grammar['digit'], s, 1, 2),
                             Node(grammar['end'], s, 2, 3)]))

    def test_complex_custom_rules(self):
        """Run 5-arg custom rules through their paces.

        Incidentally tests returning an actual Node from the custom rule.
        """
        grammar = Grammar("""
            bracketed_digit = start digit end
            start = '['
            end = ']'
            real_digit = '6'""",
            # In this particular implementation of the digit rule, no node is
            # generated for `digit`; it falls right through to `real_digit`.
            # I'm not sure if this could lead to problems; I can't think of
            # any, but it's probably not a great idea.
            digit=lambda text, pos, cache, error, grammar:
                grammar['real_digit'].match_core(text, pos, cache, error))
        s = '[6]'
        self.assertEqual(grammar.parse(s),
                         Node(grammar['bracketed_digit'], s, 0, 3, children=[
                             Node(grammar['start'], s, 0, 1),
                             Node(grammar['real_digit'], s, 1, 2),
                             Node(grammar['end'], s, 2, 3)]))

    def test_lazy_custom_rules(self):
        """Make sure LazyReferences manually shoved into custom rules are
        resolved.

        Incidentally test passing full-on Expressions as custom rules and
        having a custom rule as the default one.
        """
        grammar = Grammar("""
            four = '4'
            five = '5'""",
            forty_five=Sequence(LazyReference('four'),
                                LazyReference('five'),
                                name='forty_five')).default('forty_five')
        s = '45'
        self.assertEqual(grammar.parse(s),
                         Node(grammar['forty_five'], s, 0, 2, children=[
                             Node(grammar['four'], s, 0, 1),
                             Node(grammar['five'], s, 1, 2)]))

    def test_unconnected_custom_rules(self):
        """Make sure custom rules that aren't hooked to any other rules still
        get included in the grammar and that lone ones get set as the
        default.

        Incidentally test Grammar's `rules` default arg.
        """
        grammar = Grammar(one_char=lambda text, pos: pos + 1).default('one_char')
        s = '4'
        self.assertEqual(grammar.parse(s),
                         Node(grammar['one_char'], s, 0, 1))

    def test_callability_of_routines(self):
        # All three fixture flavors should be recognized as callable rules.
        self.assertTrue(is_callable(function_rule))
        self.assertTrue(is_callable(self.method_rule))
        self.assertTrue(is_callable(self.rules['descriptor_rule']))

    def test_callability_custom_rules(self):
        """Confirms that functions, methods and method descriptors can all be
        used to supply custom grammar rules.
        """
        grammar = Grammar("""
            default = function method descriptor
            """,
            function=function_rule,
            method=self.method_rule,
            descriptor=self.rules['descriptor_rule'],
        )
        result = grammar.parse('functionmethoddescriptor')
        rule_names = [node.expr.name for node in result.children]
        self.assertEqual(rule_names, ['function', 'method', 'descriptor'])

    def test_lazy_default_rule(self):
        """Make sure we get an actual rule set as our default rule, even when
        the first rule has forward references and is thus a LazyReference at
        some point during grammar compilation.
        """
        grammar = Grammar(r"""
            styled_text = text
            text = "hi"
            """)
        self.assertEqual(grammar.parse('hi'), Node(grammar['text'], 'hi', 0, 2))

    def test_immutable_grammar(self):
        """Make sure that a Grammar is immutable after being created."""
        grammar = Grammar(r"""
            foo = 'bar'
            """)

        def mod_grammar(grammar):
            grammar['foo'] = 1
        self.assertRaises(TypeError, mod_grammar, [grammar])

        # Deliberate redefinition: a second, different mutation attempt.
        def mod_grammar(grammar):
            new_grammar = Grammar(r"""
                baz = 'biff'
                """)
            grammar.update(new_grammar)
        self.assertRaises(AttributeError, mod_grammar, [grammar])

    def test_repr(self):
        # repr() of a Grammar should never blow up.
        self.assertTrue(repr(Grammar(r'foo = "a"')))

    def test_rule_ordering_is_preserved(self):
        grammar = Grammar('\n'.join('r%s = "something"' % i for i in range(100)))
        self.assertEqual(
            list(grammar.keys()),
            ['r%s' % i for i in range(100)])

    def test_rule_ordering_is_preserved_on_shallow_copies(self):
        grammar = Grammar('\n'.join('r%s = "something"' % i for i in range(100)))._copy()
        self.assertEqual(
            list(grammar.keys()),
            ['r%s' % i for i in range(100)])

    def test_repetitions(self):
        # Exercise the {n,m} bounded-repetition syntax alongside ?, +, and *.
        grammar = Grammar(r'''
            left_missing = "a"{,5}
            right_missing = "a"{5,}
            exact = "a"{5}
            range = "a"{2,5}
            optional = "a"?
            plus = "a"+
            star = "a"*
        ''')
        should_parse = [
            ("left_missing", ["a" * i for i in range(6)]),
            ("right_missing", ["a" * i for i in range(5, 8)]),
            ("exact", ["a" * 5]),
            ("range", ["a" * i for i in range(2, 6)]),
            ("optional", ["", "a"]),
            ("plus", ["a", "aa"]),
            ("star", ["", "a", "aa"]),
        ]
        for rule, examples in should_parse:
            for example in examples:
                assert grammar[rule].parse(example)

        should_not_parse = [
            ("left_missing", ["a" * 6]),
            ("right_missing", ["a" * i for i in range(5)]),
            ("exact", ["a" * i for i in list(range(5)) + list(range(6, 10))]),
            ("range", ["a" * i for i in list(range(2)) + list(range(6, 10))]),
            ("optional", ["aa"]),
            ("plus", [""]),
            ("star", ["b"]),
        ]
        for rule, examples in should_not_parse:
            for example in examples:
                with pytest.raises(ParseError):
                    grammar[rule].parse(example)

    def test_equal(self):
        # Structural equality of grammars, including repetition bounds and
        # lookahead-vs-not predicates.
        grammar_def = (r"""
            x = y / z / ""
            y = "y" x
            z = "z" x
            """)
        assert Grammar(grammar_def) == Grammar(grammar_def)
        self.assertEqual(Grammar(rule_syntax), Grammar(rule_syntax))
        self.assertNotEqual(Grammar('expr = ~"[a-z]{1,3}"'), Grammar('expr = ~"[a-z]{2,3}"'))
        self.assertNotEqual(Grammar('expr = ~"[a-z]{1,3}"'), Grammar('expr = ~"[a-z]{1,4}"'))
        self.assertNotEqual(Grammar('expr = &"a"'), Grammar('expr = !"a"'))
class TokenGrammarTests(TestCase):
    """Tests for the TokenGrammar class and associated machinery"""

    def test_parse_success(self):
        """Token literals should work."""
        s = [Token('token1'), Token('token2')]
        grammar = TokenGrammar("""
            foo = token1 "token2"
            token1 = "token1"
            """)
        self.assertEqual(grammar.parse(s),
                         Node(grammar['foo'], s, 0, 2, children=[
                             Node(grammar['token1'], s, 0, 1),
                             Node(TokenMatcher('token2'), s, 1, 2)]))

    def test_parse_failure(self):
        """Parse failures should work normally with token literals."""
        grammar = TokenGrammar("""
            foo = "token1" "token2"
            """)
        with pytest.raises(ParseError) as e:
            grammar.parse([Token('tokenBOO'), Token('token2')])
        assert "Rule 'foo' didn't match at" in str(e.value)

    def test_token_repr(self):
        # repr() must produce a native str even for non-ASCII token types.
        t = Token('💣')
        self.assertTrue(isinstance(t.__repr__(), str))
        self.assertEqual('<Token "💣">', t.__repr__())

    def test_token_star_plus_expressions(self):
        # The * and + quantifiers should work over token streams, too.
        a = Token("a")
        b = Token("b")
        grammar = TokenGrammar("""
            foo = "a"*
            bar = "a"+
            """)
        assert grammar["foo"].parse([]) is not None
        assert grammar["foo"].parse([a]) is not None
        assert grammar["foo"].parse([a, a]) is not None
        with pytest.raises(ParseError):
            grammar["foo"].parse([a, b])
        with pytest.raises(ParseError):
            grammar["foo"].parse([b])

        assert grammar["bar"].parse([a]) is not None
        with pytest.raises(ParseError):
            grammar["bar"].parse([a, b])
        with pytest.raises(ParseError):
            grammar["bar"].parse([b])
  508. def test_precedence_of_string_modifiers():
  509. # r"strings", etc. should be parsed as a single literal, not r followed
  510. # by a string literal.
  511. g = Grammar(r"""
  512. escaped_bell = r"\b"
  513. r = "irrelevant"
  514. """)
  515. assert isinstance(g["escaped_bell"], Literal)
  516. assert g["escaped_bell"].literal == "\\b"
  517. with pytest.raises(ParseError):
  518. g.parse("irrelevant\b")
  519. g2 = Grammar(r"""
  520. escaped_bell = r"\b"
  521. """)
  522. assert g2.parse("\\b")
  523. def test_binary_grammar():
  524. g = Grammar(r"""
  525. file = header body terminator
  526. header = b"\xFF" length b"~"
  527. length = ~rb"\d+"
  528. body = ~b"[^\xFF]*"
  529. terminator = b"\xFF"
  530. """)
  531. length = 22
  532. assert g.parse(b"\xff22~" + (b"a" * 22) + b"\xff") is not None
  533. def test_inconsistent_string_types_in_grammar():
  534. with pytest.raises(VisitationError) as e:
  535. Grammar(r"""
  536. foo = b"foo"
  537. bar = "bar"
  538. """)
  539. assert e.value.original_class is BadGrammar
  540. with pytest.raises(VisitationError) as e:
  541. Grammar(r"""
  542. foo = ~b"foo"
  543. bar = "bar"
  544. """)
  545. assert e.value.original_class is BadGrammar
  546. # The following should parse without errors because they use the same
  547. # string types:
  548. Grammar(r"""
  549. foo = b"foo"
  550. bar = b"bar"
  551. """)
  552. Grammar(r"""
  553. foo = "foo"
  554. bar = "bar"
  555. """)
  556. def test_left_associative():
  557. # Regression test for https://github.com/erikrose/parsimonious/issues/209
  558. language_grammar = r"""
  559. expression = operator_expression / non_operator_expression
  560. non_operator_expression = number_expression
  561. operator_expression = expression "+" non_operator_expression
  562. number_expression = ~"[0-9]+"
  563. """
  564. grammar = Grammar(language_grammar)
  565. with pytest.raises(LeftRecursionError) as e:
  566. grammar["operator_expression"].parse("1+2")
  567. assert "Parsimonious is a packrat parser, so it can't handle left recursion." in str(e.value)