test_expressions.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. # coding=utf-8
  2. from unittest import TestCase
  3. from parsimonious.exceptions import ParseError, IncompleteParseError
  4. from parsimonious.expressions import (Literal, Regex, Sequence, OneOf, Not,
  5. Quantifier, Optional, ZeroOrMore, OneOrMore, Expression)
  6. from parsimonious.grammar import Grammar, rule_grammar
  7. from parsimonious.nodes import Node
  8. class LengthTests(TestCase):
  9. """Tests for returning the right lengths
  10. I wrote these before parse tree generation was implemented. They're
  11. partially redundant with TreeTests.
  12. """
  13. def len_eq(self, node, length):
  14. """Return whether the match lengths of 2 nodes are equal.
  15. Makes tests shorter and lets them omit positional stuff they don't care
  16. about.
  17. """
  18. node_length = None if node is None else node.end - node.start
  19. assert node_length == length
  20. def test_regex(self):
  21. self.len_eq(Literal('hello').match('ehello', 1), 5) # simple
  22. self.len_eq(Regex('hello*').match('hellooo'), 7) # *
  23. self.assertRaises(ParseError, Regex('hello*').match, 'goodbye') # no match
  24. self.len_eq(Regex('hello', ignore_case=True).match('HELLO'), 5)
  25. def test_sequence(self):
  26. self.len_eq(Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo')).match('hiiiilobingo1234'), 12) # succeed
  27. self.assertRaises(ParseError, Sequence(Regex('hi*'), Literal('lo'),
  28. Regex('.ingo')).match, 'hiiiilobing') # don't
  29. self.len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5) # non-0 pos
  30. def test_one_of(self):
  31. self.len_eq(OneOf(Literal('aaa'), Literal('bb')).match('aaa'), 3) # first alternative
  32. self.len_eq(OneOf(Literal('aaa'), Literal('bb')).match('bbaaa'), 2) # second
  33. self.assertRaises(ParseError, OneOf(Literal('aaa'), Literal('bb')).match, 'aa') # no match
  34. def test_not(self):
  35. self.len_eq(Not(Regex('.')).match(''), 0) # match
  36. self.assertRaises(ParseError, Not(Regex('.')).match, 'Hi') # don't
  37. def test_optional(self):
  38. self.len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('b'), 1) # contained expr fails
  39. self.len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('ab'), 2) # contained expr succeeds
  40. self.len_eq(Optional(Literal('a')).match('aa'), 1)
  41. self.len_eq(Optional(Literal('a')).match('bb'), 0)
  42. def test_zero_or_more(self):
  43. self.len_eq(ZeroOrMore(Literal('b')).match(''), 0) # zero
  44. self.len_eq(ZeroOrMore(Literal('b')).match('bbb'), 3) # more
  45. self.len_eq(Regex('^').match(''), 0) # Validate the next test.
  46. # Try to make it loop infinitely using a zero-length contained expression:
  47. self.len_eq(ZeroOrMore(Regex('^')).match(''), 0)
  48. def test_one_or_more(self):
  49. self.len_eq(OneOrMore(Literal('b')).match('b'), 1) # one
  50. self.len_eq(OneOrMore(Literal('b')).match('bbb'), 3) # more
  51. self.len_eq(OneOrMore(Literal('b'), min=3).match('bbb'), 3) # with custom min; success
  52. self.len_eq(Quantifier(Literal('b'), min=3, max=5).match('bbbb'), 4) # with custom min and max; success
  53. self.len_eq(Quantifier(Literal('b'), min=3, max=5).match('bbbbbb'), 5) # with custom min and max; success
  54. self.assertRaises(ParseError, OneOrMore(Literal('b'), min=3).match, 'bb') # with custom min; failure
  55. self.assertRaises(ParseError, Quantifier(Literal('b'), min=3, max=5).match, 'bb') # with custom min and max; failure
  56. self.len_eq(OneOrMore(Regex('^')).match('bb'), 0) # attempt infinite loop
  57. class TreeTests(TestCase):
  58. """Tests for building the right trees
  59. We have only to test successes here; failures (None-returning cases) are
  60. covered above.
  61. """
  62. def test_simple_node(self):
  63. """Test that leaf expressions like ``Literal`` make the right nodes."""
  64. h = Literal('hello', name='greeting')
  65. self.assertEqual(h.match('hello'), Node(h, 'hello', 0, 5))
  66. def test_sequence_nodes(self):
  67. """Assert that ``Sequence`` produces nodes with the right children."""
  68. s = Sequence(Literal('heigh', name='greeting1'),
  69. Literal('ho', name='greeting2'), name='dwarf')
  70. text = 'heighho'
  71. self.assertEqual(s.match(text), Node(s, text, 0, 7, children=[Node(s.members[0], text, 0, 5),
  72. Node(s.members[1], text, 5, 7)]))
  73. def test_one_of(self):
  74. """``OneOf`` should return its own node, wrapping the child that succeeds."""
  75. o = OneOf(Literal('a', name='lit'), name='one_of')
  76. text = 'aa'
  77. self.assertEqual(o.match(text), Node(o, text, 0, 1, children=[
  78. Node(o.members[0], text, 0, 1)]))
  79. def test_optional(self):
  80. """``Optional`` should return its own node wrapping the succeeded child."""
  81. expr = Optional(Literal('a', name='lit'), name='opt')
  82. text = 'a'
  83. self.assertEqual(expr.match(text), Node(expr, text, 0, 1, children=[
  84. Node(expr.members[0], text, 0, 1)]))
  85. # Test failure of the Literal inside the Optional; the
  86. # LengthTests.test_optional is ambiguous for that.
  87. text = ''
  88. self.assertEqual(expr.match(text), Node(expr, text, 0, 0))
  89. def test_zero_or_more_zero(self):
  90. """Test the 0 case of ``ZeroOrMore``; it should still return a node."""
  91. expr = ZeroOrMore(Literal('a'), name='zero')
  92. text = ''
  93. self.assertEqual(expr.match(text), Node(expr, text, 0, 0))
  94. def test_one_or_more_one(self):
  95. """Test the 1 case of ``OneOrMore``; it should return a node with a child."""
  96. expr = OneOrMore(Literal('a', name='lit'), name='one')
  97. text = 'a'
  98. self.assertEqual(expr.match(text), Node(expr, text, 0, 1, children=[
  99. Node(expr.members[0], text, 0, 1)]))
  100. # Things added since Grammar got implemented are covered in integration
  101. # tests in test_grammar.
  102. class ParseTests(TestCase):
  103. """Tests for the ``parse()`` method"""
  104. def test_parse_success(self):
  105. """Make sure ``parse()`` returns the tree on success.
  106. There's not much more than that to test that we haven't already vetted
  107. above.
  108. """
  109. expr = OneOrMore(Literal('a', name='lit'), name='more')
  110. text = 'aa'
  111. self.assertEqual(expr.parse(text), Node(expr, text, 0, 2, children=[
  112. Node(expr.members[0], text, 0, 1),
  113. Node(expr.members[0], text, 1, 2)]))
  114. class ErrorReportingTests(TestCase):
  115. """Tests for reporting parse errors"""
  116. def test_inner_rule_succeeding(self):
  117. """Make sure ``parse()`` fails and blames the
  118. rightward-progressing-most named Expression when an Expression isn't
  119. satisfied.
  120. Make sure ParseErrors have nice Unicode representations.
  121. """
  122. grammar = Grammar("""
  123. bold_text = open_parens text close_parens
  124. open_parens = "(("
  125. text = ~"[a-zA-Z]+"
  126. close_parens = "))"
  127. """)
  128. text = '((fred!!'
  129. try:
  130. grammar.parse(text)
  131. except ParseError as error:
  132. self.assertEqual(error.pos, 6)
  133. self.assertEqual(error.expr, grammar['close_parens'])
  134. self.assertEqual(error.text, text)
  135. self.assertEqual(str(error), "Rule 'close_parens' didn't match at '!!' (line 1, column 7).")
  136. def test_rewinding(self):
  137. """Make sure rewinding the stack and trying an alternative (which
  138. progresses farther) from a higher-level rule can blame an expression
  139. within the alternative on failure.
  140. There's no particular reason I suspect this wouldn't work, but it's a
  141. more real-world example than the no-alternative cases already tested.
  142. """
  143. grammar = Grammar("""
  144. formatted_text = bold_text / weird_text
  145. bold_text = open_parens text close_parens
  146. weird_text = open_parens text "!!" bork
  147. bork = "bork"
  148. open_parens = "(("
  149. text = ~"[a-zA-Z]+"
  150. close_parens = "))"
  151. """)
  152. text = '((fred!!'
  153. try:
  154. grammar.parse(text)
  155. except ParseError as error:
  156. self.assertEqual(error.pos, 8)
  157. self.assertEqual(error.expr, grammar['bork'])
  158. self.assertEqual(error.text, text)
  159. def test_no_named_rule_succeeding(self):
  160. """Make sure ParseErrors have sane printable representations even if we
  161. never succeeded in matching any named expressions."""
  162. grammar = Grammar('''bork = "bork"''')
  163. try:
  164. grammar.parse('snork')
  165. except ParseError as error:
  166. self.assertEqual(error.pos, 0)
  167. self.assertEqual(error.expr, grammar['bork'])
  168. self.assertEqual(error.text, 'snork')
  169. def test_parse_with_leftovers(self):
  170. """Make sure ``parse()`` reports where we started failing to match,
  171. even if a partial match was successful."""
  172. grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
  173. try:
  174. grammar.parse('chitty bangbang')
  175. except IncompleteParseError as error:
  176. self.assertEqual(str(
  177. error), "Rule 'sequence' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with 'bang' (line 1, column 12).")
  178. def test_favoring_named_rules(self):
  179. """Named rules should be used in error messages in favor of anonymous
  180. ones, even if those are rightward-progressing-more, and even if the
  181. failure starts at position 0."""
  182. grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
  183. try:
  184. grammar.parse('burp')
  185. except ParseError as error:
  186. self.assertEqual(str(error), "Rule 'starts_with_a' didn't match at 'burp' (line 1, column 1).")
  187. def test_line_and_column(self):
  188. """Make sure we got the line and column computation right."""
  189. grammar = Grammar(r"""
  190. whee_lah = whee "\n" lah "\n"
  191. whee = "whee"
  192. lah = "lah"
  193. """)
  194. try:
  195. grammar.parse('whee\nlahGOO')
  196. except ParseError as error:
  197. # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
  198. # didn't match". That's not the greatest. Fix that, then fix this.
  199. self.assertTrue(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
  200. class RepresentationTests(TestCase):
  201. """Tests for str(), unicode(), and repr() of expressions"""
  202. def test_unicode_crash(self):
  203. """Make sure matched unicode strings don't crash ``__str__``."""
  204. grammar = Grammar(r'string = ~r"\S+"u')
  205. str(grammar.parse('中文'))
  206. def test_unicode(self):
  207. """Smoke-test the conversion of expressions to bits of rules.
  208. A slightly more comprehensive test of the actual values is in
  209. ``GrammarTests.test_unicode``.
  210. """
  211. str(rule_grammar)
  212. def test_unicode_keep_parens(self):
  213. """Make sure converting an expression to unicode doesn't strip
  214. parenthesis.
  215. """
  216. # ZeroOrMore
  217. self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs")* "spam"')),
  218. "foo = 'bar' ('baz' 'eggs')* 'spam'")
  219. # Quantifiers
  220. self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){2,4} "spam"')),
  221. "foo = 'bar' ('baz' 'eggs'){2,4} 'spam'")
  222. self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){2,} "spam"')),
  223. "foo = 'bar' ('baz' 'eggs'){2,} 'spam'")
  224. self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){1,} "spam"')),
  225. "foo = 'bar' ('baz' 'eggs')+ 'spam'")
  226. self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){,4} "spam"')),
  227. "foo = 'bar' ('baz' 'eggs'){,4} 'spam'")
  228. self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){0,1} "spam"')),
  229. "foo = 'bar' ('baz' 'eggs')? 'spam'")
  230. self.assertEqual(str(Grammar('foo = "bar" ("baz" "eggs"){0,} "spam"')),
  231. "foo = 'bar' ('baz' 'eggs')* 'spam'")
  232. # OneOf
  233. self.assertEqual(str(Grammar('foo = "bar" ("baz" / "eggs") "spam"')),
  234. "foo = 'bar' ('baz' / 'eggs') 'spam'")
  235. # Lookahead
  236. self.assertEqual(str(Grammar('foo = "bar" &("baz" "eggs") "spam"')),
  237. "foo = 'bar' &('baz' 'eggs') 'spam'")
  238. # Multiple sequences
  239. self.assertEqual(str(Grammar('foo = ("bar" "baz") / ("baff" "bam")')),
  240. "foo = ('bar' 'baz') / ('baff' 'bam')")
  241. def test_unicode_surrounding_parens(self):
  242. """
  243. Make sure there are no surrounding parens around the entire
  244. right-hand side of an expression (as they're unnecessary).
  245. """
  246. self.assertEqual(str(Grammar('foo = ("foo" ("bar" "baz"))')),
  247. "foo = 'foo' ('bar' 'baz')")
  248. class SlotsTests(TestCase):
  249. """Tests to do with __slots__"""
  250. def test_subclassing(self):
  251. """Make sure a subclass of a __slots__-less class can introduce new
  252. slots itself.
  253. This isn't supposed to work, according to the language docs:
  254. When inheriting from a class without __slots__, the __dict__
  255. attribute of that class will always be accessible, so a __slots__
  256. definition in the subclass is meaningless.
  257. But it does.
  258. """
  259. class Smoo(Quantifier):
  260. __slots__ = ['smoo']
  261. def __init__(self):
  262. self.smoo = 'smoo'
  263. smoo = Smoo()
  264. self.assertEqual(smoo.__dict__, {}) # has a __dict__ but with no smoo in it
  265. self.assertEqual(smoo.smoo, 'smoo') # The smoo attr ended up in a slot.