| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- # coding=utf-8
- from unittest import TestCase
- from parsimonious.exceptions import ParseError, IncompleteParseError
- from parsimonious.expressions import (Literal, Regex, Sequence, OneOf, Not,
- Quantifier, Optional, ZeroOrMore, OneOrMore, Expression)
- from parsimonious.grammar import Grammar, rule_grammar
- from parsimonious.nodes import Node
class LengthTests(TestCase):
    """Tests for returning the right lengths

    I wrote these before parse tree generation was implemented. They're
    partially redundant with TreeTests.

    """
    def len_eq(self, node, length):
        """Assert that ``node`` spans exactly ``length`` characters.

        Makes tests shorter and lets them omit positional stuff they don't care
        about.

        :arg node: A match result exposing ``start`` and ``end``, or None
        :arg length: The expected value of ``node.end - node.start``, or None
            if ``node`` itself is expected to be None

        """
        node_length = None if node is None else node.end - node.start
        # Use assertEqual rather than a bare ``assert``: it is not stripped
        # when Python runs with -O, and it reports both values on failure.
        self.assertEqual(node_length, length)

    def test_regex(self):
        """Check the match lengths of the ``Literal`` and ``Regex`` leaves."""
        self.len_eq(Literal('hello').match('ehello', 1), 5)  # simple
        self.len_eq(Regex('hello*').match('hellooo'), 7)  # *
        self.assertRaises(ParseError, Regex('hello*').match, 'goodbye')  # no match
        self.len_eq(Regex('hello', ignore_case=True).match('HELLO'), 5)

    def test_sequence(self):
        """Check the match lengths of ``Sequence``, including a non-0 start."""
        self.len_eq(Sequence(Regex('hi*'), Literal('lo'), Regex('.ingo')).match('hiiiilobingo1234'), 12)  # succeed
        self.assertRaises(ParseError, Sequence(Regex('hi*'), Literal('lo'),
                                               Regex('.ingo')).match, 'hiiiilobing')  # don't
        self.len_eq(Sequence(Regex('hi*')).match('>hiiii', 1), 5)  # non-0 pos

    def test_one_of(self):
        """Check the match lengths of ``OneOf`` for each alternative."""
        self.len_eq(OneOf(Literal('aaa'), Literal('bb')).match('aaa'), 3)  # first alternative
        self.len_eq(OneOf(Literal('aaa'), Literal('bb')).match('bbaaa'), 2)  # second
        self.assertRaises(ParseError, OneOf(Literal('aaa'), Literal('bb')).match, 'aa')  # no match

    def test_not(self):
        """Check that ``Not`` matches with zero length or fails outright."""
        self.len_eq(Not(Regex('.')).match(''), 0)  # match
        self.assertRaises(ParseError, Not(Regex('.')).match, 'Hi')  # don't

    def test_optional(self):
        """Check the match lengths of ``Optional``, alone and in a sequence."""
        self.len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('b'), 1)  # contained expr fails
        self.len_eq(Sequence(Optional(Literal('a')), Literal('b')).match('ab'), 2)  # contained expr succeeds
        self.len_eq(Optional(Literal('a')).match('aa'), 1)
        self.len_eq(Optional(Literal('a')).match('bb'), 0)

    def test_zero_or_more(self):
        """Check the match lengths of ``ZeroOrMore``."""
        self.len_eq(ZeroOrMore(Literal('b')).match(''), 0)  # zero
        self.len_eq(ZeroOrMore(Literal('b')).match('bbb'), 3)  # more

        self.len_eq(Regex('^').match(''), 0)  # Validate the next test.

        # Try to make it loop infinitely using a zero-length contained expression:
        self.len_eq(ZeroOrMore(Regex('^')).match(''), 0)

    def test_one_or_more(self):
        """Check the match lengths of ``OneOrMore`` and bounded ``Quantifier``."""
        self.len_eq(OneOrMore(Literal('b')).match('b'), 1)  # one
        self.len_eq(OneOrMore(Literal('b')).match('bbb'), 3)  # more
        self.len_eq(OneOrMore(Literal('b'), min=3).match('bbb'), 3)  # with custom min; success
        self.len_eq(Quantifier(Literal('b'), min=3, max=5).match('bbbb'), 4)  # with custom min and max; success
        self.len_eq(Quantifier(Literal('b'), min=3, max=5).match('bbbbbb'), 5)  # with custom min and max; success
        self.assertRaises(ParseError, OneOrMore(Literal('b'), min=3).match, 'bb')  # with custom min; failure
        self.assertRaises(ParseError, Quantifier(Literal('b'), min=3, max=5).match, 'bb')  # with custom min and max; failure
        self.len_eq(OneOrMore(Regex('^')).match('bb'), 0)  # attempt infinite loop
class TreeTests(TestCase):
    """Tests for building the right trees

    Only successful matches are checked here; the failing cases are already
    exercised by ``LengthTests``.

    """
    def test_simple_node(self):
        """A leaf expression like ``Literal`` should produce the right node."""
        greeting = Literal('hello', name='greeting')
        expected = Node(greeting, 'hello', 0, 5)
        self.assertEqual(greeting.match('hello'), expected)

    def test_sequence_nodes(self):
        """``Sequence`` should produce a node holding one child per member."""
        dwarf = Sequence(Literal('heigh', name='greeting1'),
                         Literal('ho', name='greeting2'),
                         name='dwarf')
        text = 'heighho'
        heigh_node = Node(dwarf.members[0], text, 0, 5)
        ho_node = Node(dwarf.members[1], text, 5, 7)
        expected = Node(dwarf, text, 0, 7, children=[heigh_node, ho_node])
        self.assertEqual(dwarf.match(text), expected)

    def test_one_of(self):
        """``OneOf`` should return its own node around the alternative that matched."""
        one_of = OneOf(Literal('a', name='lit'), name='one_of')
        text = 'aa'
        matched_child = Node(one_of.members[0], text, 0, 1)
        expected = Node(one_of, text, 0, 1, children=[matched_child])
        self.assertEqual(one_of.match(text), expected)

    def test_optional(self):
        """``Optional`` should return its own node around the child that matched."""
        opt = Optional(Literal('a', name='lit'), name='opt')

        text = 'a'
        matched_child = Node(opt.members[0], text, 0, 1)
        self.assertEqual(opt.match(text),
                         Node(opt, text, 0, 1, children=[matched_child]))

        # Now let the Literal inside the Optional fail; the
        # LengthTests.test_optional check is ambiguous about that case.
        text = ''
        self.assertEqual(opt.match(text), Node(opt, text, 0, 0))

    def test_zero_or_more_zero(self):
        """With 0 repetitions, ``ZeroOrMore`` should still return a node."""
        zero = ZeroOrMore(Literal('a'), name='zero')
        text = ''
        expected = Node(zero, text, 0, 0)
        self.assertEqual(zero.match(text), expected)

    def test_one_or_more_one(self):
        """With 1 repetition, ``OneOrMore`` should return a node with one child."""
        one = OneOrMore(Literal('a', name='lit'), name='one')
        text = 'a'
        only_child = Node(one.members[0], text, 0, 1)
        expected = Node(one, text, 0, 1, children=[only_child])
        self.assertEqual(one.match(text), expected)

    # Things added since Grammar got implemented are covered in integration
    # tests in test_grammar.
class ParseTests(TestCase):
    """Tests for the ``parse()`` method"""

    def test_parse_success(self):
        """``parse()`` should return the tree when the parse succeeds.

        Little else needs testing here that the match-based tests have not
        already vetted.

        """
        more = OneOrMore(Literal('a', name='lit'), name='more')
        text = 'aa'
        lit = more.members[0]
        expected_children = [Node(lit, text, 0, 1),
                             Node(lit, text, 1, 2)]
        expected = Node(more, text, 0, 2, children=expected_children)
        self.assertEqual(more.parse(text), expected)
class ErrorReportingTests(TestCase):
    """Tests for reporting parse errors

    Every test wraps the failing ``parse()`` call in ``assertRaises`` used as
    a context manager, so a parse that unexpectedly succeeds fails the test
    instead of silently skipping the assertions about the error.

    """
    def test_inner_rule_succeeding(self):
        """Make sure ``parse()`` fails and blames the
        rightward-progressing-most named Expression when an Expression isn't
        satisfied.

        Make sure ParseErrors have nice Unicode representations.

        """
        grammar = Grammar("""
            bold_text = open_parens text close_parens
            open_parens = "(("
            text = ~"[a-zA-Z]+"
            close_parens = "))"
            """)
        text = '((fred!!'
        with self.assertRaises(ParseError) as caught:
            grammar.parse(text)
        error = caught.exception
        self.assertEqual(error.pos, 6)
        self.assertEqual(error.expr, grammar['close_parens'])
        self.assertEqual(error.text, text)
        self.assertEqual(str(error), "Rule 'close_parens' didn't match at '!!' (line 1, column 7).")

    def test_rewinding(self):
        """Make sure rewinding the stack and trying an alternative (which
        progresses farther) from a higher-level rule can blame an expression
        within the alternative on failure.

        There's no particular reason I suspect this wouldn't work, but it's a
        more real-world example than the no-alternative cases already tested.

        """
        grammar = Grammar("""
            formatted_text = bold_text / weird_text
            bold_text = open_parens text close_parens
            weird_text = open_parens text "!!" bork
            bork = "bork"
            open_parens = "(("
            text = ~"[a-zA-Z]+"
            close_parens = "))"
            """)
        text = '((fred!!'
        with self.assertRaises(ParseError) as caught:
            grammar.parse(text)
        error = caught.exception
        self.assertEqual(error.pos, 8)
        self.assertEqual(error.expr, grammar['bork'])
        self.assertEqual(error.text, text)

    def test_no_named_rule_succeeding(self):
        """Make sure ParseErrors have sane printable representations even if we
        never succeeded in matching any named expressions."""
        grammar = Grammar('''bork = "bork"''')
        with self.assertRaises(ParseError) as caught:
            grammar.parse('snork')
        error = caught.exception
        self.assertEqual(error.pos, 0)
        self.assertEqual(error.expr, grammar['bork'])
        self.assertEqual(error.text, 'snork')

    def test_parse_with_leftovers(self):
        """Make sure ``parse()`` reports where we started failing to match,
        even if a partial match was successful."""
        grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
        with self.assertRaises(IncompleteParseError) as caught:
            grammar.parse('chitty bangbang')
        self.assertEqual(
            str(caught.exception),
            "Rule 'sequence' matched in its entirety, but it didn't consume all "
            "the text. The non-matching portion of the text begins with 'bang' "
            "(line 1, column 12).")

    def test_favoring_named_rules(self):
        """Named rules should be used in error messages in favor of anonymous
        ones, even if those are rightward-progressing-more, and even if the
        failure starts at position 0."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        with self.assertRaises(ParseError) as caught:
            grammar.parse('burp')
        self.assertEqual(str(caught.exception),
                         "Rule 'starts_with_a' didn't match at 'burp' (line 1, column 1).")

    def test_line_and_column(self):
        """Make sure we got the line and column computation right."""
        grammar = Grammar(r"""
            whee_lah = whee "\n" lah "\n"
            whee = "whee"
            lah = "lah"
            """)
        with self.assertRaises(ParseError) as caught:
            grammar.parse('whee\nlahGOO')
        # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
        # didn't match". That's not the greatest. Fix that, then fix this.
        self.assertTrue(str(caught.exception).endswith(
            r"""didn't match at 'GOO' (line 2, column 4)."""))
class RepresentationTests(TestCase):
    """Tests for the string representations of expressions and grammars"""

    def test_unicode_crash(self):
        """Stringifying a tree that matched non-ASCII text must not crash."""
        grammar = Grammar(r'string = ~r"\S+"u')
        tree = grammar.parse('中文')
        str(tree)

    def test_unicode(self):
        """Smoke-test turning expressions back into bits of rules.

        ``GrammarTests.test_unicode`` checks the actual values a little more
        thoroughly.

        """
        str(rule_grammar)

    def test_unicode_keep_parens(self):
        """Converting an expression to a string must not strip parentheses."""
        # (grammar source, expected rendering), checked in this order.
        cases = (
            # ZeroOrMore
            ('foo = "bar" ("baz" "eggs")* "spam"',
             "foo = 'bar' ('baz' 'eggs')* 'spam'"),
            # Quantifiers
            ('foo = "bar" ("baz" "eggs"){2,4} "spam"',
             "foo = 'bar' ('baz' 'eggs'){2,4} 'spam'"),
            ('foo = "bar" ("baz" "eggs"){2,} "spam"',
             "foo = 'bar' ('baz' 'eggs'){2,} 'spam'"),
            ('foo = "bar" ("baz" "eggs"){1,} "spam"',
             "foo = 'bar' ('baz' 'eggs')+ 'spam'"),
            ('foo = "bar" ("baz" "eggs"){,4} "spam"',
             "foo = 'bar' ('baz' 'eggs'){,4} 'spam'"),
            ('foo = "bar" ("baz" "eggs"){0,1} "spam"',
             "foo = 'bar' ('baz' 'eggs')? 'spam'"),
            ('foo = "bar" ("baz" "eggs"){0,} "spam"',
             "foo = 'bar' ('baz' 'eggs')* 'spam'"),
            # OneOf
            ('foo = "bar" ("baz" / "eggs") "spam"',
             "foo = 'bar' ('baz' / 'eggs') 'spam'"),
            # Lookahead
            ('foo = "bar" &("baz" "eggs") "spam"',
             "foo = 'bar' &('baz' 'eggs') 'spam'"),
            # Multiple sequences
            ('foo = ("bar" "baz") / ("baff" "bam")',
             "foo = ('bar' 'baz') / ('baff' 'bam')"),
        )
        for source, rendering in cases:
            self.assertEqual(str(Grammar(source)), rendering)

    def test_unicode_surrounding_parens(self):
        """The whole right-hand side of a rule should not be wrapped in parens.

        Parentheses around the entire expression are unnecessary, so the
        rendering is expected to leave them out.

        """
        rendered = str(Grammar('foo = ("foo" ("bar" "baz"))'))
        self.assertEqual(rendered, "foo = 'foo' ('bar' 'baz')")
class SlotsTests(TestCase):
    """Tests to do with __slots__"""

    def test_subclassing(self):
        """A subclass of a __slots__-less class should be able to add slots
        of its own.

        According to the language docs, this isn't supposed to work:

            When inheriting from a class without __slots__, the __dict__
            attribute of that class will always be accessible, so a __slots__
            definition in the subclass is meaningless.

        It works anyway.

        """
        class Smoo(Quantifier):
            __slots__ = ['smoo']

            def __init__(self):
                self.smoo = 'smoo'

        instance = Smoo()
        # There is a __dict__, but smoo is not stored in it...
        self.assertEqual(instance.__dict__, {})
        # ...because the smoo attr ended up in a slot.
        self.assertEqual(instance.smoo, 'smoo')
|