| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325 |
- """Nodes that make up parse trees
- Parsing spits out a tree of these, which you can then tell to walk itself and
- spit out a useful value. Or you can walk it yourself; the structural attributes
- are public.
- """
- # TODO: If this is slow, think about using cElementTree or something.
- from inspect import isfunction
- from sys import version_info, exc_info
- from parsimonious.exceptions import VisitationError, UndefinedLabel
- class Node(object):
- """A parse tree node
- Consider these immutable once constructed. As a side effect of a
- memory-saving strategy in the cache, multiple references to a single
- ``Node`` might be returned in a single parse tree. So, if you start
- messing with one, you'll see surprising parallel changes pop up elsewhere.
- My philosophy is that parse trees (and their nodes) should be
- representation-agnostic. That is, they shouldn't get all mixed up with what
- the final rendered form of a wiki page (or the intermediate representation
- of a programming language, or whatever) is going to be: you should be able
- to parse once and render several representations from the tree, one after
- another.
- """
- # I tried making this subclass list, but it got ugly. I had to construct
- # invalid ones and patch them up later, and there were other problems.
- __slots__ = ['expr', # The expression that generated me
- 'full_text', # The full text fed to the parser
- 'start', # The position in the text where that expr started matching
- 'end', # The position after start where the expr first didn't
- # match. [start:end] follow Python slice conventions.
- 'children'] # List of child parse tree nodes
- def __init__(self, expr, full_text, start, end, children=None):
- self.expr = expr
- self.full_text = full_text
- self.start = start
- self.end = end
- self.children = children or []
- @property
- def expr_name(self):
- # backwards compatibility
- return self.expr.name
- def __iter__(self):
- """Support looping over my children and doing tuple unpacks on me.
- It can be very handy to unpack nodes in arg lists; see
- :class:`PegVisitor` for an example.
- """
- return iter(self.children)
- @property
- def text(self):
- """Return the text this node matched."""
- return self.full_text[self.start:self.end]
- # From here down is just stuff for testing and debugging.
- def prettily(self, error=None):
- """Return a unicode, pretty-printed representation of me.
- :arg error: The node to highlight because an error occurred there
- """
- # TODO: If a Node appears multiple times in the tree, we'll point to
- # them all. Whoops.
- def indent(text):
- return '\n'.join((' ' + line) for line in text.splitlines())
- ret = [u'<%s%s matching "%s">%s' % (
- self.__class__.__name__,
- (' called "%s"' % self.expr_name) if self.expr_name else '',
- self.text,
- ' <-- *** We were here. ***' if error is self else '')]
- for n in self:
- ret.append(indent(n.prettily(error=error)))
- return '\n'.join(ret)
- def __str__(self):
- """Return a compact, human-readable representation of me."""
- return self.prettily()
- def __eq__(self, other):
- """Support by-value deep comparison with other nodes for testing."""
- if not isinstance(other, Node):
- return NotImplemented
- return (self.expr == other.expr and
- self.full_text == other.full_text and
- self.start == other.start and
- self.end == other.end and
- self.children == other.children)
- def __ne__(self, other):
- return not self == other
- def __repr__(self, top_level=True):
- """Return a bit of code (though not an expression) that will recreate
- me."""
- # repr() of unicode flattens everything out to ASCII, so we don't need
- # to explicitly encode things afterward.
- ret = ["s = %r" % self.full_text] if top_level else []
- ret.append("%s(%r, s, %s, %s%s)" % (
- self.__class__.__name__,
- self.expr,
- self.start,
- self.end,
- (', children=[%s]' %
- ', '.join([c.__repr__(top_level=False) for c in self.children]))
- if self.children else ''))
- return '\n'.join(ret)
- class RegexNode(Node):
- """Node returned from a ``Regex`` expression
- Grants access to the ``re.Match`` object, in case you want to access
- capturing groups, etc.
- """
- __slots__ = ['match']
- class RuleDecoratorMeta(type):
- def __new__(metaclass, name, bases, namespace):
- def unvisit(name):
- """Remove any leading "visit_" from a method name."""
- return name[6:] if name.startswith('visit_') else name
- methods = [v for k, v in namespace.items() if
- hasattr(v, '_rule') and isfunction(v)]
- if methods:
- from parsimonious.grammar import Grammar # circular import dodge
- methods.sort(key=(lambda x: x.func_code.co_firstlineno)
- if version_info[0] < 3 else
- (lambda x: x.__code__.co_firstlineno))
- # Possible enhancement: once we get the Grammar extensibility story
- # solidified, we can have @rules *add* to the default grammar
- # rather than pave over it.
- namespace['grammar'] = Grammar(
- '\n'.join('{name} = {expr}'.format(name=unvisit(m.__name__),
- expr=m._rule)
- for m in methods))
- return super(RuleDecoratorMeta,
- metaclass).__new__(metaclass, name, bases, namespace)
- class NodeVisitor(object, metaclass=RuleDecoratorMeta):
- """A shell for writing things that turn parse trees into something useful
- Performs a depth-first traversal of an AST. Subclass this, add methods for
- each expr you care about, instantiate, and call
- ``visit(top_node_of_parse_tree)``. It'll return the useful stuff. This API
- is very similar to that of ``ast.NodeVisitor``.
- These could easily all be static methods, but that would add at least as
- much weirdness at the call site as the ``()`` for instantiation. And this
- way, we support subclasses that require state: options, for example, or a
- symbol table constructed from a programming language's AST.
- We never transform the parse tree in place, because...
- * There are likely multiple references to the same ``Node`` object in a
- parse tree, and changes to one reference would surprise you elsewhere.
- * It makes it impossible to report errors: you'd end up with the "error"
- arrow pointing someplace in a half-transformed mishmash of nodes--and
- that's assuming you're even transforming the tree into another tree.
- Heaven forbid you're making it into a string or something else.
- """
- #: The :term:`default grammar`: the one recommended for use with this
- #: visitor. If you populate this, you will be able to call
- #: :meth:`NodeVisitor.parse()` as a shortcut.
- grammar = None
- #: Classes of exceptions you actually intend to raise during visitation
- #: and which should propagate out of the visitor. These will not be
- #: wrapped in a VisitationError when they arise.
- unwrapped_exceptions = ()
- # TODO: If we need to optimize this, we can go back to putting subclasses
- # in charge of visiting children; they know when not to bother. Or we can
- # mark nodes as not descent-worthy in the grammar.
- def visit(self, node):
- """Walk a parse tree, transforming it into another representation.
- Recursively descend a parse tree, dispatching to the method named after
- the rule in the :class:`~parsimonious.grammar.Grammar` that produced
- each node. If, for example, a rule was... ::
- bold = '<b>'
- ...the ``visit_bold()`` method would be called. It is your
- responsibility to subclass :class:`NodeVisitor` and implement those
- methods.
- """
- method = getattr(self, 'visit_' + node.expr_name, self.generic_visit)
- # Call that method, and show where in the tree it failed if it blows
- # up.
- try:
- return method(node, [self.visit(n) for n in node])
- except (VisitationError, UndefinedLabel):
- # Don't catch and re-wrap already-wrapped exceptions.
- raise
- except Exception as exc:
- # implentors may define exception classes that should not be
- # wrapped.
- if isinstance(exc, self.unwrapped_exceptions):
- raise
- # Catch any exception, and tack on a parse tree so it's easier to
- # see where it went wrong.
- exc_class = type(exc)
- raise VisitationError(exc, exc_class, node) from exc
- def generic_visit(self, node, visited_children):
- """Default visitor method
- :arg node: The node we're visiting
- :arg visited_children: The results of visiting the children of that
- node, in a list
- I'm not sure there's an implementation of this that makes sense across
- all (or even most) use cases, so we leave it to subclasses to implement
- for now.
- """
- raise NotImplementedError('No visitor method was defined for this expression: %s' %
- node.expr.as_rule())
- # Convenience methods:
- def parse(self, text, pos=0):
- """Parse some text with this Visitor's default grammar and return the
- result of visiting it.
- ``SomeVisitor().parse('some_string')`` is a shortcut for
- ``SomeVisitor().visit(some_grammar.parse('some_string'))``.
- """
- return self._parse_or_match(text, pos, 'parse')
- def match(self, text, pos=0):
- """Parse and visit some text with this Visitor's default grammar, but
- don't insist on parsing all the way to the end.
- ``SomeVisitor().match('some_string')`` is a shortcut for
- ``SomeVisitor().visit(some_grammar.match('some_string'))``.
- """
- return self._parse_or_match(text, pos, 'match')
- # Internal convenience methods to help you write your own visitors:
- def lift_child(self, node, children):
- """Lift the sole child of ``node`` up to replace the node."""
- first_child, = children
- return first_child
- # Private methods:
- def _parse_or_match(self, text, pos, method_name):
- """Execute a parse or match on the default grammar, followed by a
- visitation.
- Raise RuntimeError if there is no default grammar specified.
- """
- if not self.grammar:
- raise RuntimeError(
- "The {cls}.{method}() shortcut won't work because {cls} was "
- "never associated with a specific " "grammar. Fill out its "
- "`grammar` attribute, and try again.".format(
- cls=self.__class__.__name__,
- method=method_name))
- return self.visit(getattr(self.grammar, method_name)(text, pos=pos))
- def rule(rule_string):
- """Decorate a NodeVisitor ``visit_*`` method to tie a grammar rule to it.
- The following will arrange for the ``visit_digit`` method to receive the
- results of the ``~"[0-9]"`` parse rule::
- @rule('~"[0-9]"')
- def visit_digit(self, node, visited_children):
- ...
- Notice that there is no "digit = " as part of the rule; that gets inferred
- from the method name.
- In cases where there is only one kind of visitor interested in a grammar,
- using ``@rule`` saves you having to look back and forth between the visitor
- and the grammar definition.
- On an implementation level, all ``@rule`` rules get stitched together into
- a :class:`~parsimonious.Grammar` that becomes the NodeVisitor's
- :term:`default grammar`.
- Typically, the choice of a default rule for this grammar is simple: whatever
- ``@rule`` comes first in the class is the default. But the choice may become
- surprising if you divide the ``@rule`` calls among subclasses. At the
- moment, which method "comes first" is decided simply by comparing line
- numbers, so whatever method is on the smallest-numbered line will be the
- default. In a future release, this will change to pick the
- first ``@rule`` call on the basemost class that has one. That way, a
- subclass which does not override the default rule's ``visit_*`` method
- won't unintentionally change which rule is the default.
- """
- def decorator(method):
- method._rule = rule_string # XXX: Maybe register them on a class var instead so we can just override a @rule'd visitor method on a subclass without blowing away the rule string that comes with it.
- return method
- return decorator
|