nodes.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. """Nodes that make up parse trees
  2. Parsing spits out a tree of these, which you can then tell to walk itself and
  3. spit out a useful value. Or you can walk it yourself; the structural attributes
  4. are public.
  5. """
  6. # TODO: If this is slow, think about using cElementTree or something.
  7. from inspect import isfunction
  8. from sys import version_info, exc_info
  9. from parsimonious.exceptions import VisitationError, UndefinedLabel
  10. class Node(object):
  11. """A parse tree node
  12. Consider these immutable once constructed. As a side effect of a
  13. memory-saving strategy in the cache, multiple references to a single
  14. ``Node`` might be returned in a single parse tree. So, if you start
  15. messing with one, you'll see surprising parallel changes pop up elsewhere.
  16. My philosophy is that parse trees (and their nodes) should be
  17. representation-agnostic. That is, they shouldn't get all mixed up with what
  18. the final rendered form of a wiki page (or the intermediate representation
  19. of a programming language, or whatever) is going to be: you should be able
  20. to parse once and render several representations from the tree, one after
  21. another.
  22. """
  23. # I tried making this subclass list, but it got ugly. I had to construct
  24. # invalid ones and patch them up later, and there were other problems.
  25. __slots__ = ['expr', # The expression that generated me
  26. 'full_text', # The full text fed to the parser
  27. 'start', # The position in the text where that expr started matching
  28. 'end', # The position after start where the expr first didn't
  29. # match. [start:end] follow Python slice conventions.
  30. 'children'] # List of child parse tree nodes
  31. def __init__(self, expr, full_text, start, end, children=None):
  32. self.expr = expr
  33. self.full_text = full_text
  34. self.start = start
  35. self.end = end
  36. self.children = children or []
  37. @property
  38. def expr_name(self):
  39. # backwards compatibility
  40. return self.expr.name
  41. def __iter__(self):
  42. """Support looping over my children and doing tuple unpacks on me.
  43. It can be very handy to unpack nodes in arg lists; see
  44. :class:`PegVisitor` for an example.
  45. """
  46. return iter(self.children)
  47. @property
  48. def text(self):
  49. """Return the text this node matched."""
  50. return self.full_text[self.start:self.end]
  51. # From here down is just stuff for testing and debugging.
  52. def prettily(self, error=None):
  53. """Return a unicode, pretty-printed representation of me.
  54. :arg error: The node to highlight because an error occurred there
  55. """
  56. # TODO: If a Node appears multiple times in the tree, we'll point to
  57. # them all. Whoops.
  58. def indent(text):
  59. return '\n'.join((' ' + line) for line in text.splitlines())
  60. ret = [u'<%s%s matching "%s">%s' % (
  61. self.__class__.__name__,
  62. (' called "%s"' % self.expr_name) if self.expr_name else '',
  63. self.text,
  64. ' <-- *** We were here. ***' if error is self else '')]
  65. for n in self:
  66. ret.append(indent(n.prettily(error=error)))
  67. return '\n'.join(ret)
  68. def __str__(self):
  69. """Return a compact, human-readable representation of me."""
  70. return self.prettily()
  71. def __eq__(self, other):
  72. """Support by-value deep comparison with other nodes for testing."""
  73. if not isinstance(other, Node):
  74. return NotImplemented
  75. return (self.expr == other.expr and
  76. self.full_text == other.full_text and
  77. self.start == other.start and
  78. self.end == other.end and
  79. self.children == other.children)
  80. def __ne__(self, other):
  81. return not self == other
  82. def __repr__(self, top_level=True):
  83. """Return a bit of code (though not an expression) that will recreate
  84. me."""
  85. # repr() of unicode flattens everything out to ASCII, so we don't need
  86. # to explicitly encode things afterward.
  87. ret = ["s = %r" % self.full_text] if top_level else []
  88. ret.append("%s(%r, s, %s, %s%s)" % (
  89. self.__class__.__name__,
  90. self.expr,
  91. self.start,
  92. self.end,
  93. (', children=[%s]' %
  94. ', '.join([c.__repr__(top_level=False) for c in self.children]))
  95. if self.children else ''))
  96. return '\n'.join(ret)
  97. class RegexNode(Node):
  98. """Node returned from a ``Regex`` expression
  99. Grants access to the ``re.Match`` object, in case you want to access
  100. capturing groups, etc.
  101. """
  102. __slots__ = ['match']
  103. class RuleDecoratorMeta(type):
  104. def __new__(metaclass, name, bases, namespace):
  105. def unvisit(name):
  106. """Remove any leading "visit_" from a method name."""
  107. return name[6:] if name.startswith('visit_') else name
  108. methods = [v for k, v in namespace.items() if
  109. hasattr(v, '_rule') and isfunction(v)]
  110. if methods:
  111. from parsimonious.grammar import Grammar # circular import dodge
  112. methods.sort(key=(lambda x: x.func_code.co_firstlineno)
  113. if version_info[0] < 3 else
  114. (lambda x: x.__code__.co_firstlineno))
  115. # Possible enhancement: once we get the Grammar extensibility story
  116. # solidified, we can have @rules *add* to the default grammar
  117. # rather than pave over it.
  118. namespace['grammar'] = Grammar(
  119. '\n'.join('{name} = {expr}'.format(name=unvisit(m.__name__),
  120. expr=m._rule)
  121. for m in methods))
  122. return super(RuleDecoratorMeta,
  123. metaclass).__new__(metaclass, name, bases, namespace)
  124. class NodeVisitor(object, metaclass=RuleDecoratorMeta):
  125. """A shell for writing things that turn parse trees into something useful
  126. Performs a depth-first traversal of an AST. Subclass this, add methods for
  127. each expr you care about, instantiate, and call
  128. ``visit(top_node_of_parse_tree)``. It'll return the useful stuff. This API
  129. is very similar to that of ``ast.NodeVisitor``.
  130. These could easily all be static methods, but that would add at least as
  131. much weirdness at the call site as the ``()`` for instantiation. And this
  132. way, we support subclasses that require state: options, for example, or a
  133. symbol table constructed from a programming language's AST.
  134. We never transform the parse tree in place, because...
  135. * There are likely multiple references to the same ``Node`` object in a
  136. parse tree, and changes to one reference would surprise you elsewhere.
  137. * It makes it impossible to report errors: you'd end up with the "error"
  138. arrow pointing someplace in a half-transformed mishmash of nodes--and
  139. that's assuming you're even transforming the tree into another tree.
  140. Heaven forbid you're making it into a string or something else.
  141. """
  142. #: The :term:`default grammar`: the one recommended for use with this
  143. #: visitor. If you populate this, you will be able to call
  144. #: :meth:`NodeVisitor.parse()` as a shortcut.
  145. grammar = None
  146. #: Classes of exceptions you actually intend to raise during visitation
  147. #: and which should propagate out of the visitor. These will not be
  148. #: wrapped in a VisitationError when they arise.
  149. unwrapped_exceptions = ()
  150. # TODO: If we need to optimize this, we can go back to putting subclasses
  151. # in charge of visiting children; they know when not to bother. Or we can
  152. # mark nodes as not descent-worthy in the grammar.
  153. def visit(self, node):
  154. """Walk a parse tree, transforming it into another representation.
  155. Recursively descend a parse tree, dispatching to the method named after
  156. the rule in the :class:`~parsimonious.grammar.Grammar` that produced
  157. each node. If, for example, a rule was... ::
  158. bold = '<b>'
  159. ...the ``visit_bold()`` method would be called. It is your
  160. responsibility to subclass :class:`NodeVisitor` and implement those
  161. methods.
  162. """
  163. method = getattr(self, 'visit_' + node.expr_name, self.generic_visit)
  164. # Call that method, and show where in the tree it failed if it blows
  165. # up.
  166. try:
  167. return method(node, [self.visit(n) for n in node])
  168. except (VisitationError, UndefinedLabel):
  169. # Don't catch and re-wrap already-wrapped exceptions.
  170. raise
  171. except Exception as exc:
  172. # implentors may define exception classes that should not be
  173. # wrapped.
  174. if isinstance(exc, self.unwrapped_exceptions):
  175. raise
  176. # Catch any exception, and tack on a parse tree so it's easier to
  177. # see where it went wrong.
  178. exc_class = type(exc)
  179. raise VisitationError(exc, exc_class, node) from exc
  180. def generic_visit(self, node, visited_children):
  181. """Default visitor method
  182. :arg node: The node we're visiting
  183. :arg visited_children: The results of visiting the children of that
  184. node, in a list
  185. I'm not sure there's an implementation of this that makes sense across
  186. all (or even most) use cases, so we leave it to subclasses to implement
  187. for now.
  188. """
  189. raise NotImplementedError('No visitor method was defined for this expression: %s' %
  190. node.expr.as_rule())
  191. # Convenience methods:
  192. def parse(self, text, pos=0):
  193. """Parse some text with this Visitor's default grammar and return the
  194. result of visiting it.
  195. ``SomeVisitor().parse('some_string')`` is a shortcut for
  196. ``SomeVisitor().visit(some_grammar.parse('some_string'))``.
  197. """
  198. return self._parse_or_match(text, pos, 'parse')
  199. def match(self, text, pos=0):
  200. """Parse and visit some text with this Visitor's default grammar, but
  201. don't insist on parsing all the way to the end.
  202. ``SomeVisitor().match('some_string')`` is a shortcut for
  203. ``SomeVisitor().visit(some_grammar.match('some_string'))``.
  204. """
  205. return self._parse_or_match(text, pos, 'match')
  206. # Internal convenience methods to help you write your own visitors:
  207. def lift_child(self, node, children):
  208. """Lift the sole child of ``node`` up to replace the node."""
  209. first_child, = children
  210. return first_child
  211. # Private methods:
  212. def _parse_or_match(self, text, pos, method_name):
  213. """Execute a parse or match on the default grammar, followed by a
  214. visitation.
  215. Raise RuntimeError if there is no default grammar specified.
  216. """
  217. if not self.grammar:
  218. raise RuntimeError(
  219. "The {cls}.{method}() shortcut won't work because {cls} was "
  220. "never associated with a specific " "grammar. Fill out its "
  221. "`grammar` attribute, and try again.".format(
  222. cls=self.__class__.__name__,
  223. method=method_name))
  224. return self.visit(getattr(self.grammar, method_name)(text, pos=pos))
  225. def rule(rule_string):
  226. """Decorate a NodeVisitor ``visit_*`` method to tie a grammar rule to it.
  227. The following will arrange for the ``visit_digit`` method to receive the
  228. results of the ``~"[0-9]"`` parse rule::
  229. @rule('~"[0-9]"')
  230. def visit_digit(self, node, visited_children):
  231. ...
  232. Notice that there is no "digit = " as part of the rule; that gets inferred
  233. from the method name.
  234. In cases where there is only one kind of visitor interested in a grammar,
  235. using ``@rule`` saves you having to look back and forth between the visitor
  236. and the grammar definition.
  237. On an implementation level, all ``@rule`` rules get stitched together into
  238. a :class:`~parsimonious.Grammar` that becomes the NodeVisitor's
  239. :term:`default grammar`.
  240. Typically, the choice of a default rule for this grammar is simple: whatever
  241. ``@rule`` comes first in the class is the default. But the choice may become
  242. surprising if you divide the ``@rule`` calls among subclasses. At the
  243. moment, which method "comes first" is decided simply by comparing line
  244. numbers, so whatever method is on the smallest-numbered line will be the
  245. default. In a future release, this will change to pick the
  246. first ``@rule`` call on the basemost class that has one. That way, a
  247. subclass which does not override the default rule's ``visit_*`` method
  248. won't unintentionally change which rule is the default.
  249. """
  250. def decorator(method):
  251. method._rule = rule_string # XXX: Maybe register them on a class var instead so we can just override a @rule'd visitor method on a subclass without blowing away the rule string that comes with it.
  252. return method
  253. return decorator