text.py 1.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import functools
  2. import re
# Skip plain-text characters for the text token: append them to the pending
# buffer and advance the current position.
  5. from .state_inline import StateInline
  6. # Rule to skip pure text
  7. # '{}$%@~+=:' reserved for extensions
  8. # !!!! Don't confuse with "Markdown ASCII Punctuation" chars
  9. # http://spec.commonmark.org/0.15/#ascii-punctuation-character
  10. _TerminatorChars = {
  11. "\n",
  12. "!",
  13. "#",
  14. "$",
  15. "%",
  16. "&",
  17. "*",
  18. "+",
  19. "-",
  20. ":",
  21. "<",
  22. "=",
  23. ">",
  24. "@",
  25. "[",
  26. "\\",
  27. "]",
  28. "^",
  29. "_",
  30. "`",
  31. "{",
  32. "}",
  33. "~",
  34. }
  35. @functools.cache
  36. def _terminator_char_regex() -> re.Pattern[str]:
  37. return re.compile("[" + re.escape("".join(_TerminatorChars)) + "]")
  38. def text(state: StateInline, silent: bool) -> bool:
  39. pos = state.pos
  40. posMax = state.posMax
  41. terminator_char = _terminator_char_regex().search(state.src, pos)
  42. pos = terminator_char.start() if terminator_char else posMax
  43. if pos == state.pos:
  44. return False
  45. if not silent:
  46. state.pending += state.src[state.pos : pos]
  47. state.pos = pos
  48. return True