# parser_block.py
  1. """Block-level tokenizer."""
  2. from __future__ import annotations
  3. from collections.abc import Callable
  4. import logging
  5. from typing import TYPE_CHECKING
  6. from . import rules_block
  7. from .ruler import Ruler
  8. from .rules_block.state_block import StateBlock
  9. from .token import Token
  10. from .utils import EnvType
  11. if TYPE_CHECKING:
  12. from markdown_it import MarkdownIt
  13. LOGGER = logging.getLogger(__name__)
  14. RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool]
  15. """(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool)
  16. `silent` disables token generation, useful for lookahead.
  17. """
  18. _rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [
  19. # First 2 params - rule name & source. Secondary array - list of rules,
  20. # which can be terminated by this one.
  21. ("table", rules_block.table, ["paragraph", "reference"]),
  22. ("code", rules_block.code, []),
  23. ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
  24. (
  25. "blockquote",
  26. rules_block.blockquote,
  27. ["paragraph", "reference", "blockquote", "list"],
  28. ),
  29. ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
  30. ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
  31. ("reference", rules_block.reference, []),
  32. ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
  33. ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
  34. ("lheading", rules_block.lheading, []),
  35. ("paragraph", rules_block.paragraph, []),
  36. ]
  37. class ParserBlock:
  38. """
  39. ParserBlock#ruler -> Ruler
  40. [[Ruler]] instance. Keep configuration of block rules.
  41. """
  42. def __init__(self) -> None:
  43. self.ruler = Ruler[RuleFuncBlockType]()
  44. for name, rule, alt in _rules:
  45. self.ruler.push(name, rule, {"alt": alt})
  46. def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None:
  47. """Generate tokens for input range."""
  48. rules = self.ruler.getRules("")
  49. line = startLine
  50. maxNesting = state.md.options.maxNesting
  51. hasEmptyLines = False
  52. while line < endLine:
  53. state.line = line = state.skipEmptyLines(line)
  54. if line >= endLine:
  55. break
  56. if state.sCount[line] < state.blkIndent:
  57. # Termination condition for nested calls.
  58. # Nested calls currently used for blockquotes & lists
  59. break
  60. if state.level >= maxNesting:
  61. # If nesting level exceeded - skip tail to the end.
  62. # That's not ordinary situation and we should not care about content.
  63. state.line = endLine
  64. break
  65. # Try all possible rules.
  66. # On success, rule should:
  67. # - update `state.line`
  68. # - update `state.tokens`
  69. # - return True
  70. for rule in rules:
  71. if rule(state, line, endLine, False):
  72. break
  73. # set state.tight if we had an empty line before current tag
  74. # i.e. latest empty line should not count
  75. state.tight = not hasEmptyLines
  76. line = state.line
  77. # paragraph might "eat" one newline after it in nested lists
  78. if (line - 1) < endLine and state.isEmpty(line - 1):
  79. hasEmptyLines = True
  80. if line < endLine and state.isEmpty(line):
  81. hasEmptyLines = True
  82. line += 1
  83. state.line = line
  84. def parse(
  85. self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
  86. ) -> list[Token] | None:
  87. """Process input string and push block tokens into `outTokens`."""
  88. if not src:
  89. return None
  90. state = StateBlock(src, md, env, outTokens)
  91. self.tokenize(state, state.line, state.lineMax)
  92. return state.tokens