main.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. from __future__ import annotations
  2. from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
  3. from contextlib import contextmanager
  4. from typing import Any, Literal, overload
  5. from . import helpers, presets
  6. from .common import normalize_url, utils
  7. from .parser_block import ParserBlock
  8. from .parser_core import ParserCore
  9. from .parser_inline import ParserInline
  10. from .renderer import RendererHTML, RendererProtocol
  11. from .rules_core.state_core import StateCore
  12. from .token import Token
  13. from .utils import EnvType, OptionsDict, OptionsType, PresetType
  14. try:
  15. import linkify_it
  16. except ModuleNotFoundError:
  17. linkify_it = None
  # Built-in preset configurations, built once at import time and keyed by the
  # preset names that ``MarkdownIt.configure`` accepts as a string.
  _PRESETS: dict[str, PresetType] = {
      preset_name: preset_module.make()
      for preset_name, preset_module in (
          ("default", presets.default),
          ("js-default", presets.js_default),
          ("zero", presets.zero),
          ("commonmark", presets.commonmark),
          ("gfm-like", presets.gfm_like),
      )
  }
  class MarkdownIt:
      """Main parser: owns the core/block/inline parsers, the renderer and the options."""

      def __init__(
          self,
          config: str | PresetType = "commonmark",
          options_update: Mapping[str, Any] | None = None,
          *,
          renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
      ):
          """Main parser class

          :param config: name of configuration to load or a pre-defined dictionary
          :param options_update: dictionary that will be merged into ``config["options"]``
          :param renderer_cls: the class to load as the renderer:
              ``self.renderer = renderer_cls(self)``
          :raises TypeError: if ``options_update`` is truthy but not a mapping
          :raises KeyError: if ``config`` is a string that names no known preset
          :raises ValueError: if the resolved configuration is empty
          """
          # add modules
          self.utils = utils
          self.helpers = helpers

          # initialise classes
          self.inline = ParserInline()
          self.block = ParserBlock()
          self.core = ParserCore()
          self.renderer = renderer_cls(self)
          # linkify-it is an optional dependency; ``None`` when it is not installed
          self.linkify = linkify_it.LinkifyIt() if linkify_it else None

          # set the configuration
          if options_update and not isinstance(options_update, Mapping):
              # catch signature change where renderer_cls was not used as a key-word
              raise TypeError(
                  f"options_update should be a mapping: {options_update}"
                  "\n(Perhaps you intended this to be the renderer_cls?)"
              )
          self.configure(config, options_update=options_update)

      def __repr__(self) -> str:
          """Return ``<module>.<class name>()`` for this instance's class."""
          return f"{self.__class__.__module__}.{self.__class__.__name__}()"

      @overload
      def __getitem__(self, name: Literal["inline"]) -> ParserInline: ...

      @overload
      def __getitem__(self, name: Literal["block"]) -> ParserBlock: ...

      @overload
      def __getitem__(self, name: Literal["core"]) -> ParserCore: ...

      @overload
      def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol: ...

      @overload
      def __getitem__(self, name: str) -> Any: ...

      def __getitem__(self, name: str) -> Any:
          """Look up a component by name.

          :param name: one of ``"inline"``, ``"block"``, ``"core"`` or ``"renderer"``
          :raises KeyError: for any other name
          """
          return {
              "inline": self.inline,
              "block": self.block,
              "core": self.core,
              "renderer": self.renderer,
          }[name]

      def set(self, options: OptionsType) -> None:
          """Set parser options (in the same format as in constructor).

          Probably, you will never need it, but you can change options after constructor call.

          __Note:__ To achieve the best possible performance, don't modify a
          `markdown-it` instance options on the fly. If you need multiple configurations
          it's best to create multiple instances and initialize each with separate config.
          """
          self.options = OptionsDict(options)

      def configure(
          self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None
      ) -> MarkdownIt:
          """Batch load of all options and component settings.

          This is an internal method, and you probably will not need it.
          But if you will - see available presets and data structure
          [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)

          We strongly recommend to use presets instead of direct config loads.
          That will give better compatibility with next versions.

          :param presets: name of a built-in preset, or a preset dictionary
          :param options_update: entries that override the preset's ``"options"``
          :returns: this instance (chainable)
          :raises KeyError: if ``presets`` is a string that names no known preset
          :raises ValueError: if the resolved configuration is empty
          """
          # NOTE: the ``presets`` parameter shadows the imported ``presets`` module
          # inside this method; only the module-level ``_PRESETS`` table is used here.
          if isinstance(presets, str):
              if presets not in _PRESETS:
                  raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
              config = _PRESETS[presets]
          else:
              config = presets

          if not config:
              raise ValueError("Wrong `markdown-it` config, can't be empty")

          options = config.get("options", {}) or {}
          if options_update:
              options = {**options, **options_update}  # type: ignore

          self.set(options)  # type: ignore

          if "components" in config:
              for name, component in config["components"].items():
                  rules = component.get("rules", None)
                  if rules:
                      self[name].ruler.enableOnly(rules)
                  rules2 = component.get("rules2", None)
                  if rules2:
                      self[name].ruler2.enableOnly(rules2)

          return self

      def get_all_rules(self) -> dict[str, list[str]]:
          """Return the names of all rules, per chain.

          Keys are ``"core"``, ``"block"``, ``"inline"`` and ``"inline2"``; values are
          whatever the corresponding ruler's ``get_all_rules()`` reports.
          """
          rules = {
              chain: self[chain].ruler.get_all_rules()
              for chain in ["core", "block", "inline"]
          }
          rules["inline2"] = self.inline.ruler2.get_all_rules()
          return rules

      def get_active_rules(self) -> dict[str, list[str]]:
          """Return the names of all active rules, per chain.

          Keys are ``"core"``, ``"block"``, ``"inline"`` and ``"inline2"``; values are
          whatever the corresponding ruler's ``get_active_rules()`` reports.
          """
          rules = {
              chain: self[chain].ruler.get_active_rules()
              for chain in ["core", "block", "inline"]
          }
          rules["inline2"] = self.inline.ruler2.get_active_rules()
          return rules

      def enable(
          self, names: str | Iterable[str], ignoreInvalid: bool = False
      ) -> MarkdownIt:
          """Enable list of rules. (chainable)

          :param names: rule name or list of rule names to enable.
          :param ignoreInvalid: set `true` to ignore errors when rule not found.
          :raises ValueError: if a rule is not found and ``ignoreInvalid`` is not set

          It will automatically find appropriate components,
          containing rules with given names. If rule not found, and `ignoreInvalid`
          not set - throws exception.

          Example::

              md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')

          """
          # Materialise once: ``names`` is handed to every ruler and iterated again
          # for the ``missed`` check, so a one-shot iterator (e.g. a generator)
          # would otherwise be exhausted after its first use.
          names = [names] if isinstance(names, str) else list(names)

          result: list[str] = []
          for chain in ["core", "block", "inline"]:
              result.extend(self[chain].ruler.enable(names, True))
          result.extend(self.inline.ruler2.enable(names, True))

          missed = [name for name in names if name not in result]
          if missed and not ignoreInvalid:
              raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")

          return self

      def disable(
          self, names: str | Iterable[str], ignoreInvalid: bool = False
      ) -> MarkdownIt:
          """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)

          :param names: rule name or list of rule names to disable.
          :param ignoreInvalid: set `true` to ignore errors when rule not found.
          :raises ValueError: if a rule is not found and ``ignoreInvalid`` is not set
          """
          # Materialise once: ``names`` is handed to every ruler and iterated again
          # for the ``missed`` check, so a one-shot iterator (e.g. a generator)
          # would otherwise be exhausted after its first use.
          names = [names] if isinstance(names, str) else list(names)

          result: list[str] = []
          for chain in ["core", "block", "inline"]:
              result.extend(self[chain].ruler.disable(names, True))
          result.extend(self.inline.ruler2.disable(names, True))

          missed = [name for name in names if name not in result]
          if missed and not ignoreInvalid:
              raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")

          return self

      @contextmanager
      def reset_rules(self) -> Generator[None, None, None]:
          """A context manager, that will reset the current enabled rules on exit.

          The active rules are captured on entry and re-applied on exit, including
          when the ``with`` body raises.
          """
          chain_rules = self.get_active_rules()
          try:
              yield
          finally:
              # ``finally`` so the snapshot is restored even if the body raised;
              # without it an exception would leave the modified rule set in place.
              for chain, rules in chain_rules.items():
                  if chain != "inline2":
                      self[chain].ruler.enableOnly(rules)
              # "inline2" is not a component name for ``self[...]``; it is the
              # second ruler of the inline parser.
              self.inline.ruler2.enableOnly(chain_rules["inline2"])

      def add_render_rule(
          self, name: str, function: Callable[..., Any], fmt: str = "html"
      ) -> None:
          """Add a rule for rendering a particular Token type.

          Only applied when ``renderer.__output__ == fmt``

          :param name: key under which the rule is stored in ``renderer.rules``
          :param function: the rule; it is bound to the renderer, so the renderer
              is passed as its first argument
          :param fmt: output format the rule applies to
          """
          if self.renderer.__output__ == fmt:
              # ``__get__`` binds the plain function to the renderer instance
              self.renderer.rules[name] = function.__get__(self.renderer)  # type: ignore

      def use(
          self, plugin: Callable[..., None], *params: Any, **options: Any
      ) -> MarkdownIt:
          """Load specified plugin with given params into current parser instance. (chainable)

          It's just a sugar to call `plugin(md, params)` with currying.

          Example::

              def func(tokens, idx):
                  tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
              md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)

          """
          plugin(self, *params, **options)
          return self

      def parse(self, src: str, env: EnvType | None = None) -> list[Token]:
          """Parse the source string to a token stream

          :param src: source string
          :param env: environment sandbox
          :raises TypeError: if ``env`` is not a ``MutableMapping`` or ``src`` is not a ``str``

          Parse input string and return list of block tokens (special token type
          "inline" will contain list of inline tokens).

          `env` is used to pass data between "distributed" rules and return additional
          metadata like reference info, needed for the renderer. It also can be used to
          inject data in specific cases. Usually, you will be ok to pass `{}`,
          and then pass updated object to renderer.
          """
          env = {} if env is None else env
          if not isinstance(env, MutableMapping):
              raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
          if not isinstance(src, str):
              raise TypeError(f"Input data should be a string, not {type(src)}")
          state = StateCore(src, self, env)
          self.core.process(state)
          return state.tokens

      def render(self, src: str, env: EnvType | None = None) -> Any:
          """Render markdown string into html. It does all magic for you :).

          :param src: source string
          :param env: environment sandbox
          :returns: The output of the loaded renderer

          `env` can be used to inject additional metadata (`{}` by default).
          But you will not need it with high probability. See also comment
          in [[MarkdownIt.parse]].
          """
          # create the env here so the parser and the renderer share the same object
          env = {} if env is None else env
          return self.renderer.render(self.parse(src, env), self.options, env)

      def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]:
          """The same as [[MarkdownIt.parse]] but skip all block rules.

          :param src: source string
          :param env: environment sandbox
          :raises TypeError: if ``env`` is not a ``MutableMapping`` or ``src`` is not a ``str``

          It returns the
          block tokens list with the single `inline` element, containing parsed inline
          tokens in `children` property. Also updates `env` object.
          """
          env = {} if env is None else env
          if not isinstance(env, MutableMapping):
              raise TypeError(f"Input data should be an MutableMapping, not {type(env)}")
          if not isinstance(src, str):
              raise TypeError(f"Input data should be a string, not {type(src)}")
          state = StateCore(src, self, env)
          state.inlineMode = True
          self.core.process(state)
          return state.tokens

      def renderInline(self, src: str, env: EnvType | None = None) -> Any:
          """Similar to [[MarkdownIt.render]] but for single paragraph content.

          :param src: source string
          :param env: environment sandbox

          Similar to [[MarkdownIt.render]] but for single paragraph content. Result
          will NOT be wrapped into `<p>` tags.
          """
          # create the env here so the parser and the renderer share the same object
          env = {} if env is None else env
          return self.renderer.render(self.parseInline(src, env), self.options, env)

      # link methods

      def validateLink(self, url: str) -> bool:
          """Validate if the URL link is allowed in output.

          This validator can prohibit more than really needed to prevent XSS.
          It's a tradeoff to keep code simple and to be secure by default.

          Note: the url should be normalized at this point, and existing entities decoded.
          """
          return normalize_url.validateLink(url)

      def normalizeLink(self, url: str) -> str:
          """Normalize destination URLs in links

          ::

              [label]: destination 'title'
                       ^^^^^^^^^^^
          """
          return normalize_url.normalizeLink(url)

      def normalizeLinkText(self, link: str) -> str:
          """Normalize autolink content

          ::

              <destination>
               ~~~~~~~~~~~
          """
          return normalize_url.normalizeLinkText(link)