renderer.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. """
  2. class Renderer
  3. Generates HTML from a parsed token stream. Each instance has an independent
  4. copy of the rules, which can be rewritten with ease. You can also add new
  5. rules if you create a plugin that adds new token types.
  6. """
  7. from __future__ import annotations
  8. from collections.abc import Sequence
  9. import inspect
  10. from typing import Any, ClassVar, Protocol
  11. from .common.utils import escapeHtml, unescapeAll
  12. from .token import Token
  13. from .utils import EnvType, OptionsDict
  class RendererProtocol(Protocol):
      """Structural interface shared by renderer implementations.

      A renderer declares an ``__output__`` class attribute and exposes a
      ``render`` method taking the token stream, the options and the
      environment.
      """

      # Declared here without a value; ``RendererHTML`` sets it to "html".
      __output__: ClassVar[str]

      def render(
          self,
          tokens: Sequence[Token],
          options: OptionsDict,
          env: EnvType,
      ) -> Any:
          """Render ``tokens`` with the given ``options`` and ``env``."""
          ...
  19. class RendererHTML(RendererProtocol):
  20. """Contains render rules for tokens. Can be updated and extended.
  21. Example:
  22. Each rule is called as an independent function with a fixed signature:
  23. ::
  24. class Renderer:
  25. def token_type_name(self, tokens, idx, options, env):
  26. # ...
  27. return renderedHTML
  28. ::
  29. class CustomRenderer(RendererHTML):
  30. def strong_open(self, tokens, idx, options, env):
  31. return '<b>'
  32. def strong_close(self, tokens, idx, options, env):
  33. return '</b>'
  34. md = MarkdownIt(renderer_cls=CustomRenderer)
  35. result = md.render(...)
  36. See https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js
  37. for more details and examples.
  38. """
  39. __output__ = "html"
  40. def __init__(self, parser: Any = None):
  41. self.rules = {
  42. k: v
  43. for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
  44. if not (k.startswith("render") or k.startswith("_"))
  45. }
  def render(
      self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
  ) -> str:
      """Render a token stream and return the concatenated output.

      :param tokens: list of block tokens to render
      :param options: params of parser instance
      :param env: additional data from parsed input
      """
      chunks = []
      for position, current in enumerate(tokens):
          kind = current.type
          if kind == "inline":
              # Inline containers are rendered through their children; an
              # inline token without children contributes nothing.
              if current.children:
                  chunks.append(self.renderInline(current.children, options, env))
              continue
          # Use the rule registered for this type, else the default renderer.
          handler = self.rules.get(kind, self.renderToken)
          chunks.append(handler(tokens, position, options, env))
      return "".join(chunks)
  def renderInline(
      self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
  ) -> str:
      """Render a flat token list, dispatching each token by its type.

      Works like ``render`` but without the special handling of ``inline``
      tokens; ``render`` calls it with the children of an ``inline`` token.

      :param tokens: list of tokens to render
      :param options: params of parser instance
      :param env: additional data from parsed input (references, for example)
      """
      default_rule = self.renderToken
      return "".join(
          self.rules.get(item.type, default_rule)(tokens, position, options, env)
          for position, item in enumerate(tokens)
      )
  def renderToken(
      self,
      tokens: Sequence[Token],
      idx: int,
      options: OptionsDict,
      env: EnvType,
  ) -> str:
      """Default token renderer: emit the token's opening or closing tag.

      Can be overridden by a custom function.

      :param tokens: token list containing the token to render
      :param idx: token index to render
      :param options: params of parser instance (``xhtmlOut`` is read)
      :param env: not used by the default renderer
      """
      token = tokens[idx]

      # Tight list paragraphs
      if token.hidden:
          return ""

      closing = token.nesting == -1
      pieces = []

      # Insert a newline between hidden paragraph and subsequent opening
      # block-level tag.
      #
      # For example, here we should insert a newline before blockquote:
      #  - a
      #    >
      #
      if token.block and not closing and idx and tokens[idx - 1].hidden:
          pieces.append("\n")

      # Tag name, e.g. `<img` or `</p`
      pieces.append("</" if closing else "<")
      pieces.append(token.tag)

      # Encoded attributes, e.g. `<img src="foo"`
      pieces.append(self.renderAttrs(token))

      # Slash for self-closing tags, e.g. `<img src="foo" /`
      if token.nesting == 0 and options["xhtmlOut"]:
          pieces.append(" /")

      # Block-level tags are followed by a newline, with two exceptions
      # that apply to an opening tag that has a following token.
      newline_after = False
      if token.block:
          newline_after = True
          if token.nesting == 1 and idx + 1 < len(tokens):
              follower = tokens[idx + 1]
              if (
                  # Block-level tag containing an inline tag (or followed
                  # by a hidden token).
                  follower.type == "inline"
                  or follower.hidden
                  # Opening tag + closing tag of the same type,
                  # e.g. `<li></li>`.
                  or (follower.nesting == -1 and follower.tag == token.tag)
              ):
                  newline_after = False

      pieces.append(">\n" if newline_after else ">")
      return "".join(pieces)
  128. @staticmethod
  129. def renderAttrs(token: Token) -> str:
  130. """Render token attributes to string."""
  131. result = ""
  132. for key, value in token.attrItems():
  133. result += " " + escapeHtml(key) + '="' + escapeHtml(str(value)) + '"'
  134. return result
  def renderInlineAsText(
      self,
      tokens: Sequence[Token] | None,
      options: OptionsDict,
      env: EnvType,
  ) -> str:
      """Special kludge for image `alt` attributes to conform CommonMark spec.

      Don't try to use it! Spec requires to show `alt` content with stripped
      markup, instead of simple escaping.

      Only three token types contribute: ``text`` adds its content,
      ``softbreak`` adds a newline, and ``image`` adds the text of its
      children (recursively). All other tokens are dropped.

      :param tokens: list of tokens to render; ``None`` or empty gives ""
      :param options: params of parser instance
      :param env: additional data from parsed input
      """
      if not tokens:
          return ""

      fragments = []
      for node in tokens:
          kind = node.type
          if kind == "text":
              fragments.append(node.content)
          elif kind == "softbreak":
              fragments.append("\n")
          elif kind == "image" and node.children:
              fragments.append(self.renderInlineAsText(node.children, options, env))
      return "".join(fragments)
  158. ###################################################
  def code_inline(
      self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  ) -> str:
      """Render an inline code span as ``<code ...>content</code>``.

      The token's attributes are rendered on the ``<code>`` tag and its
      content is passed through ``escapeHtml``.
      """
      token = tokens[idx]
      attrs = self.renderAttrs(token)
      body = escapeHtml(token.content)
      return f"<code{attrs}>{body}</code>"
  def code_block(
      self,
      tokens: Sequence[Token],
      idx: int,
      options: OptionsDict,
      env: EnvType,
  ) -> str:
      """Render a code block as ``<pre ...><code>content</code></pre>``.

      The token's attributes go on the ``<pre>`` tag, the content is passed
      through ``escapeHtml`` and the result ends with a newline.
      """
      token = tokens[idx]
      attrs = self.renderAttrs(token)
      body = escapeHtml(token.content)
      return f"<pre{attrs}><code>{body}</code></pre>\n"
  def fence(
      self,
      tokens: Sequence[Token],
      idx: int,
      options: OptionsDict,
      env: EnvType,
  ) -> str:
      """Render a fenced code block, optionally through ``options.highlight``.

      The info string is split into a language name (first word) and the
      remaining language attributes. If a highlighter is configured it is
      called with ``(content, langName, langAttrs)``; a falsy result falls
      back to the escaped content. Highlighter output that already starts
      with ``<pre`` is returned as is (plus a newline); otherwise the result
      is wrapped in ``<pre><code ...>``.
      """
      token = tokens[idx]
      info = unescapeAll(token.info).strip() if token.info else ""

      langName = ""
      langAttrs = ""
      if info:
          langName, *remainder = info.split(maxsplit=1)
          if remainder:
              langAttrs = remainder[0]

      if options.highlight:
          highlighted = options.highlight(
              token.content, langName, langAttrs
          ) or escapeHtml(token.content)
      else:
          highlighted = escapeHtml(token.content)

      if highlighted.startswith("<pre"):
          return highlighted + "\n"

      attr_source = token
      if info:
          # If language exists, inject class gently, without modifying the
          # original token: a fake token carries a copy of the attributes
          # just so they can be rendered with the extra class.
          attr_source = Token(type="", tag="", nesting=0, attrs=token.attrs.copy())
          attr_source.attrJoin("class", options.langPrefix + langName)

      return "<pre><code" + self.renderAttrs(attr_source) + ">" + highlighted + "</code></pre>\n"
  def image(
      self,
      tokens: Sequence[Token],
      idx: int,
      options: OptionsDict,
      env: EnvType,
  ) -> str:
      """Render an image token after filling in its ``alt`` attribute.

      Note that this sets ``alt`` on the token itself before delegating to
      ``renderToken``.
      """
      token = tokens[idx]

      # "alt" attr MUST be set, even if empty. Because it's mandatory and
      # should be placed on proper position for tests.
      alt_text = (
          self.renderInlineAsText(token.children, options, env)
          if token.children
          else ""
      )
      token.attrSet("alt", alt_text)

      return self.renderToken(tokens, idx, options, env)
  def hardbreak(
      self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  ) -> str:
      """Render a hard line break, self-closed when ``options.xhtmlOut`` is set."""
      if options.xhtmlOut:
          return "<br />\n"
      return "<br>\n"
  def softbreak(
      self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  ) -> str:
      """Render a soft line break.

      Without ``options.breaks`` this is a plain newline; with it, the break
      becomes a ``<br>`` tag (self-closed when ``options.xhtmlOut`` is set).
      """
      if not options.breaks:
          return "\n"
      if options.xhtmlOut:
          return "<br />\n"
      return "<br>\n"
  def text(
      self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  ) -> str:
      """Render a text token: its content passed through ``escapeHtml``."""
      token = tokens[idx]
      return escapeHtml(token.content)
  def html_block(
      self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  ) -> str:
      """Return the content of an HTML block token unchanged (no escaping)."""
      token = tokens[idx]
      return token.content
  def html_inline(
      self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  ) -> str:
      """Return the content of an inline HTML token unchanged (no escaping)."""
      token = tokens[idx]
      return token.content