token.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. from __future__ import annotations
  2. from collections.abc import Callable, MutableMapping
  3. import dataclasses as dc
  4. from typing import Any, Literal
  5. import warnings
  6. def convert_attrs(value: Any) -> Any:
  7. """Convert Token.attrs set as ``None`` or ``[[key, value], ...]`` to a dict.
  8. This improves compatibility with upstream markdown-it.
  9. """
  10. if not value:
  11. return {}
  12. if isinstance(value, list):
  13. return dict(value)
  14. return value
  15. @dc.dataclass(slots=True)
  16. class Token:
  17. type: str
  18. """Type of the token (string, e.g. "paragraph_open")"""
  19. tag: str
  20. """HTML tag name, e.g. 'p'"""
  21. nesting: Literal[-1, 0, 1]
  22. """Level change (number in {-1, 0, 1} set), where:
  23. - `1` means the tag is opening
  24. - `0` means the tag is self-closing
  25. - `-1` means the tag is closing
  26. """
  27. attrs: dict[str, str | int | float] = dc.field(default_factory=dict)
  28. """HTML attributes.
  29. Note this differs from the upstream "list of lists" format,
  30. although than an instance can still be initialised with this format.
  31. """
  32. map: list[int] | None = None
  33. """Source map info. Format: `[ line_begin, line_end ]`"""
  34. level: int = 0
  35. """Nesting level, the same as `state.level`"""
  36. children: list[Token] | None = None
  37. """Array of child nodes (inline and img tokens)."""
  38. content: str = ""
  39. """Inner content, in the case of a self-closing tag (code, html, fence, etc.),"""
  40. markup: str = ""
  41. """'*' or '_' for emphasis, fence string for fence, etc."""
  42. info: str = ""
  43. """Additional information:
  44. - Info string for "fence" tokens
  45. - The value "auto" for autolink "link_open" and "link_close" tokens
  46. - The string value of the item marker for ordered-list "list_item_open" tokens
  47. """
  48. meta: dict[Any, Any] = dc.field(default_factory=dict)
  49. """A place for plugins to store any arbitrary data"""
  50. block: bool = False
  51. """True for block-level tokens, false for inline tokens.
  52. Used in renderer to calculate line breaks
  53. """
  54. hidden: bool = False
  55. """If true, ignore this element when rendering.
  56. Used for tight lists to hide paragraphs.
  57. """
  58. def __post_init__(self) -> None:
  59. self.attrs = convert_attrs(self.attrs)
  60. def attrIndex(self, name: str) -> int:
  61. warnings.warn( # noqa: B028
  62. "Token.attrIndex should not be used, since Token.attrs is a dictionary",
  63. UserWarning,
  64. )
  65. if name not in self.attrs:
  66. return -1
  67. return list(self.attrs.keys()).index(name)
  68. def attrItems(self) -> list[tuple[str, str | int | float]]:
  69. """Get (key, value) list of attrs."""
  70. return list(self.attrs.items())
  71. def attrPush(self, attrData: tuple[str, str | int | float]) -> None:
  72. """Add `[ name, value ]` attribute to list. Init attrs if necessary."""
  73. name, value = attrData
  74. self.attrSet(name, value)
  75. def attrSet(self, name: str, value: str | int | float) -> None:
  76. """Set `name` attribute to `value`. Override old value if exists."""
  77. self.attrs[name] = value
  78. def attrGet(self, name: str) -> None | str | int | float:
  79. """Get the value of attribute `name`, or null if it does not exist."""
  80. return self.attrs.get(name, None)
  81. def attrJoin(self, name: str, value: str) -> None:
  82. """Join value to existing attribute via space.
  83. Or create new attribute if not exists.
  84. Useful to operate with token classes.
  85. """
  86. if name in self.attrs:
  87. current = self.attrs[name]
  88. if not isinstance(current, str):
  89. raise TypeError(
  90. f"existing attr 'name' is not a str: {self.attrs[name]}"
  91. )
  92. self.attrs[name] = f"{current} {value}"
  93. else:
  94. self.attrs[name] = value
  95. def copy(self, **changes: Any) -> Token:
  96. """Return a shallow copy of the instance."""
  97. return dc.replace(self, **changes)
  98. def as_dict(
  99. self,
  100. *,
  101. children: bool = True,
  102. as_upstream: bool = True,
  103. meta_serializer: Callable[[dict[Any, Any]], Any] | None = None,
  104. filter: Callable[[str, Any], bool] | None = None,
  105. dict_factory: Callable[..., MutableMapping[str, Any]] = dict,
  106. ) -> MutableMapping[str, Any]:
  107. """Return the token as a dictionary.
  108. :param children: Also convert children to dicts
  109. :param as_upstream: Ensure the output dictionary is equal to that created by markdown-it
  110. For example, attrs are converted to null or lists
  111. :param meta_serializer: hook for serializing ``Token.meta``
  112. :param filter: A callable whose return code determines whether an
  113. attribute or element is included (``True``) or dropped (``False``).
  114. Is called with the (key, value) pair.
  115. :param dict_factory: A callable to produce dictionaries from.
  116. For example, to produce ordered dictionaries instead of normal Python
  117. dictionaries, pass in ``collections.OrderedDict``.
  118. """
  119. mapping = dict_factory((f.name, getattr(self, f.name)) for f in dc.fields(self))
  120. if filter:
  121. mapping = dict_factory((k, v) for k, v in mapping.items() if filter(k, v))
  122. if as_upstream and "attrs" in mapping:
  123. mapping["attrs"] = (
  124. None
  125. if not mapping["attrs"]
  126. else [[k, v] for k, v in mapping["attrs"].items()]
  127. )
  128. if meta_serializer and "meta" in mapping:
  129. mapping["meta"] = meta_serializer(mapping["meta"])
  130. if children and mapping.get("children", None):
  131. mapping["children"] = [
  132. child.as_dict(
  133. children=children,
  134. filter=filter,
  135. dict_factory=dict_factory,
  136. as_upstream=as_upstream,
  137. meta_serializer=meta_serializer,
  138. )
  139. for child in mapping["children"]
  140. ]
  141. return mapping
  142. @classmethod
  143. def from_dict(cls, dct: MutableMapping[str, Any]) -> Token:
  144. """Convert a dict to a Token."""
  145. token = cls(**dct)
  146. if token.children:
  147. token.children = [cls.from_dict(c) for c in token.children] # type: ignore[arg-type]
  148. return token