reference.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. import logging
  2. from ..common.utils import charCodeAt, isSpace, normalizeReference
  3. from .state_block import StateBlock
  4. LOGGER = logging.getLogger(__name__)
  5. def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> bool:
  6. LOGGER.debug(
  7. "entering reference: %s, %s, %s, %s", state, startLine, _endLine, silent
  8. )
  9. pos = state.bMarks[startLine] + state.tShift[startLine]
  10. maximum = state.eMarks[startLine]
  11. nextLine = startLine + 1
  12. if state.is_code_block(startLine):
  13. return False
  14. if state.src[pos] != "[":
  15. return False
  16. string = state.src[pos : maximum + 1]
  17. # string = state.getLines(startLine, nextLine, state.blkIndent, False).strip()
  18. maximum = len(string)
  19. labelEnd = None
  20. pos = 1
  21. while pos < maximum:
  22. ch = charCodeAt(string, pos)
  23. if ch == 0x5B: # /* [ */
  24. return False
  25. elif ch == 0x5D: # /* ] */
  26. labelEnd = pos
  27. break
  28. elif ch == 0x0A: # /* \n */
  29. if (lineContent := getNextLine(state, nextLine)) is not None:
  30. string += lineContent
  31. maximum = len(string)
  32. nextLine += 1
  33. elif ch == 0x5C: # /* \ */
  34. pos += 1
  35. if (
  36. pos < maximum
  37. and charCodeAt(string, pos) == 0x0A
  38. and (lineContent := getNextLine(state, nextLine)) is not None
  39. ):
  40. string += lineContent
  41. maximum = len(string)
  42. nextLine += 1
  43. pos += 1
  44. if (
  45. labelEnd is None or labelEnd < 0 or charCodeAt(string, labelEnd + 1) != 0x3A
  46. ): # /* : */
  47. return False
  48. # [label]: destination 'title'
  49. # ^^^ skip optional whitespace here
  50. pos = labelEnd + 2
  51. while pos < maximum:
  52. ch = charCodeAt(string, pos)
  53. if ch == 0x0A:
  54. if (lineContent := getNextLine(state, nextLine)) is not None:
  55. string += lineContent
  56. maximum = len(string)
  57. nextLine += 1
  58. elif isSpace(ch):
  59. pass
  60. else:
  61. break
  62. pos += 1
  63. # [label]: destination 'title'
  64. # ^^^^^^^^^^^ parse this
  65. destRes = state.md.helpers.parseLinkDestination(string, pos, maximum)
  66. if not destRes.ok:
  67. return False
  68. href = state.md.normalizeLink(destRes.str)
  69. if not state.md.validateLink(href):
  70. return False
  71. pos = destRes.pos
  72. # save cursor state, we could require to rollback later
  73. destEndPos = pos
  74. destEndLineNo = nextLine
  75. # [label]: destination 'title'
  76. # ^^^ skipping those spaces
  77. start = pos
  78. while pos < maximum:
  79. ch = charCodeAt(string, pos)
  80. if ch == 0x0A:
  81. if (lineContent := getNextLine(state, nextLine)) is not None:
  82. string += lineContent
  83. maximum = len(string)
  84. nextLine += 1
  85. elif isSpace(ch):
  86. pass
  87. else:
  88. break
  89. pos += 1
  90. # [label]: destination 'title'
  91. # ^^^^^^^ parse this
  92. titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, None)
  93. while titleRes.can_continue:
  94. if (lineContent := getNextLine(state, nextLine)) is None:
  95. break
  96. string += lineContent
  97. pos = maximum
  98. maximum = len(string)
  99. nextLine += 1
  100. titleRes = state.md.helpers.parseLinkTitle(string, pos, maximum, titleRes)
  101. if pos < maximum and start != pos and titleRes.ok:
  102. title = titleRes.str
  103. pos = titleRes.pos
  104. else:
  105. title = ""
  106. pos = destEndPos
  107. nextLine = destEndLineNo
  108. # skip trailing spaces until the rest of the line
  109. while pos < maximum:
  110. ch = charCodeAt(string, pos)
  111. if not isSpace(ch):
  112. break
  113. pos += 1
  114. if pos < maximum and charCodeAt(string, pos) != 0x0A and title:
  115. # garbage at the end of the line after title,
  116. # but it could still be a valid reference if we roll back
  117. title = ""
  118. pos = destEndPos
  119. nextLine = destEndLineNo
  120. while pos < maximum:
  121. ch = charCodeAt(string, pos)
  122. if not isSpace(ch):
  123. break
  124. pos += 1
  125. if pos < maximum and charCodeAt(string, pos) != 0x0A:
  126. # garbage at the end of the line
  127. return False
  128. label = normalizeReference(string[1:labelEnd])
  129. if not label:
  130. # CommonMark 0.20 disallows empty labels
  131. return False
  132. # Reference can not terminate anything. This check is for safety only.
  133. if silent:
  134. return True
  135. if "references" not in state.env:
  136. state.env["references"] = {}
  137. state.line = nextLine
  138. # note, this is not part of markdown-it JS, but is useful for renderers
  139. if state.md.options.get("inline_definitions", False):
  140. token = state.push("definition", "", 0)
  141. token.meta = {
  142. "id": label,
  143. "title": title,
  144. "url": href,
  145. "label": string[1:labelEnd],
  146. }
  147. token.map = [startLine, state.line]
  148. if label not in state.env["references"]:
  149. state.env["references"][label] = {
  150. "title": title,
  151. "href": href,
  152. "map": [startLine, state.line],
  153. }
  154. else:
  155. state.env.setdefault("duplicate_refs", []).append(
  156. {
  157. "title": title,
  158. "href": href,
  159. "label": label,
  160. "map": [startLine, state.line],
  161. }
  162. )
  163. return True
  164. def getNextLine(state: StateBlock, nextLine: int) -> None | str:
  165. endLine = state.lineMax
  166. if nextLine >= endLine or state.isEmpty(nextLine):
  167. # empty line or end of input
  168. return None
  169. isContinuation = False
  170. # this would be a code block normally, but after paragraph
  171. # it's considered a lazy continuation regardless of what's there
  172. if state.is_code_block(nextLine):
  173. isContinuation = True
  174. # quirk for blockquotes, this line should already be checked by that rule
  175. if state.sCount[nextLine] < 0:
  176. isContinuation = True
  177. if not isContinuation:
  178. terminatorRules = state.md.block.ruler.getRules("reference")
  179. oldParentType = state.parentType
  180. state.parentType = "reference"
  181. # Some tags can terminate paragraph without empty line.
  182. terminate = False
  183. for terminatorRule in terminatorRules:
  184. if terminatorRule(state, nextLine, endLine, True):
  185. terminate = True
  186. break
  187. state.parentType = oldParentType
  188. if terminate:
  189. # terminated by another block
  190. return None
  191. pos = state.bMarks[nextLine] + state.tShift[nextLine]
  192. maximum = state.eMarks[nextLine]
  193. # max + 1 explicitly includes the newline
  194. return state.src[pos : maximum + 1]