table.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. # GFM table, https://github.github.com/gfm/#tables-extension-
  2. from __future__ import annotations
  3. import re
  4. from ..common.utils import charStrAt, isStrSpace
  5. from .state_block import StateBlock
  6. headerLineRe = re.compile(r"^:?-+:?$")
  7. enclosingPipesRe = re.compile(r"^\||\|$")
  8. # Limit the amount of empty autocompleted cells in a table,
  9. # see https://github.com/markdown-it/markdown-it/issues/1000,
  10. # Both pulldown-cmark and commonmark-hs limit the number of cells this way to ~200k.
  11. # We set it to 65k, which can expand user input by a factor of x370
  12. # (256x256 square is 1.8kB expanded into 650kB).
  13. MAX_AUTOCOMPLETED_CELLS = 0x10000
  14. def getLine(state: StateBlock, line: int) -> str:
  15. pos = state.bMarks[line] + state.tShift[line]
  16. maximum = state.eMarks[line]
  17. # return state.src.substr(pos, max - pos)
  18. return state.src[pos:maximum]
  19. def escapedSplit(string: str) -> list[str]:
  20. result: list[str] = []
  21. pos = 0
  22. max = len(string)
  23. isEscaped = False
  24. lastPos = 0
  25. current = ""
  26. ch = charStrAt(string, pos)
  27. while pos < max:
  28. if ch == "|":
  29. if not isEscaped:
  30. # pipe separating cells, '|'
  31. result.append(current + string[lastPos:pos])
  32. current = ""
  33. lastPos = pos + 1
  34. else:
  35. # escaped pipe, '\|'
  36. current += string[lastPos : pos - 1]
  37. lastPos = pos
  38. isEscaped = ch == "\\"
  39. pos += 1
  40. ch = charStrAt(string, pos)
  41. result.append(current + string[lastPos:])
  42. return result
  43. def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
  44. tbodyLines = None
  45. # should have at least two lines
  46. if startLine + 2 > endLine:
  47. return False
  48. nextLine = startLine + 1
  49. if state.sCount[nextLine] < state.blkIndent:
  50. return False
  51. if state.is_code_block(nextLine):
  52. return False
  53. # first character of the second line should be '|', '-', ':',
  54. # and no other characters are allowed but spaces;
  55. # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
  56. pos = state.bMarks[nextLine] + state.tShift[nextLine]
  57. if pos >= state.eMarks[nextLine]:
  58. return False
  59. first_ch = state.src[pos]
  60. pos += 1
  61. if first_ch not in ("|", "-", ":"):
  62. return False
  63. if pos >= state.eMarks[nextLine]:
  64. return False
  65. second_ch = state.src[pos]
  66. pos += 1
  67. if second_ch not in ("|", "-", ":") and not isStrSpace(second_ch):
  68. return False
  69. # if first character is '-', then second character must not be a space
  70. # (due to parsing ambiguity with list)
  71. if first_ch == "-" and isStrSpace(second_ch):
  72. return False
  73. while pos < state.eMarks[nextLine]:
  74. ch = state.src[pos]
  75. if ch not in ("|", "-", ":") and not isStrSpace(ch):
  76. return False
  77. pos += 1
  78. lineText = getLine(state, startLine + 1)
  79. columns = lineText.split("|")
  80. aligns = []
  81. for i in range(len(columns)):
  82. t = columns[i].strip()
  83. if not t:
  84. # allow empty columns before and after table, but not in between columns;
  85. # e.g. allow ` |---| `, disallow ` ---||--- `
  86. if i == 0 or i == len(columns) - 1:
  87. continue
  88. else:
  89. return False
  90. if not headerLineRe.search(t):
  91. return False
  92. if charStrAt(t, len(t) - 1) == ":":
  93. aligns.append("center" if charStrAt(t, 0) == ":" else "right")
  94. elif charStrAt(t, 0) == ":":
  95. aligns.append("left")
  96. else:
  97. aligns.append("")
  98. lineText = getLine(state, startLine).strip()
  99. if "|" not in lineText:
  100. return False
  101. if state.is_code_block(startLine):
  102. return False
  103. columns = escapedSplit(lineText)
  104. if columns and columns[0] == "":
  105. columns.pop(0)
  106. if columns and columns[-1] == "":
  107. columns.pop()
  108. # header row will define an amount of columns in the entire table,
  109. # and align row should be exactly the same (the rest of the rows can differ)
  110. columnCount = len(columns)
  111. if columnCount == 0 or columnCount != len(aligns):
  112. return False
  113. if silent:
  114. return True
  115. oldParentType = state.parentType
  116. state.parentType = "table"
  117. # use 'blockquote' lists for termination because it's
  118. # the most similar to tables
  119. terminatorRules = state.md.block.ruler.getRules("blockquote")
  120. token = state.push("table_open", "table", 1)
  121. token.map = tableLines = [startLine, 0]
  122. token = state.push("thead_open", "thead", 1)
  123. token.map = [startLine, startLine + 1]
  124. token = state.push("tr_open", "tr", 1)
  125. token.map = [startLine, startLine + 1]
  126. for i in range(len(columns)):
  127. token = state.push("th_open", "th", 1)
  128. if aligns[i]:
  129. token.attrs = {"style": "text-align:" + aligns[i]}
  130. token = state.push("inline", "", 0)
  131. # note in markdown-it this map was removed in v12.0.0 however, we keep it,
  132. # since it is helpful to propagate to children tokens
  133. token.map = [startLine, startLine + 1]
  134. token.content = columns[i].strip()
  135. token.children = []
  136. token = state.push("th_close", "th", -1)
  137. token = state.push("tr_close", "tr", -1)
  138. token = state.push("thead_close", "thead", -1)
  139. autocompleted_cells = 0
  140. nextLine = startLine + 2
  141. while nextLine < endLine:
  142. if state.sCount[nextLine] < state.blkIndent:
  143. break
  144. terminate = False
  145. for i in range(len(terminatorRules)):
  146. if terminatorRules[i](state, nextLine, endLine, True):
  147. terminate = True
  148. break
  149. if terminate:
  150. break
  151. lineText = getLine(state, nextLine).strip()
  152. if not lineText:
  153. break
  154. if state.is_code_block(nextLine):
  155. break
  156. columns = escapedSplit(lineText)
  157. if columns and columns[0] == "":
  158. columns.pop(0)
  159. if columns and columns[-1] == "":
  160. columns.pop()
  161. # note: autocomplete count can be negative if user specifies more columns than header,
  162. # but that does not affect intended use (which is limiting expansion)
  163. autocompleted_cells += columnCount - len(columns)
  164. if autocompleted_cells > MAX_AUTOCOMPLETED_CELLS:
  165. break
  166. if nextLine == startLine + 2:
  167. token = state.push("tbody_open", "tbody", 1)
  168. token.map = tbodyLines = [startLine + 2, 0]
  169. token = state.push("tr_open", "tr", 1)
  170. token.map = [nextLine, nextLine + 1]
  171. for i in range(columnCount):
  172. token = state.push("td_open", "td", 1)
  173. if aligns[i]:
  174. token.attrs = {"style": "text-align:" + aligns[i]}
  175. token = state.push("inline", "", 0)
  176. # note in markdown-it this map was removed in v12.0.0 however, we keep it,
  177. # since it is helpful to propagate to children tokens
  178. token.map = [nextLine, nextLine + 1]
  179. try:
  180. token.content = columns[i].strip() if columns[i] else ""
  181. except IndexError:
  182. token.content = ""
  183. token.children = []
  184. token = state.push("td_close", "td", -1)
  185. token = state.push("tr_close", "tr", -1)
  186. nextLine += 1
  187. if tbodyLines:
  188. token = state.push("tbody_close", "tbody", -1)
  189. tbodyLines[1] = nextLine
  190. token = state.push("table_close", "table", -1)
  191. tableLines[1] = nextLine
  192. state.parentType = oldParentType
  193. state.line = nextLine
  194. return True