Update included Marko sources to be on par with 2.1.2 from upstream.
Chris Pressey
6 months ago
12 | 12 | Licensed under MIT. |
13 | 13 | Created by Frost Ming<mianghong@gmail.com> |
14 | 14 | """ |
15 | ||
15 | 16 | from __future__ import annotations |
16 | 17 | |
17 | 18 | from typing import TYPE_CHECKING, Iterable, cast |
25 | 26 | from .block import Document |
26 | 27 | from .parser import ElementType |
27 | 28 | |
28 | __version__ = "2.0.2" | |
29 | __version__ = "2.1.2" | |
29 | 30 | |
30 | 31 | |
31 | 32 | class SetupDone(Exception): |
132 | 133 | Override this to handle parsed result. |
133 | 134 | """ |
134 | 135 | self._setup_extensions() |
135 | self.renderer.root_node = parsed | |
136 | 136 | with self.renderer as r: |
137 | 137 | return r.render(parsed) |
138 | 138 |
12 | 12 | Licensed under MIT. |
13 | 13 | Created by Frost Ming<mianghong@gmail.com> |
14 | 14 | """ |
15 | ||
15 | 16 | from . import cli |
16 | 17 | |
17 | 18 | if __name__ == "__main__": |
4 | 4 | """ |
5 | 5 | AST renderers for inspecting the markdown parsing result. |
6 | 6 | """ |
7 | ||
7 | 8 | from __future__ import annotations |
8 | 9 | |
9 | 10 | import html |
38 | 39 | def render_raw_text(self, element: inline.RawText) -> dict[str, Any]: |
39 | 40 | return { |
40 | 41 | "element": "raw_text", |
41 | "children": html.unescape(element.children) | |
42 | if element.escape | |
43 | else element.children, | |
42 | "children": ( | |
43 | html.unescape(element.children) if element.escape else element.children | |
44 | ), | |
44 | 45 | "escape": element.escape, |
45 | 46 | } |
46 | 47 | |
47 | 48 | @overload |
48 | def render_children(self, element: list[Element]) -> list[dict[str, Any]]: | |
49 | ... | |
49 | def render_children(self, element: list[Element]) -> list[dict[str, Any]]: ... | |
50 | 50 | |
51 | 51 | @overload |
52 | def render_children(self, element: Element) -> dict[str, Any]: | |
53 | ... | |
52 | def render_children(self, element: Element) -> dict[str, Any]: ... | |
54 | 53 | |
55 | 54 | @overload |
56 | def render_children(self, element: str) -> str: | |
57 | ... | |
55 | def render_children(self, element: str) -> str: ... | |
58 | 56 | |
59 | 57 | def render_children(self, element): |
60 | 58 | if isinstance(element, list): |
21 | 21 | """ |
22 | 22 | |
23 | 23 | # Prevent override of BlockElement and InlineElement |
24 | if cls.override and cls.__base__ not in Element.__subclasses__(): | |
24 | if ( | |
25 | cls.override | |
26 | and cls.__base__ | |
27 | and cls.__base__ not in Element.__subclasses__() | |
28 | ): | |
25 | 29 | name = cls.__base__.__name__ |
26 | 30 | else: |
27 | 31 | name = cls.__name__ |
19 | 19 | markdown = Markdown(extensions=['codehilite']) |
20 | 20 | markdown.convert('```python filename="my_script.py"\nprint("hello world")\n```') |
21 | 21 | """ |
22 | ||
22 | 23 | import json |
23 | 24 | |
24 | 25 | from pygments import highlight |
26 | 26 | GFM = MarkoExtension( |
27 | 27 | elements=[ |
28 | 28 | elements.Paragraph, |
29 | elements.InlineHTML, | |
30 | 29 | elements.Strikethrough, |
31 | 30 | elements.Url, |
32 | 31 | elements.Table, |
4 | 4 | """ |
5 | 5 | Extra elements |
6 | 6 | """ |
7 | ||
7 | 8 | from __future__ import annotations |
8 | 9 | |
9 | 10 | import itertools |
10 | 11 | import re |
11 | 12 | from typing import Any, cast |
12 | 13 | |
13 | from marko import block, inline, patterns | |
14 | from marko import block, inline | |
14 | 15 | from marko.source import Source |
15 | 16 | |
16 | 17 | |
24 | 25 | if m: |
25 | 26 | self.checked = m.group(1)[1:-1].lower() == "x" |
26 | 27 | self.inline_body = self.inline_body[m.end(1) :] |
27 | ||
28 | ||
29 | class InlineHTML(inline.InlineHTML): | |
30 | pattern = re.compile( | |
31 | r"(<%s(?:%s)* */?>" # open tag | |
32 | r"|</%s *>" # closing tag | |
33 | r"|<!--(?:>|->|[\s\S]*?-->)" # HTML comment | |
34 | r"|<\?[\s\S]*?\?>" # processing instruction | |
35 | r"|<![A-Z]+ +[\s\S]*?>" # declaration | |
36 | r"|<!\[CDATA\[[\s\S]*?\]\]>)" # CDATA section | |
37 | % (patterns.tag_name, patterns.attribute, patterns.tag_name) | |
38 | ) | |
39 | 28 | |
40 | 29 | |
41 | 30 | class Strikethrough(inline.InlineElement): |
43 | 43 | |
44 | 44 | @render_dispatch(HTMLRenderer) |
45 | 45 | def render_strikethrough(self, element): |
46 | return "<del>{}</del>".format(self.render_children(element)) | |
46 | return f"<del>{self.render_children(element)}</del>" | |
47 | 47 | |
48 | 48 | @render_strikethrough.dispatch(MarkdownRenderer) |
49 | 49 | def render_strikethrough(self, element): |
60 | 60 | @render_dispatch(HTMLRenderer) |
61 | 61 | def render_table(self, element): |
62 | 62 | head, *body = element.children |
63 | theader = "<thead>\n{}</thead>".format(self.render(head)) | |
63 | theader = f"<thead>\n{self.render(head)}</thead>" | |
64 | 64 | tbody = "" |
65 | 65 | if body: |
66 | 66 | tbody = "\n<tbody>\n{}</tbody>".format( |
80 | 80 | |
81 | 81 | @render_dispatch(HTMLRenderer) |
82 | 82 | def render_table_row(self, element): |
83 | return "<tr>\n{}</tr>\n".format(self.render_children(element)) | |
83 | return f"<tr>\n{self.render_children(element)}</tr>\n" | |
84 | 84 | |
85 | 85 | @render_table_row.dispatch(MarkdownRenderer) |
86 | 86 | def render_table_row(self, element): |
19 | 19 | markdown = Markdown(extensions=['pangu']) |
20 | 20 | print(markdown(text)) |
21 | 21 | """ |
22 | ||
22 | 23 | import re |
23 | 24 | |
24 | 25 | from marko import HTMLRenderer |
25 | 25 | print(markdown.renderer.render_toc()) |
26 | 26 | |
27 | 27 | """ |
28 | ||
28 | 29 | import re |
29 | 30 | |
30 | 31 | from slugify import slugify |
4 | 4 | """ |
5 | 5 | Helper functions and data structures |
6 | 6 | """ |
7 | ||
7 | 8 | from __future__ import annotations |
8 | 9 | |
9 | 10 | import dataclasses |
172 | 173 | raise NotImplementedError(f"Unsupported renderer {type(r)}") from None |
173 | 174 | |
174 | 175 | @overload |
175 | def __get__(self: D, obj: None, owner: type) -> D: | |
176 | ... | |
176 | def __get__(self: D, obj: None, owner: type) -> D: ... | |
177 | 177 | |
178 | 178 | @overload |
179 | def __get__(self: D, obj: Renderer, owner: type) -> RendererFunc: | |
180 | ... | |
179 | def __get__(self: D, obj: Renderer, owner: type) -> RendererFunc: ... | |
181 | 180 | |
182 | 181 | def __get__(self: D, obj: Renderer | None, owner: type) -> RendererFunc | D: |
183 | 182 | if obj is None: |
4 | 4 | """ |
5 | 5 | Inline(span) level elements |
6 | 6 | """ |
7 | ||
7 | 8 | from __future__ import annotations |
8 | 9 | |
9 | 10 | import re |
94 | 95 | pattern = re.compile( |
95 | 96 | r"(<%s(?:%s)* */?>" # open tag |
96 | 97 | r"|</%s *>" # closing tag |
97 | r"|<!--(?!>|->|[\s\S]*?--[\s\S]*?-->)[\s\S]*?(?<!-)-->" # HTML comment | |
98 | r"|<!--(?:>|->|[\s\S]*?-->)" # HTML comment | |
98 | 99 | r"|<\?[\s\S]*?\?>" # processing instruction |
99 | 100 | r"|<![A-Z]+ +[\s\S]*?>" # declaration |
100 | 101 | r"|<!\[CDATA\[[\s\S]*?\]\]>)" # CDATA section |
4 | 4 | """ |
5 | 5 | Parse inline elements |
6 | 6 | """ |
7 | ||
7 | 8 | from __future__ import annotations |
8 | 9 | |
9 | 10 | import re |
304 | 305 | else: |
305 | 306 | escaped = False |
306 | 307 | pairs = 0 |
307 | i = 0 | |
308 | for i, c in enumerate(link_text[start:], start): | |
308 | i = start | |
309 | while i < len(link_text): | |
310 | c = link_text[i] | |
309 | 311 | if escaped: |
310 | 312 | escaped = False |
311 | 313 | elif c == "\\": |
324 | 326 | return link_dest, _EMPTY_GROUP |
325 | 327 | else: |
326 | 328 | raise ParseError("unmatched parenthesis") |
329 | i += 1 | |
327 | 330 | else: |
328 | 331 | if is_inline: |
329 | 332 | raise ParseError("No right parenthesis is found") |
515 | 518 | ) |
516 | 519 | |
517 | 520 | def followed_by_punc(self) -> bool: |
518 | return ( | |
519 | self.end < len(self.text) | |
520 | and patterns.punctuation.match(self.text, self.end) is not None | |
521 | return self.end < len(self.text) and patterns.is_punctuation( | |
522 | self.text[self.end] | |
521 | 523 | ) |
522 | 524 | |
523 | 525 | def preceded_by_punc(self) -> bool: |
524 | return ( | |
525 | self.start > 0 | |
526 | and patterns.punctuation.match(self.text[self.start - 1]) is not None | |
527 | ) | |
526 | return self.start > 0 and patterns.is_punctuation(self.text[self.start - 1]) | |
528 | 527 | |
529 | 528 | def closed_by(self, other: Delimiter) -> bool: |
530 | 529 | return not ( |
4 | 4 | """ |
5 | 5 | Some regex patterns |
6 | 6 | """ |
7 | ||
8 | import functools | |
7 | 9 | import re |
10 | import string | |
11 | import unicodedata | |
8 | 12 | |
9 | 13 | tags = [ |
10 | 14 | "address", |
88 | 92 | r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]" |
89 | 93 | r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*" |
90 | 94 | ) |
91 | punctuation = re.compile( | |
92 | r'[!"#$%&\'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB' | |
93 | r"\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3" | |
94 | r"\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F" | |
95 | r"\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E" | |
96 | r"\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12" | |
97 | r"\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB" | |
98 | r"\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736" | |
99 | r"\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-" | |
100 | r"\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F" | |
101 | r"\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E" | |
102 | r"\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5" | |
103 | r"\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC" | |
104 | r"\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011" | |
105 | r"\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673" | |
106 | r"\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E" | |
107 | r"\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0" | |
108 | r"\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63" | |
109 | r"\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B" | |
110 | r"\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-" | |
111 | r"\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58" | |
112 | r"\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D" | |
113 | r"\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD" | |
114 | r"\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7" | |
115 | r"\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F" | |
116 | r"\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]" | |
117 | ) | |
95 | ||
96 | ||
97 | @functools.lru_cache(maxsize=128) | |
98 | def is_punctuation(ch: str) -> bool: | |
99 | if ch in string.punctuation: | |
100 | return True | |
101 | category = unicodedata.category(ch) | |
102 | return category.startswith("P") or category.startswith("S") |
4 | 4 | """ |
5 | 5 | Base renderer class |
6 | 6 | """ |
7 | ||
7 | 8 | from __future__ import annotations |
8 | 9 | |
9 | 10 | import html |
54 | 55 | |
55 | 56 | def __exit__(self, *args: Any) -> None: |
56 | 57 | html._charref = _charref_bak # type: ignore[attr-defined] |
58 | self.root_node = None | |
57 | 59 | |
58 | 60 | def render(self, element: Element) -> Any: |
59 | 61 | """Renders the given element to string. |
61 | 63 | :param element: a element to be rendered. |
62 | 64 | :returns: the output string or any values. |
63 | 65 | """ |
66 | from .block import Document | |
67 | ||
64 | 68 | # Store the root node since it may be required by the render functions |
65 | 69 | if not self.root_node: # pragma: no cover |
66 | self.root_node = element # type: ignore | |
70 | if isinstance(element, Document): | |
71 | self.root_node = element | |
72 | else: | |
73 | # Make a dummy root node from it | |
74 | self.root_node = Document() | |
75 | self.root_node.children = [element] | |
67 | 76 | if hasattr(element, "get_type"): |
68 | 77 | func_name = "render_" + element.get_type(snake_case=True) |
69 | 78 | render_func = getattr(self, func_name, None) |
80 | 80 | return regexp.match(self._buffer, pos) |
81 | 81 | |
82 | 82 | @staticmethod |
83 | @functools.lru_cache() | |
83 | @functools.lru_cache | |
84 | 84 | def match_prefix(prefix: str, line: str) -> int: |
85 | 85 | """Check if the line starts with given prefix and |
86 | 86 | return the position of the end of prefix. |
115 | 115 | return None |
116 | 116 | |
117 | 117 | @overload |
118 | def next_line(self, require_prefix: Literal[False] = ...) -> str: | |
119 | ... | |
118 | def next_line(self, require_prefix: Literal[False] = ...) -> str: ... | |
120 | 119 | |
121 | 120 | @overload |
122 | def next_line(self, require_prefix: Literal[True] = ...) -> str | None: | |
123 | ... | |
121 | def next_line(self, require_prefix: Literal[True] = ...) -> str | None: ... | |
124 | 122 | |
125 | 123 | def next_line(self, require_prefix: bool = True) -> str | None: |
126 | 124 | """Return the next line in the source. |