git @ Cat's Eye Technologies Cleandown / 7e8cda4
Update included Marko sources to be on par w/ 2.1.2 from upstream. Chris Pressey 6 months ago
22 changed file(s) with 67 addition(s) and 76 deletion(s). Raw diff Collapse all Expand all
1212 Licensed under MIT.
1313 Created by Frost Ming<mianghong@gmail.com>
1414 """
15
1516 from __future__ import annotations
1617
1718 from typing import TYPE_CHECKING, Iterable, cast
2526 from .block import Document
2627 from .parser import ElementType
2728
28 __version__ = "2.0.2"
29 __version__ = "2.1.2"
2930
3031
3132 class SetupDone(Exception):
132133 Override this to handle parsed result.
133134 """
134135 self._setup_extensions()
135 self.renderer.root_node = parsed
136136 with self.renderer as r:
137137 return r.render(parsed)
138138
1212 Licensed under MIT.
1313 Created by Frost Ming<mianghong@gmail.com>
1414 """
15
1516 from . import cli
1617
1718 if __name__ == "__main__":
44 """
55 AST renderers for inspecting the markdown parsing result.
66 """
7
78 from __future__ import annotations
89
910 import html
3839 def render_raw_text(self, element: inline.RawText) -> dict[str, Any]:
3940 return {
4041 "element": "raw_text",
41 "children": html.unescape(element.children)
42 if element.escape
43 else element.children,
42 "children": (
43 html.unescape(element.children) if element.escape else element.children
44 ),
4445 "escape": element.escape,
4546 }
4647
4748 @overload
48 def render_children(self, element: list[Element]) -> list[dict[str, Any]]:
49 ...
49 def render_children(self, element: list[Element]) -> list[dict[str, Any]]: ...
5050
5151 @overload
52 def render_children(self, element: Element) -> dict[str, Any]:
53 ...
52 def render_children(self, element: Element) -> dict[str, Any]: ...
5453
5554 @overload
56 def render_children(self, element: str) -> str:
57 ...
55 def render_children(self, element: str) -> str: ...
5856
5957 def render_children(self, element):
6058 if isinstance(element, list):
44 """
55 Block level elements
66 """
7
78 from __future__ import annotations
89
910 import re
44 """
55 Command line interfaces
66 """
7
78 import codecs
89 import importlib
910 import sys
2121 """
2222
2323 # Prevent override of BlockElement and InlineElement
24 if cls.override and cls.__base__ not in Element.__subclasses__():
24 if (
25 cls.override
26 and cls.__base__
27 and cls.__base__ not in Element.__subclasses__()
28 ):
2529 name = cls.__base__.__name__
2630 else:
2731 name = cls.__name__
1919 markdown = Markdown(extensions=['codehilite'])
2020 markdown.convert('```python filename="my_script.py"\nprint('hello world')\n```')
2121 """
22
2223 import json
2324
2425 from pygments import highlight
2626 GFM = MarkoExtension(
2727 elements=[
2828 elements.Paragraph,
29 elements.InlineHTML,
3029 elements.Strikethrough,
3130 elements.Url,
3231 elements.Table,
44 """
55 Extra elements
66 """
7
78 from __future__ import annotations
89
910 import itertools
1011 import re
1112 from typing import Any, cast
1213
13 from marko import block, inline, patterns
14 from marko import block, inline
1415 from marko.source import Source
1516
1617
2425 if m:
2526 self.checked = m.group(1)[1:-1].lower() == "x"
2627 self.inline_body = self.inline_body[m.end(1) :]
27
28
29 class InlineHTML(inline.InlineHTML):
30 pattern = re.compile(
31 r"(<%s(?:%s)* */?>" # open tag
32 r"|</%s *>" # closing tag
33 r"|<!--(?:>|->|[\s\S]*?-->)" # HTML comment
34 r"|<\?[\s\S]*?\?>" # processing instruction
35 r"|<![A-Z]+ +[\s\S]*?>" # declaration
36 r"|<!\[CDATA\[[\s\S]*?\]\]>)" # CDATA section
37 % (patterns.tag_name, patterns.attribute, patterns.tag_name)
38 )
3928
4029
4130 class Strikethrough(inline.InlineElement):
4343
4444 @render_dispatch(HTMLRenderer)
4545 def render_strikethrough(self, element):
46 return "<del>{}</del>".format(self.render_children(element))
46 return f"<del>{self.render_children(element)}</del>"
4747
4848 @render_strikethrough.dispatch(MarkdownRenderer)
4949 def render_strikethrough(self, element):
6060 @render_dispatch(HTMLRenderer)
6161 def render_table(self, element):
6262 head, *body = element.children
63 theader = "<thead>\n{}</thead>".format(self.render(head))
63 theader = f"<thead>\n{self.render(head)}</thead>"
6464 tbody = ""
6565 if body:
6666 tbody = "\n<tbody>\n{}</tbody>".format(
8080
8181 @render_dispatch(HTMLRenderer)
8282 def render_table_row(self, element):
83 return "<tr>\n{}</tr>\n".format(self.render_children(element))
83 return f"<tr>\n{self.render_children(element)}</tr>\n"
8484
8585 @render_table_row.dispatch(MarkdownRenderer)
8686 def render_table_row(self, element):
44 """
55 LaTeX renderer
66 """
7
78 from __future__ import annotations
89
910 import logging
1919 markdown = Markdown(extensions=['pangu'])
2020 print(markdown(text))
2121 """
22
2223 import re
2324
2425 from marko import HTMLRenderer
2525 print(markdown.renderer.render_toc())
2626
2727 """
28
2829 import re
2930
3031 from slugify import slugify
44 """
55 Helper functions and data structures
66 """
7
78 from __future__ import annotations
89
910 import dataclasses
172173 raise NotImplementedError(f"Unsupported renderer {type(r)}") from None
173174
174175 @overload
175 def __get__(self: D, obj: None, owner: type) -> D:
176 ...
176 def __get__(self: D, obj: None, owner: type) -> D: ...
177177
178178 @overload
179 def __get__(self: D, obj: Renderer, owner: type) -> RendererFunc:
180 ...
179 def __get__(self: D, obj: Renderer, owner: type) -> RendererFunc: ...
181180
182181 def __get__(self: D, obj: Renderer | None, owner: type) -> RendererFunc | D:
183182 if obj is None:
44 """
55 HTML renderer
66 """
7
78 from __future__ import annotations
89
910 import html
44 """
55 Inline(span) level elements
66 """
7
78 from __future__ import annotations
89
910 import re
9495 pattern = re.compile(
9596 r"(<%s(?:%s)* */?>" # open tag
9697 r"|</%s *>" # closing tag
97 r"|<!--(?!>|->|[\s\S]*?--[\s\S]*?-->)[\s\S]*?(?<!-)-->" # HTML comment
98 r"|<!--(?:>|->|[\s\S]*?-->)" # HTML comment
9899 r"|<\?[\s\S]*?\?>" # processing instruction
99100 r"|<![A-Z]+ +[\s\S]*?>" # declaration
100101 r"|<!\[CDATA\[[\s\S]*?\]\]>)" # CDATA section
44 """
55 Parse inline elements
66 """
7
78 from __future__ import annotations
89
910 import re
304305 else:
305306 escaped = False
306307 pairs = 0
307 i = 0
308 for i, c in enumerate(link_text[start:], start):
308 i = start
309 while i < len(link_text):
310 c = link_text[i]
309311 if escaped:
310312 escaped = False
311313 elif c == "\\":
324326 return link_dest, _EMPTY_GROUP
325327 else:
326328 raise ParseError("unmatched parenthesis")
329 i += 1
327330 else:
328331 if is_inline:
329332 raise ParseError("No right parenthesis is found")
515518 )
516519
517520 def followed_by_punc(self) -> bool:
518 return (
519 self.end < len(self.text)
520 and patterns.punctuation.match(self.text, self.end) is not None
521 return self.end < len(self.text) and patterns.is_punctuation(
522 self.text[self.end]
521523 )
522524
523525 def preceded_by_punc(self) -> bool:
524 return (
525 self.start > 0
526 and patterns.punctuation.match(self.text[self.start - 1]) is not None
527 )
526 return self.start > 0 and patterns.is_punctuation(self.text[self.start - 1])
528527
529528 def closed_by(self, other: Delimiter) -> bool:
530529 return not (
44 """
55 Markdown renderer
66 """
7
78 from __future__ import annotations
89
910 import re
44 """
55 Base parser
66 """
7
78 from __future__ import annotations
89
910 import itertools
44 """
55 Some regex patterns
66 """
7
8 import functools
79 import re
10 import string
11 import unicodedata
812
913 tags = [
1014 "address",
8892 r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]"
8993 r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"
9094 )
91 punctuation = re.compile(
92 r'[!"#$%&\'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB'
93 r"\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3"
94 r"\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F"
95 r"\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E"
96 r"\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12"
97 r"\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB"
98 r"\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736"
99 r"\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-"
100 r"\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F"
101 r"\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E"
102 r"\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5"
103 r"\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC"
104 r"\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011"
105 r"\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673"
106 r"\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E"
107 r"\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0"
108 r"\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63"
109 r"\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B"
110 r"\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-"
111 r"\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58"
112 r"\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D"
113 r"\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD"
114 r"\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7"
115 r"\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F"
116 r"\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]"
117 )
95
96
97 @functools.lru_cache(maxsize=128)
98 def is_punctuation(ch: str) -> bool:
99 if ch in string.punctuation:
100 return True
101 category = unicodedata.category(ch)
102 return category.startswith("P") or category.startswith("S")
44 """
55 Base renderer class
66 """
7
78 from __future__ import annotations
89
910 import html
5455
5556 def __exit__(self, *args: Any) -> None:
5657 html._charref = _charref_bak # type: ignore[attr-defined]
58 self.root_node = None
5759
5860 def render(self, element: Element) -> Any:
5961 """Renders the given element to string.
6163 :param element: an element to be rendered.
6264 :returns: the output string or any values.
6365 """
66 from .block import Document
67
6468 # Store the root node since it may be required by the render functions
6569 if not self.root_node: # pragma: no cover
66 self.root_node = element # type: ignore
70 if isinstance(element, Document):
71 self.root_node = element
72 else:
73 # Make a dummy root node from it
74 self.root_node = Document()
75 self.root_node.children = [element]
6776 if hasattr(element, "get_type"):
6877 func_name = "render_" + element.get_type(snake_case=True)
6978 render_func = getattr(self, func_name, None)
8080 return regexp.match(self._buffer, pos)
8181
8282 @staticmethod
83 @functools.lru_cache()
83 @functools.lru_cache
8484 def match_prefix(prefix: str, line: str) -> int:
8585 """Check if the line starts with given prefix and
8686 return the position of the end of prefix.
115115 return None
116116
117117 @overload
118 def next_line(self, require_prefix: Literal[False] = ...) -> str:
119 ...
118 def next_line(self, require_prefix: Literal[False] = ...) -> str: ...
120119
121120 @overload
122 def next_line(self, require_prefix: Literal[True] = ...) -> str | None:
123 ...
121 def next_line(self, require_prefix: Literal[True] = ...) -> str | None: ...
124122
125123 def next_line(self, require_prefix: bool = True) -> str | None:
126124 """Return the next line in the source.