Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

♻️ Refactor backslash escape logic #276

Merged
merged 2 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion markdown_it/parser_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,15 @@
from __future__ import annotations

from .ruler import RuleFunc, Ruler
from .rules_core import block, inline, linkify, normalize, replace, smartquotes
from .rules_core import (
block,
inline,
linkify,
normalize,
replace,
smartquotes,
text_join,
)
from .rules_core.state_core import StateCore

_rules: list[tuple[str, RuleFunc]] = [
Expand All @@ -17,6 +25,7 @@
("linkify", linkify),
("replacements", replace),
("smartquotes", smartquotes),
("text_join", text_join),
]


Expand Down
9 changes: 8 additions & 1 deletion markdown_it/parser_inline.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,18 @@
("entity", rules_inline.entity),
]

# Note `rule2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may be changed in the future.
#
# Don't use this for anything except pairs (plugins working with `balance_pairs`).
#
_rules2: list[tuple[str, RuleFunc]] = [
("balance_pairs", rules_inline.link_pairs),
("strikethrough", rules_inline.strikethrough.postProcess),
("emphasis", rules_inline.emphasis.postProcess),
("text_collapse", rules_inline.text_collapse),
# rules for pairs separate '**' into its own text tokens, which may be left unused,
# rule below merges unused segments back with the rest of the text
("fragments_join", rules_inline.fragments_join),
]


Expand Down
4 changes: 2 additions & 2 deletions markdown_it/presets/commonmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def make() -> PresetType:
"highlight": None,
},
"components": {
"core": {"rules": ["normalize", "block", "inline"]},
"core": {"rules": ["normalize", "block", "inline", "text_join"]},
"block": {
"rules": [
"blockquote",
Expand Down Expand Up @@ -68,7 +68,7 @@ def make() -> PresetType:
"newline",
"text",
],
"rules2": ["balance_pairs", "emphasis", "text_collapse"],
"rules2": ["balance_pairs", "emphasis", "fragments_join"],
},
},
}
7 changes: 5 additions & 2 deletions markdown_it/presets/zero.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@ def make() -> PresetType:
"highlight": None,
},
"components": {
"core": {"rules": ["normalize", "block", "inline"]},
"core": {"rules": ["normalize", "block", "inline", "text_join"]},
"block": {"rules": ["paragraph"]},
"inline": {"rules": ["text"], "rules2": ["balance_pairs", "text_collapse"]},
"inline": {
"rules": ["text"],
"rules2": ["balance_pairs", "fragments_join"],
},
},
}
2 changes: 2 additions & 0 deletions markdown_it/rules_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"replace",
"smartquotes",
"linkify",
"text_join",
)

from .block import block
Expand All @@ -15,3 +16,4 @@
from .replacements import replace
from .smartquotes import smartquotes
from .state_core import StateCore
from .text_join import text_join
34 changes: 34 additions & 0 deletions markdown_it/rules_core/text_join.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Join raw text tokens with the rest of the text

This is set as a separate rule to provide an opportunity for plugins
to run text replacements after text join, but before escape join.

For example, `\\:)` shouldn't be replaced with an emoji.
"""
from __future__ import annotations

from ..token import Token
from .state_core import StateCore


def text_join(state: StateCore) -> None:
    """Join raw text for escape sequences (`text_special`) tokens with the rest of the text.

    Walks every ``inline`` token and rebuilds its children so that
    ``text_special`` tokens become plain ``text`` and runs of adjacent
    ``text`` tokens collapse into a single token.
    """
    for token in state.tokens:
        if token.type != "inline":
            continue

        merged: list[Token] = []
        for child in token.children or []:
            # demote escape-produced tokens back to plain text before merging
            if child.type == "text_special":
                child.type = "text"
            if merged and child.type == "text" and merged[-1].type == "text":
                # extend the previous text token instead of keeping two
                merged[-1].content += child.content
            else:
                merged.append(child)

        token.children = merged
4 changes: 2 additions & 2 deletions markdown_it/rules_inline/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__all__ = (
"StateInline",
"text",
"text_collapse",
"fragments_join",
"link_pairs",
"escape",
"newline",
Expand All @@ -20,10 +20,10 @@
from .balance_pairs import link_pairs
from .entity import entity
from .escape import escape
from .fragments_join import fragments_join
from .html_inline import html_inline
from .image import image
from .link import link
from .newline import newline
from .state_inline import StateInline
from .text import text
from .text_collapse import text_collapse
91 changes: 52 additions & 39 deletions markdown_it/rules_inline/escape.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,58 @@
from ..common.utils import isStrSpace
from .state_inline import StateInline


def escape(state: StateInline, silent: bool) -> bool:
    """Process escaped chars and hardbreaks."""
    start = state.pos
    end = state.posMax

    if state.src[start] != "\\":
        return False

    start += 1

    # a lone '\' terminating the inline block escapes nothing
    if start >= end:
        return False

    first = state.src[start]

    if first == "\n":
        # backslash before newline: hard line break
        if not silent:
            state.push("hardbreak", "br", 0)
        start += 1
        # skip leading whitespaces from next line
        while start < end and isStrSpace(state.src[start]):
            start += 1

        state.pos = start
        return True

    escaped = first

    # keep UTF-16 surrogate pairs together (faithful port of the JS rule)
    if 0xD800 <= ord(first) <= 0xDBFF and start + 1 < end:
        second = state.src[start + 1]
        if 0xDC00 <= ord(second) <= 0xDFFF:
            escaped += second
            start += 1

    original = "\\" + escaped

    if not silent:
        token = state.push("text_special", "", 0)
        # recognised escapes drop the backslash; others keep it literally
        token.content = escaped if first in _ESCAPED else original
        token.markup = original
        token.info = "escape"

    state.pos = start + 1
    return True


_ESCAPED = {
"!",
'"',
Expand Down Expand Up @@ -38,42 +90,3 @@
"}",
"~",
}


def escape(state: StateInline, silent: bool) -> bool:
    """Process escaped chars and hardbreaks."""
    cursor = state.pos
    limit = state.posMax

    if state.src[cursor] != "\\":
        return False

    cursor += 1

    if cursor < limit:
        nxt = state.src[cursor]

        if nxt in _ESCAPED:
            # recognised escape: emit the bare character, drop the backslash
            if not silent:
                state.pending += nxt
            state.pos += 2
            return True

        if nxt == "\n":
            # backslash before newline: hard line break
            if not silent:
                state.push("hardbreak", "br", 0)
            cursor += 1
            # skip leading whitespaces from next line
            while cursor < limit and isStrSpace(state.src[cursor]):
                cursor += 1

            state.pos = cursor
            return True

    # trailing or unrecognised backslash: keep it literally
    if not silent:
        state.pending += "\\"
    state.pos += 1
    return True
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .state_inline import StateInline


def text_collapse(state: StateInline) -> None:
def fragments_join(state: StateInline) -> None:
"""
Clean up tokens after emphasis and strikethrough postprocessing:
merge adjacent text nodes into one and re-calculate all token levels
Expand Down
35 changes: 18 additions & 17 deletions tests/test_api/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def test_get_rules():
"linkify",
"replacements",
"smartquotes",
"text_join",
],
"block": [
"table",
Expand Down Expand Up @@ -40,21 +41,21 @@ def test_get_rules():
"html_inline",
"entity",
],
"inline2": ["balance_pairs", "strikethrough", "emphasis", "text_collapse"],
"inline2": ["balance_pairs", "strikethrough", "emphasis", "fragments_join"],
}


def test_load_presets():
md = MarkdownIt("zero")
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}
md = MarkdownIt("commonmark")
assert md.get_active_rules() == {
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"block": [
"code",
"fence",
Expand All @@ -79,7 +80,7 @@ def test_load_presets():
"html_inline",
"entity",
],
"inline2": ["balance_pairs", "emphasis", "text_collapse"],
"inline2": ["balance_pairs", "emphasis", "fragments_join"],
}


Expand All @@ -94,33 +95,33 @@ def test_enable():
md = MarkdownIt("zero").enable("heading")
assert md.get_active_rules() == {
"block": ["heading", "paragraph"],
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}
md.enable(["backticks", "autolink"])
assert md.get_active_rules() == {
"block": ["heading", "paragraph"],
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"inline": ["text", "backticks", "autolink"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}


def test_disable():
md = MarkdownIt("zero").disable("inline")
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block"],
"core": ["normalize", "block", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}
md.disable(["text"])
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block"],
"core": ["normalize", "block", "text_join"],
"inline": [],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}


Expand All @@ -130,15 +131,15 @@ def test_reset():
md.disable("inline")
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block"],
"core": ["normalize", "block", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}
assert md.get_active_rules() == {
"block": ["paragraph"],
"core": ["normalize", "block", "inline"],
"core": ["normalize", "block", "inline", "text_join"],
"inline": ["text"],
"inline2": ["balance_pairs", "text_collapse"],
"inline2": ["balance_pairs", "fragments_join"],
}


Expand Down
9 changes: 8 additions & 1 deletion tests/test_port/fixtures/linkify.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,11 @@ after
<p>before</p>
<p><a href="http://github.com">github.com</a></p>
<p>after</p>
.
.

Don't match escaped
.
google\.com
.
<p>google.com</p>
.
15 changes: 14 additions & 1 deletion tests/test_port/fixtures/smartquotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,17 @@ Should parse quotes adjacent to inline html, #677:
.
<p>“test <br>”</p>
<p>“<br> test”</p>
.
.

Should be escapable:
.
"foo"

\"foo"

"foo\"
.
<p>“foo”</p>
<p>&quot;foo&quot;</p>
<p>&quot;foo&quot;</p>
.
Loading