From 3c485075a6e004f558a0200b09835bc39fa00f2e Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 1 Jun 2023 02:34:40 +0200 Subject: [PATCH 1/3] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20REFACTOR:=20Replace=20?= =?UTF-8?q?character=20codes=20with=20strings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markdown_it/common/utils.py | 34 ++++++++-- markdown_it/helpers/parse_link_destination.py | 2 +- markdown_it/helpers/parse_link_label.py | 6 +- markdown_it/main.py | 2 +- markdown_it/parser_block.py | 9 +-- markdown_it/ruler.py | 16 ++++- markdown_it/rules_block/blockquote.py | 32 +++++----- markdown_it/rules_block/fence.py | 14 ++--- markdown_it/rules_block/heading.py | 20 +++--- markdown_it/rules_block/hr.py | 14 ++--- markdown_it/rules_block/html_block.py | 2 +- markdown_it/rules_block/lheading.py | 13 ++-- markdown_it/rules_block/list.py | 48 +++++++------- markdown_it/rules_block/reference.py | 6 +- markdown_it/rules_block/state_block.py | 59 +++++++++-------- markdown_it/rules_block/table.py | 33 +++++----- markdown_it/rules_core/block.py | 4 +- markdown_it/rules_core/smartquotes.py | 25 ++++---- markdown_it/rules_inline/autolink.py | 8 +-- markdown_it/rules_inline/backticks.py | 8 +-- markdown_it/rules_inline/emphasis.py | 11 ++-- markdown_it/rules_inline/entity.py | 6 +- markdown_it/rules_inline/escape.py | 52 +++++++++++---- markdown_it/rules_inline/html_inline.py | 11 +--- markdown_it/rules_inline/image.py | 30 +++++---- markdown_it/rules_inline/link.py | 22 +++---- markdown_it/rules_inline/newline.py | 11 ++-- markdown_it/rules_inline/state_inline.py | 16 ++--- markdown_it/rules_inline/strikethrough.py | 21 +++---- markdown_it/rules_inline/text.py | 63 +++++++++---------- 30 files changed, 319 insertions(+), 279 deletions(-) diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py index ed862e74..4effc00f 100644 --- a/markdown_it/common/utils.py +++ b/markdown_it/common/utils.py @@ -4,12 +4,12 @@ import html import re -from typing import Any, Match, TypeVar +from typing import Match, TypeVar from .entities import entities -def charCodeAt(src: str, pos: int) -> Any: +def charCodeAt(src: str, pos: int) -> int | None: """ Returns the Unicode value of the character at the specified location. @@ -24,6 +24,21 @@ def charCodeAt(src: str, pos: int) -> Any: return None +def charStrAt(src: str, pos: int) -> str | None: + """ + Returns the Unicode value of the character at the specified location. + + @param - index The zero-based index of the desired character. + If there is no character at the specified index, NaN is returned. + + This was added for compatibility with python + """ + try: + return src[pos] + except IndexError: + return None + + _ItemTV = TypeVar("_ItemTV") @@ -96,7 +111,7 @@ def replaceEntityPattern(match: str, name: str) -> str: if name in entities: return entities[name] - if ord(name[0]) == 0x23 and DIGITAL_ENTITY_TEST_RE.search(name): + if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name): code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10) if isValidEntityCode(code): return fromCodePoint(code) @@ -178,8 +193,14 @@ def escapeRE(string: str) -> str: # ////////////////////////////////////////////////////////////////////////////// -def isSpace(code: object) -> bool: - return code in {0x09, 0x20} +def isSpace(code: int | None) -> bool: + """Check if character code is a whitespace.""" + return code in (0x09, 0x20) + + +def isStrSpace(ch: str | None) -> bool: + """Check if character is a whitespace.""" + return ch in ("\t", " ") MD_WHITESPACE = { @@ -188,7 +209,7 @@ def isSpace(code: object) -> bool: 0x0B, # \v 0x0C, # \f 0x0D, # \r - 0x20, + 0x20, # space 0xA0, 0x1680, 0x202F, @@ -213,6 +234,7 @@ def isWhiteSpace(code: int) -> bool: # Currently without astral characters support. def isPunctChar(ch: str) -> bool: + """Check if character is a punctuation character.""" return UNICODE_PUNCT_RE.search(ch) is not None diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py index d527ce0c..f42b2244 100644 --- a/markdown_it/helpers/parse_link_destination.py +++ b/markdown_it/helpers/parse_link_destination.py @@ -49,7 +49,7 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result: while pos < maximum: code = charCodeAt(string, pos) - if code == 0x20: + if code is None or code == 0x20: break # ascii control characters diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py index 6ce8daf8..01c653c5 100644 --- a/markdown_it/helpers/parse_link_label.py +++ b/markdown_it/helpers/parse_link_label.py @@ -17,8 +17,8 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) level = 1 while state.pos < state.posMax: - marker = state.srcCharCode[state.pos] - if marker == 0x5D: # /* ] */) + marker = state.src[state.pos] + if marker == "]": level -= 1 if level == 0: found = True @@ -26,7 +26,7 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False) prevPos = state.pos state.md.inline.skipToken(state) - if marker == 0x5B: # /* [ */) + if marker == "[": if prevPos == state.pos - 1: # increase level if we find text `[`, # which is not a part of any token diff --git a/markdown_it/main.py b/markdown_it/main.py index 243e1509..bb294a99 100644 --- a/markdown_it/main.py +++ b/markdown_it/main.py @@ -46,7 +46,7 @@ def __init__( """ # add modules self.utils = utils - self.helpers: Any = helpers + self.helpers = helpers # initialise classes self.inline = ParserInline() diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py index cd240a8a..86f08cf5 100644 --- a/markdown_it/parser_block.py +++ b/markdown_it/parser_block.py @@ -97,16 +97,11 @@ def tokenize( state.line = line def parse( - self, - src: str, - md: MarkdownIt, - env: EnvType, - outTokens: list[Token], - ords: tuple[int, ...] | None = None, + self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token] ) -> list[Token] | None: """Process input string and push block tokens into `outTokens`.""" if not src: return None - state = StateBlock(src, md, env, outTokens, ords) + state = StateBlock(src, md, env, outTokens) self.tokenize(state, state.line, state.lineMax) return state.tokens diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py index 8ae32beb..9849561d 100644 --- a/markdown_it/ruler.py +++ b/markdown_it/ruler.py @@ -20,6 +20,7 @@ class Ruler from collections.abc import Callable, Iterable from dataclasses import dataclass, field from typing import TYPE_CHECKING, TypedDict +import warnings from markdown_it._compat import DATACLASS_KWARGS @@ -30,8 +31,6 @@ class Ruler class StateBase: - srcCharCode: tuple[int, ...] # noqa: N815 - def __init__(self, src: str, md: MarkdownIt, env: EnvType): self.src = src self.env = env @@ -44,7 +43,18 @@ def src(self) -> str: @src.setter def src(self, value: str) -> None: self._src = value - self.srcCharCode = tuple(ord(c) for c in self.src) + self._srcCharCode: tuple[int, ...] | None = None + + @property + def srcCharCode(self) -> tuple[int, ...]: + warnings.warn( + "StateBase.srcCharCode is deprecated. Use StateBase.src instead.", + DeprecationWarning, + stacklevel=2, + ) + if self._srcCharCode is None: + self._srcCharCode = tuple(ord(c) for c in self._src) + return self._srcCharCode # The first positional arg is always a subtype of `StateBase`. Other diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py index da57dfa5..0c9081b9 100644 --- a/markdown_it/rules_block/blockquote.py +++ b/markdown_it/rules_block/blockquote.py @@ -3,7 +3,7 @@ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -23,7 +23,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # check the block quote marker try: - if state.srcCharCode[pos] != 0x3E: # /* > */ + if state.src[pos] != ">": return False except IndexError: return False @@ -38,12 +38,12 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> initial = offset = state.sCount[startLine] + 1 try: - second_char_code: int | None = state.srcCharCode[pos] + second_char: str | None = state.src[pos] except IndexError: - second_char_code = None + second_char = None # skip one optional space after '>' - if second_char_code == 0x20: # /* space */ + if second_char == " ": # ' > test ' # ^ -- position start of line here: pos += 1 @@ -51,7 +51,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> offset += 1 adjustTab = False spaceAfterMarker = True - elif second_char_code == 0x09: # /* tab */ + elif second_char == "\t": spaceAfterMarker = True if (state.bsCount[startLine] + offset) % 4 == 3: @@ -74,10 +74,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.bMarks[startLine] = pos while pos < max: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if isSpace(ch): - if ch == 0x09: # / tab / + if isStrSpace(ch): + if ch == "\t": offset += ( 4 - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4 @@ -147,7 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # Case 1: line is not inside the blockquote, and this line is empty. break - evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */ + evaluatesTrue = state.src[pos] == ">" and not isOutdented pos += 1 if evaluatesTrue: # This line is inside the blockquote. @@ -156,12 +156,12 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> initial = offset = state.sCount[nextLine] + 1 try: - next_char: int | None = state.srcCharCode[pos] + next_char: str | None = state.src[pos] except IndexError: next_char = None # skip one optional space after '>' - if next_char == 0x20: # /* space */ + if next_char == " ": # ' > test ' # ^ -- position start of line here: pos += 1 @@ -169,7 +169,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> offset += 1 adjustTab = False spaceAfterMarker = True - elif next_char == 0x09: # /* tab */ + elif next_char == "\t": spaceAfterMarker = True if (state.bsCount[nextLine] + offset) % 4 == 3: @@ -192,10 +192,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.bMarks[nextLine] = pos while pos < max: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if isSpace(ch): - if ch == 0x09: + if isStrSpace(ch): + if ch == "\t": offset += ( 4 - ( diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py index 2051b96b..263f1b8d 100644 --- a/markdown_it/rules_block/fence.py +++ b/markdown_it/rules_block/fence.py @@ -19,15 +19,14 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if pos + 3 > maximum: return False - marker = state.srcCharCode[pos] + marker = state.src[pos] - # /* ~ */ /* ` */ - if marker != 0x7E and marker != 0x60: + if marker not in ("~", "`"): return False # scan marker length mem = pos - pos = state.skipChars(pos, marker) + pos = state.skipCharsStr(pos, marker) length = pos - mem @@ -37,8 +36,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool markup = state.src[mem:pos] params = state.src[pos:maximum] - # /* ` */ - if marker == 0x60 and chr(marker) in params: + if marker == "`" and marker in params: return False # Since start is found, we can report success here in validation mode @@ -65,7 +63,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool break try: - if state.srcCharCode[pos] != marker: + if state.src[pos] != marker: continue except IndexError: break @@ -73,7 +71,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if state.is_code_block(nextLine): continue - pos = state.skipChars(pos, marker) + pos = state.skipCharsStr(pos, marker) # closing code fence must be at least as long as the opening one if pos - mem < length: diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py index 90847f9d..850ffb50 100644 --- a/markdown_it/rules_block/heading.py +++ b/markdown_it/rules_block/heading.py @@ -3,7 +3,7 @@ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -18,29 +18,27 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo if state.is_code_block(startLine): return False - ch: int | None = state.srcCharCode[pos] + ch: str | None = state.src[pos] - # /* # */ - if ch != 0x23 or pos >= maximum: + if ch != "#" or pos >= maximum: return False # count heading level level = 1 pos += 1 try: - ch = state.srcCharCode[pos] + ch = state.src[pos] except IndexError: ch = None - # /* # */ - while ch == 0x23 and pos < maximum and level <= 6: + while ch == "#" and pos < maximum and level <= 6: level += 1 pos += 1 try: - ch = state.srcCharCode[pos] + ch = state.src[pos] except IndexError: ch = None - if level > 6 or (pos < maximum and not isSpace(ch)): + if level > 6 or (pos < maximum and not isStrSpace(ch)): return False if silent: @@ -49,8 +47,8 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo # Let's cut tails like ' ### ' from the end of string maximum = state.skipSpacesBack(maximum, pos) - tmp = state.skipCharsBack(maximum, 0x23, pos) # # - if tmp > pos and isSpace(state.srcCharCode[tmp - 1]): + tmp = state.skipCharsStrBack(maximum, "#", pos) + if tmp > pos and isStrSpace(state.src[tmp - 1]): maximum = tmp state.line = startLine + 1 diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py index 6e6b907b..16df05f2 100644 --- a/markdown_it/rules_block/hr.py +++ b/markdown_it/rules_block/hr.py @@ -4,7 +4,7 @@ """ import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -20,22 +20,22 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: return False try: - marker = state.srcCharCode[pos] + marker = state.src[pos] except IndexError: return False pos += 1 - # Check hr marker: /* * */ /* - */ /* _ */ - if marker != 0x2A and marker != 0x2D and marker != 0x5F: + # Check hr marker + if marker not in ("*", "-", "_"): return False # markers can be mixed with spaces, but there should be at least 3 of them cnt = 1 while pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 - if ch != marker and not isSpace(ch): + if ch != marker and not isStrSpace(ch): return False if ch == marker: cnt += 1 @@ -50,6 +50,6 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool: token = state.push("hr", "hr", 0) token.map = [startLine, state.line] - token.markup = chr(marker) * (cnt + 1) + token.markup = marker * (cnt + 1) return True diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py index dc3cadb1..3d43f6ee 100644 --- a/markdown_it/rules_block/html_block.py +++ b/markdown_it/rules_block/html_block.py @@ -44,7 +44,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if not state.md.options.get("html", None): return False - if state.srcCharCode[pos] != 0x3C: # /* < */ + if state.src[pos] != "<": return False lineText = state.src[pos:maximum] diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py index beb56698..fbd50699 100644 --- a/markdown_it/rules_block/lheading.py +++ b/markdown_it/rules_block/lheading.py @@ -35,16 +35,15 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b maximum = state.eMarks[nextLine] if pos < maximum: - marker = state.srcCharCode[pos] + marker = state.src[pos] - # /* - */ /* = */ - if marker == 0x2D or marker == 0x3D: - pos = state.skipChars(pos, marker) + if marker in ("-", "="): + pos = state.skipCharsStr(pos, marker) pos = state.skipSpaces(pos) # /* = */ if pos >= maximum: - level = 1 if marker == 0x3D else 2 + level = 1 if marker == "=" else 2 break # quirk for blockquotes, this line should already be checked by that rule @@ -72,7 +71,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b state.line = nextLine + 1 token = state.push("heading_open", "h" + str(level), 1) - token.markup = chr(marker) + token.markup = marker token.map = [startLine, state.line] token = state.push("inline", "", 0) @@ -81,7 +80,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> b token.children = [] token = state.push("heading_close", "h" + str(level), -1) - token.markup = chr(marker) + token.markup = marker state.parentType = oldParentType diff --git a/markdown_it/rules_block/list.py b/markdown_it/rules_block/list.py index f1cb089e..a5c596bb 100644 --- a/markdown_it/rules_block/list.py +++ b/markdown_it/rules_block/list.py @@ -1,7 +1,7 @@ # Lists import logging -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_block import StateBlock LOGGER = logging.getLogger(__name__) @@ -14,18 +14,18 @@ def skipBulletListMarker(state: StateBlock, startLine: int) -> int: maximum = state.eMarks[startLine] try: - marker = state.srcCharCode[pos] + marker = state.src[pos] except IndexError: return -1 pos += 1 - # Check bullet /* * */ /* - */ /* + */ - if marker != 0x2A and marker != 0x2D and marker != 0x2B: + + if marker not in ("*", "-", "+"): return -1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if not isSpace(ch): + if not isStrSpace(ch): # " -test " - is not a list item return -1 @@ -43,11 +43,12 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: if pos + 1 >= maximum: return -1 - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 + ch_ord = ord(ch) # /* 0 */ /* 9 */ - if ch < 0x30 or ch > 0x39: + if ch_ord < 0x30 or ch_ord > 0x39: return -1 while True: @@ -55,11 +56,12 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: if pos >= maximum: return -1 - ch = state.srcCharCode[pos] + ch = state.src[pos] pos += 1 # /* 0 */ /* 9 */ - if ch >= 0x30 and ch <= 0x39: + ch_ord = ord(ch) + if ch_ord >= 0x30 and ch_ord <= 0x39: # List marker should have no more than 9 digits # (prevents integer overflow in browsers) if pos - start >= 10: @@ -67,16 +69,16 @@ def skipOrderedListMarker(state: StateBlock, startLine: int) -> int: continue - # found valid marker: /* ) */ /* . */ - if ch == 0x29 or ch == 0x2E: + # found valid marker + if ch in (")", "."): break return -1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if not isSpace(ch): + if not isStrSpace(ch): # " 1.test " - is not a list item return -1 @@ -159,7 +161,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> return False # We should terminate list on style change. Remember first one to compare. - markerCharCode = state.srcCharCode[posAfterMarker - 1] + markerChar = state.src[posAfterMarker - 1] # For validation mode we can terminate immediately if silent: @@ -177,7 +179,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> token = state.push("bullet_list_open", "ul", 1) token.map = listLines = [startLine, 0] - token.markup = chr(markerCharCode) + token.markup = markerChar # # Iterate list items @@ -201,11 +203,11 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> ) while pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch == 0x09: # \t + if ch == "\t": offset += 4 - (offset + state.bsCount[nextLine]) % 4 - elif ch == 0x20: # \s + elif ch == " ": offset += 1 else: break @@ -228,7 +230,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> # Run subparser & write tokens token = state.push("list_item_open", "li", 1) - token.markup = chr(markerCharCode) + token.markup = markerChar token.map = itemLines = [startLine, 0] if isOrdered: token.info = state.src[start : posAfterMarker - 1] @@ -280,7 +282,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.tight = oldTight token = state.push("list_item_close", "li", -1) - token.markup = chr(markerCharCode) + token.markup = markerChar nextLine = startLine = state.line itemLines[1] = nextLine @@ -320,7 +322,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if posAfterMarker < 0: break - if markerCharCode != state.srcCharCode[posAfterMarker - 1]: + if markerChar != state.src[posAfterMarker - 1]: break # Finalize list @@ -329,7 +331,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> else: token = state.push("bullet_list_close", "ul", -1) - token.markup = chr(markerCharCode) + token.markup = markerChar listLines[1] = nextLine state.line = nextLine diff --git a/markdown_it/rules_block/reference.py b/markdown_it/rules_block/reference.py index 92f0918c..b77944b2 100644 --- a/markdown_it/rules_block/reference.py +++ b/markdown_it/rules_block/reference.py @@ -19,17 +19,17 @@ def reference(state: StateBlock, startLine: int, _endLine: int, silent: bool) -> if state.is_code_block(startLine): return False - if state.srcCharCode[pos] != 0x5B: # /* [ */ + if state.src[pos] != "[": return False # Simple check to quickly interrupt scan on [link](url) at the start of line. # Can be useful on practice: https:#github.com/markdown-it/markdown-it/issues/54 while pos < maximum: # /* ] */ /* \ */ /* : */ - if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C: + if state.src[pos] == "]" and state.src[pos - 1] != "\\": if pos + 1 == maximum: return False - if state.srcCharCode[pos + 1] != 0x3A: + if state.src[pos + 1] != ":": return False break pos += 1 diff --git a/markdown_it/rules_block/state_block.py b/markdown_it/rules_block/state_block.py index ee77f097..96a2f88f 100644 --- a/markdown_it/rules_block/state_block.py +++ b/markdown_it/rules_block/state_block.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Literal -from ..common.utils import isSpace +from ..common.utils import isStrSpace from ..ruler import StateBase from ..token import Token from ..utils import EnvType @@ -13,18 +13,9 @@ class StateBlock(StateBase): def __init__( - self, - src: str, - md: MarkdownIt, - env: EnvType, - tokens: list[Token], - srcCharCode: tuple[int, ...] | None = None, - ): - if srcCharCode is not None: - self._src = src - self.srcCharCode = srcCharCode - else: - self.src = src + self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token] + ) -> None: + self.src = src # link to parser instance self.md = md @@ -80,12 +71,12 @@ def __init__( start = pos = indent = offset = 0 length = len(self.src) - for pos, character in enumerate(self.srcCharCode): + for pos, character in enumerate(self.src): if not indent_found: - if isSpace(character): + if isStrSpace(character): indent += 1 - if character == 0x09: + if character == "\t": offset += 4 - offset % 4 else: offset += 1 @@ -93,8 +84,8 @@ def __init__( else: indent_found = True - if character == 0x0A or pos == length - 1: - if character != 0x0A: + if character == "\n" or pos == length - 1: + if character != "\n": pos += 1 self.bMarks.append(start) self.eMarks.append(pos) @@ -157,7 +148,7 @@ def skipEmptyLines(self, from_pos: int) -> int: def skipSpaces(self, pos: int) -> int: """Skip spaces from given position.""" while pos < len(self.src): - if not isSpace(self.srcCharCode[pos]): + if not isStrSpace(self.src[pos]): break pos += 1 return pos @@ -168,20 +159,28 @@ def skipSpacesBack(self, pos: int, minimum: int) -> int: return pos while pos > minimum: pos -= 1 - if not isSpace(self.srcCharCode[pos]): + if not isStrSpace(self.src[pos]): return pos + 1 return pos def skipChars(self, pos: int, code: int) -> int: - """Skip char codes from given position.""" + """Skip character code from given position.""" while pos < len(self.src): if self.srcCharCode[pos] != code: break pos += 1 return pos + def skipCharsStr(self, pos: int, ch: str) -> int: + """Skip character string from given position.""" + while pos < len(self.src): + if self.src[pos] != ch: + break + pos += 1 + return pos + def skipCharsBack(self, pos: int, code: int, minimum: int) -> int: - """Skip char codes reverse from given position - 1.""" + """Skip character code reverse from given position - 1.""" if pos <= minimum: return pos while pos > minimum: @@ -190,6 +189,16 @@ def skipCharsBack(self, pos: int, code: int, minimum: int) -> int: return pos + 1 return pos + def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int: + """Skip character string reverse from given position - 1.""" + if pos <= minimum: + return pos + while pos > minimum: + pos -= 1 + if ch != self.src[pos]: + return pos + 1 + return pos + def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: """Cut lines range from source.""" line = begin @@ -209,9 +218,9 @@ def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str: ) while (first < last) and (lineIndent < indent): - ch = self.srcCharCode[first] - if isSpace(ch): - if ch == 0x09: + ch = self.src[first] + if isStrSpace(ch): + if ch == "\t": lineIndent += 4 - (lineIndent + self.bsCount[line]) % 4 else: lineIndent += 1 diff --git a/markdown_it/rules_block/table.py b/markdown_it/rules_block/table.py index 8f7be7f1..4b666c1d 100644 --- a/markdown_it/rules_block/table.py +++ b/markdown_it/rules_block/table.py @@ -3,7 +3,7 @@ import re -from ..common.utils import charCodeAt, isSpace +from ..common.utils import charStrAt, isStrSpace from .state_block import StateBlock headerLineRe = re.compile(r"^:?-+:?$") @@ -25,10 +25,10 @@ def escapedSplit(string: str) -> list[str]: isEscaped = False lastPos = 0 current = "" - ch = charCodeAt(string, pos) + ch = charStrAt(string, pos) while pos < max: - if ch == 0x7C: # /* | */ + if ch == "|": if not isEscaped: # pipe separating cells, '|' result.append(current + string[lastPos:pos]) @@ -39,10 +39,10 @@ def escapedSplit(string: str) -> list[str]: current += string[lastPos : pos - 1] lastPos = pos - isEscaped = ch == 0x5C # /* \ */ + isEscaped = ch == "\\" pos += 1 - ch = charCodeAt(string, pos) + ch = charStrAt(string, pos) result.append(current + string[lastPos:]) @@ -71,29 +71,27 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool pos = state.bMarks[nextLine] + state.tShift[nextLine] if pos >= state.eMarks[nextLine]: return False - first_ch = state.srcCharCode[pos] + first_ch = state.src[pos] pos += 1 - if first_ch not in {0x7C, 0x2D, 0x3A}: # not in {"|", "-", ":"} + if first_ch not in ("|", "-", ":"): return False if pos >= state.eMarks[nextLine]: return False - second_ch = state.srcCharCode[pos] + second_ch = state.src[pos] pos += 1 - # not in {"|", "-", ":"} and not space - if second_ch not in {0x7C, 0x2D, 0x3A} and not isSpace(second_ch): + if second_ch not in ("|", "-", ":") and not isStrSpace(second_ch): return False # if first character is '-', then second character must not be a space # (due to parsing ambiguity with list) - if first_ch == 0x2D and isSpace(second_ch): + if first_ch == "-" and isStrSpace(second_ch): return False while pos < state.eMarks[nextLine]: - ch = state.srcCharCode[pos] + ch = state.src[pos] - # /* | */ /* - */ /* : */ - if ch not in {0x7C, 0x2D, 0x3A} and not isSpace(ch): + if ch not in ("|", "-", ":") and not isStrSpace(ch): return False pos += 1 @@ -114,10 +112,9 @@ def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool if not headerLineRe.search(t): return False - if charCodeAt(t, len(t) - 1) == 0x3A: # /* : */ - # /* : */ - aligns.append("center" if charCodeAt(t, 0) == 0x3A else "right") - elif charCodeAt(t, 0) == 0x3A: # /* : */ + if charStrAt(t, len(t) - 1) == ":": + aligns.append("center" if charStrAt(t, 0) == ":" else "right") + elif charStrAt(t, 0) == ":": aligns.append("left") else: aligns.append("") diff --git a/markdown_it/rules_core/block.py b/markdown_it/rules_core/block.py index dc756418..a6c3bb8d 100644 --- a/markdown_it/rules_core/block.py +++ b/markdown_it/rules_core/block.py @@ -10,6 +10,4 @@ def block(state: StateCore) -> None: token.children = [] state.tokens.append(token) else: - state.md.block.parse( - state.src, state.md, state.env, state.tokens, state.srcCharCode - ) + state.md.block.parse(state.src, state.md, state.env, state.tokens) diff --git a/markdown_it/rules_core/smartquotes.py b/markdown_it/rules_core/smartquotes.py index b4284493..c98fbd71 100644 --- a/markdown_it/rules_core/smartquotes.py +++ b/markdown_it/rules_core/smartquotes.py @@ -24,9 +24,7 @@ def replaceAt(string: str, index: int, ch: str) -> str: def process_inlines(tokens: list[Token], state: StateCore) -> None: stack: list[dict[str, Any]] = [] - for i in range(len(tokens)): - token = tokens[i] - + for i, token in enumerate(tokens): thisLevel = token.level j = 0 @@ -60,13 +58,12 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: # Find previous character, # default to space if it's the beginning of the line - lastChar = 0x20 + lastChar: None | int = 0x20 if t.start(0) + lastIndex - 1 >= 0: lastChar = charCodeAt(text, t.start(0) + lastIndex - 1) else: for j in range(i)[::-1]: - # lastChar defaults to 0x20 if tokens[j].type == "softbreak" or tokens[j].type == "hardbreak": break # should skip all tokens except 'text', 'html_inline' or 'code_inline' @@ -78,7 +75,7 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: # Find next character, # default to space if it's the end of the line - nextChar = 0x20 + nextChar: None | int = 0x20 if pos < maximum: nextChar = charCodeAt(text, pos) @@ -94,11 +91,15 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: nextChar = charCodeAt(tokens[j].content, 0) break - isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) - isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + isLastPunctChar = lastChar is not None and ( + isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) + ) + isNextPunctChar = nextChar is not None and ( + isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + ) - isLastWhiteSpace = isWhiteSpace(lastChar) - isNextWhiteSpace = isWhiteSpace(nextChar) + isLastWhiteSpace = lastChar is not None and isWhiteSpace(lastChar) + isNextWhiteSpace = nextChar is not None and isWhiteSpace(nextChar) if isNextWhiteSpace: # noqa: SIM114 canOpen = False @@ -111,7 +112,9 @@ def process_inlines(tokens: list[Token], state: StateCore) -> None: canClose = False if nextChar == 0x22 and t.group(0) == '"': # 0x22: " # noqa: SIM102 - if lastChar >= 0x30 and lastChar <= 0x39: # 0x30: 0, 0x39: 9 + if ( + lastChar is not None and lastChar >= 0x30 and lastChar <= 0x39 + ): # 0x30: 0, 0x39: 9 # special case: 1"" - count first quote as an inch canClose = canOpen = False diff --git a/markdown_it/rules_inline/autolink.py b/markdown_it/rules_inline/autolink.py index 11ac5905..295d963f 100644 --- a/markdown_it/rules_inline/autolink.py +++ b/markdown_it/rules_inline/autolink.py @@ -12,7 +12,7 @@ def autolink(state: StateInline, silent: bool) -> bool: pos = state.pos - if state.srcCharCode[pos] != 0x3C: # /* < */ + if state.src[pos] != "<": return False start = state.pos @@ -23,11 +23,11 @@ def autolink(state: StateInline, silent: bool) -> bool: if pos >= maximum: return False - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch == 0x3C: # /* < */ + if ch == "<": return False - if ch == 0x3E: # /* > */ + if ch == ">": break url = state.src[start + 1 : pos] diff --git a/markdown_it/rules_inline/backticks.py b/markdown_it/rules_inline/backticks.py index 5f1e0552..fc60d6b1 100644 --- a/markdown_it/rules_inline/backticks.py +++ b/markdown_it/rules_inline/backticks.py @@ -8,10 +8,8 @@ def backtick(state: StateInline, silent: bool) -> bool: pos = state.pos - ch = state.srcCharCode[pos] - # /* ` */ - if ch != 0x60: + if state.src[pos] != "`": return False start = pos @@ -19,7 +17,7 @@ def backtick(state: StateInline, silent: bool) -> bool: maximum = state.posMax # scan marker length - while pos < maximum and (state.srcCharCode[pos] == 0x60): # /* ` */ + while pos < maximum and (state.src[pos] == "`"): pos += 1 marker = state.src[start:pos] @@ -42,7 +40,7 @@ def backtick(state: StateInline, silent: bool) -> bool: matchEnd = matchStart + 1 # scan marker length - while matchEnd < maximum and (state.srcCharCode[matchEnd] == 0x60): # /* ` */ + while matchEnd < maximum and (state.src[matchEnd] == "`"): matchEnd += 1 closerLength = matchEnd - matchStart diff --git a/markdown_it/rules_inline/emphasis.py b/markdown_it/rules_inline/emphasis.py index d21b494c..56b94b6b 100644 --- a/markdown_it/rules_inline/emphasis.py +++ b/markdown_it/rules_inline/emphasis.py @@ -8,23 +8,22 @@ def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = state.srcCharCode[start] + marker = state.src[start] if silent: return False - # /* _ */ /* * */ - if marker != 0x5F and marker != 0x2A: + if marker not in ("_", "*"): return False - scanned = state.scanDelims(state.pos, marker == 0x2A) + scanned = state.scanDelims(state.pos, marker == "*") for i in range(scanned.length): token = state.push("text", "", 0) - token.content = chr(marker) + token.content = marker state.delimiters.append( Delimiter( - marker=marker, + marker=ord(marker), length=scanned.length, jump=i, token=len(state.tokens) - 1, diff --git a/markdown_it/rules_inline/entity.py b/markdown_it/rules_inline/entity.py index 1e5d0ea0..d3b5f6bb 100644 --- a/markdown_it/rules_inline/entity.py +++ b/markdown_it/rules_inline/entity.py @@ -13,13 +13,11 @@ def entity(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax - if state.srcCharCode[pos] != 0x26: # /* & */ + if state.src[pos] != "&": return False if (pos + 1) < maximum: - ch = state.srcCharCode[pos + 1] - - if ch == 0x23: # /* # */ + if state.src[pos + 1] == "#": match = DIGITAL_RE.search(state.src[pos:]) if match: if not silent: diff --git a/markdown_it/rules_inline/escape.py b/markdown_it/rules_inline/escape.py index 1767e01d..8694cec1 100644 --- a/markdown_it/rules_inline/escape.py +++ b/markdown_it/rules_inline/escape.py @@ -1,42 +1,72 @@ """ Process escaped chars and hardbreaks """ -from ..common.utils import isSpace +from ..common.utils import isStrSpace from .state_inline import StateInline -ESCAPED = [0 for _ in range(256)] -for ch in "\\!\"#$%&'()*+,./:;<=>?@[]^_`{|}~-": - ESCAPED[ord(ch)] = 1 +_ESCAPED = { + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "|", + "}", + "~", +} def escape(state: StateInline, silent: bool) -> bool: pos = state.pos maximum = state.posMax - # /* \ */ - if state.srcCharCode[pos] != 0x5C: + if state.src[pos] != "\\": return False pos += 1 if pos < maximum: - ch = state.srcCharCode[pos] + ch = state.src[pos] - if ch < 256 and ESCAPED[ch] != 0: + if ch in _ESCAPED: if not silent: state.pending += state.src[pos] state.pos += 2 return True - if ch == 0x0A: + if ch == "\n": if not silent: state.push("hardbreak", "br", 0) pos += 1 # skip leading whitespaces from next line while pos < maximum: - ch = state.srcCharCode[pos] - if not isSpace(ch): + ch = state.src[pos] + if not isStrSpace(ch): break pos += 1 diff --git a/markdown_it/rules_inline/html_inline.py b/markdown_it/rules_inline/html_inline.py index 6a636684..3c8b5331 100644 --- a/markdown_it/rules_inline/html_inline.py +++ b/markdown_it/rules_inline/html_inline.py @@ -17,17 +17,12 @@ def html_inline(state: StateInline, silent: bool) -> bool: # Check start maximum = state.posMax - if state.srcCharCode[pos] != 0x3C or pos + 2 >= maximum: # /* < */ + if state.src[pos] != "<" or pos + 2 >= maximum: return False # Quick fail on second char - ch = state.srcCharCode[pos + 1] - if ( - ch != 0x21 - and ch != 0x3F # /* ! */ - and ch != 0x2F # /* ? */ - and not isLetter(ch) # /* / */ - ): + ch = state.src[pos + 1] + if ch not in ("!", "?", "/") and not isLetter(ord(ch)): # /* / */ return False match = HTML_TAG_RE.search(state.src[pos:]) diff --git a/markdown_it/rules_inline/image.py b/markdown_it/rules_inline/image.py index 0cb14ffd..b4a32a9f 100644 --- a/markdown_it/rules_inline/image.py +++ b/markdown_it/rules_inline/image.py @@ -1,7 +1,7 @@ # Process ![image]( "title") from __future__ import annotations -from ..common.utils import isSpace, normalizeReference +from ..common.utils import isStrSpace, normalizeReference from ..token import Token from .state_inline import StateInline @@ -12,11 +12,10 @@ def image(state: StateInline, silent: bool) -> bool: oldPos = state.pos max = state.posMax - # /* ! */ - if state.srcCharCode[state.pos] != 0x21: + if state.src[state.pos] != "!": return False - # /* [ */ - if state.pos + 1 < state.posMax and state.srcCharCode[state.pos + 1] != 0x5B: + + if state.pos + 1 < state.posMax and state.src[state.pos + 1] != "[": return False labelStart = state.pos + 2 @@ -27,8 +26,8 @@ def image(state: StateInline, silent: bool) -> bool: return False pos = labelEnd + 1 - # /* ( */ - if pos < max and state.srcCharCode[pos] == 0x28: + + if pos < max and state.src[pos] == "(": # # Inline link # @@ -37,8 +36,8 @@ def image(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces pos += 1 while pos < max: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -60,8 +59,8 @@ def image(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces start = pos while pos < max: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -75,15 +74,14 @@ def image(state: StateInline, silent: bool) -> bool: # [link]( "title" ) # ^^ skipping these spaces while pos < max: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 else: title = "" - # /* ) */ - if pos >= max or state.srcCharCode[pos] != 0x29: + if pos >= max or state.src[pos] != ")": state.pos = oldPos return False @@ -97,7 +95,7 @@ def image(state: StateInline, silent: bool) -> bool: return False # /* [ */ - if pos < max and state.srcCharCode[pos] == 0x5B: + if pos < max and state.src[pos] == "[": start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/link.py b/markdown_it/rules_inline/link.py index c4548ccd..18c0736c 100644 --- a/markdown_it/rules_inline/link.py +++ b/markdown_it/rules_inline/link.py @@ -1,6 +1,6 @@ # Process [link]( "stuff") -from ..common.utils import isSpace, normalizeReference +from ..common.utils import isStrSpace, normalizeReference from .state_inline import StateInline @@ -13,7 +13,7 @@ def link(state: StateInline, silent: bool) -> bool: start = state.pos parseReference = True - if state.srcCharCode[state.pos] != 0x5B: # /* [ */ + if state.src[state.pos] != "[": return False labelStart = state.pos + 1 @@ -25,7 +25,7 @@ def link(state: StateInline, silent: bool) -> bool: pos = labelEnd + 1 - if pos < maximum and state.srcCharCode[pos] == 0x28: # /* ( */ + if pos < maximum and state.src[pos] == "(": # # Inline link # @@ -37,8 +37,8 @@ def link(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces pos += 1 while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -60,8 +60,8 @@ def link(state: StateInline, silent: bool) -> bool: # ^^ skipping these spaces start = pos while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 @@ -75,12 +75,12 @@ def link(state: StateInline, silent: bool) -> bool: # [link]( "title" ) # ^^ skipping these spaces while pos < maximum: - code = state.srcCharCode[pos] - if not isSpace(code) and code != 0x0A: + ch = state.src[pos] + if not isStrSpace(ch) and ch != "\n": break pos += 1 - if pos >= maximum or state.srcCharCode[pos] != 0x29: # /* ) */ + if pos >= maximum or state.src[pos] != ")": # parsing a valid shortcut link failed, fallback to reference parseReference = True @@ -93,7 +93,7 @@ def link(state: StateInline, silent: bool) -> bool: if "references" not in state.env: return False - if pos < maximum and state.srcCharCode[pos] == 0x5B: # /* [ */ + if pos < maximum and state.src[pos] == "[": start = pos + 1 pos = state.md.helpers.parseLinkLabel(state, pos) if pos >= 0: diff --git a/markdown_it/rules_inline/newline.py b/markdown_it/rules_inline/newline.py index 4c440579..dede7251 100644 --- a/markdown_it/rules_inline/newline.py +++ b/markdown_it/rules_inline/newline.py @@ -1,7 +1,7 @@ # Proceess '\n' import re -from ..common.utils import charCodeAt, isSpace +from ..common.utils import charStrAt, isStrSpace from .state_inline import StateInline endSpace = re.compile(r" +$") @@ -10,8 +10,7 @@ def newline(state: StateInline, silent: bool) -> bool: pos = state.pos - # /* \n */ - if state.srcCharCode[pos] != 0x0A: + if state.src[pos] != "\n": return False pmax = len(state.pending) - 1 @@ -22,8 +21,8 @@ def newline(state: StateInline, silent: bool) -> bool: # Pending string is stored in concat mode, indexed lookups will cause # conversion to flat mode. if not silent: - if pmax >= 0 and charCodeAt(state.pending, pmax) == 0x20: - if pmax >= 1 and charCodeAt(state.pending, pmax - 1) == 0x20: + if pmax >= 0 and charStrAt(state.pending, pmax) == " ": + if pmax >= 1 and charStrAt(state.pending, pmax - 1) == " ": state.pending = endSpace.sub("", state.pending) state.push("hardbreak", "br", 0) else: @@ -36,7 +35,7 @@ def newline(state: StateInline, silent: bool) -> bool: pos += 1 # skip heading spaces for next line - while pos < maximum and isSpace(state.srcCharCode[pos]): + while pos < maximum and isStrSpace(state.src[pos]): pos += 1 state.pos = pos diff --git a/markdown_it/rules_inline/state_inline.py b/markdown_it/rules_inline/state_inline.py index 12e1d934..ef23f85d 100644 --- a/markdown_it/rules_inline/state_inline.py +++ b/markdown_it/rules_inline/state_inline.py @@ -132,24 +132,24 @@ def scanDelims(self, start: int, canSplitWord: bool) -> Scanned: """ pos = start maximum = self.posMax - marker = self.srcCharCode[start] + marker = self.src[start] # treat beginning of the line as a whitespace - lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20 + lastChar = self.src[start - 1] if start > 0 else " " - while pos < maximum and self.srcCharCode[pos] == marker: + while pos < maximum and self.src[pos] == marker: pos += 1 count = pos - start # treat end of the line as a whitespace - nextChar = self.srcCharCode[pos] if pos < maximum else 0x20 + nextChar = self.src[pos] if pos < maximum else " " - isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar)) - isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar)) + isLastPunctChar = isMdAsciiPunct(ord(lastChar)) or isPunctChar(lastChar) + isNextPunctChar = isMdAsciiPunct(ord(nextChar)) or isPunctChar(nextChar) - isLastWhiteSpace = isWhiteSpace(lastChar) - isNextWhiteSpace = isWhiteSpace(nextChar) + isLastWhiteSpace = isWhiteSpace(ord(lastChar)) + isNextWhiteSpace = isWhiteSpace(ord(nextChar)) left_flanking = not ( isNextWhiteSpace diff --git a/markdown_it/rules_inline/strikethrough.py b/markdown_it/rules_inline/strikethrough.py index 8b080816..f671412c 100644 --- a/markdown_it/rules_inline/strikethrough.py +++ b/markdown_it/rules_inline/strikethrough.py @@ -7,17 +7,16 @@ def tokenize(state: StateInline, silent: bool) -> bool: """Insert each marker as a separate text token, and add it to delimiter list""" start = state.pos - marker = state.srcCharCode[start] + ch = state.src[start] if silent: return False - if marker != 0x7E: # /* ~ */ + if ch != "~": return False scanned = state.scanDelims(state.pos, True) length = scanned.length - ch = chr(marker) if length < 2: return False @@ -33,15 +32,13 @@ def tokenize(state: StateInline, silent: bool) -> bool: token.content = ch + ch state.delimiters.append( Delimiter( - **{ - "marker": marker, - "length": 0, # disable "rule of 3" length checks meant for emphasis - "jump": i // 2, # for `~~` 1 marker = 2 characters - "token": len(state.tokens) - 1, - "end": -1, - "open": scanned.can_open, - "close": scanned.can_close, - } + marker=ord(ch), + length=0, # disable "rule of 3" length checks meant for emphasis + jump=i // 2, # for `~~` 1 marker = 2 characters + token=len(state.tokens) - 1, + end=-1, + open=scanned.can_open, + close=scanned.can_close, ) ) diff --git a/markdown_it/rules_inline/text.py b/markdown_it/rules_inline/text.py index bdf55310..f306b2e4 100644 --- a/markdown_it/rules_inline/text.py +++ b/markdown_it/rules_inline/text.py @@ -1,50 +1,45 @@ # Skip text characters for text token, place those to pending buffer # and increment current pos -from typing import Any - from .state_inline import StateInline # Rule to skip pure text # '{}$%@~+=:' reserved for extensions -# !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~ - # !!!! Don't confuse with "Markdown ASCII Punctuation" chars # http://spec.commonmark.org/0.15/#ascii-punctuation-character -def isTerminatorChar(ch: int) -> bool: - return ch in { - 0x0A, # /* \n */: - 0x21, # /* ! */: - 0x23, # /* # */: - 0x24, # /* $ */: - 0x25, # /* % */: - 0x26, # /* & */: - 0x2A, # /* * */: - 0x2B, # /* + */: - 0x2D, # /* - */: - 0x3A, # /* : */: - 0x3C, # /* < */: - 0x3D, # /* = */: - 0x3E, # /* > */: - 0x40, # /* @ */: - 0x5B, # /* [ */: - 0x5C, # /* \ */: - 0x5D, # /* ] */: - 0x5E, # /* ^ */: - 0x5F, # /* _ */: - 0x60, # /* ` */: - 0x7B, # /* { */: - 0x7D, # /* } */: - 0x7E, # /* ~ */: - } - - -def text(state: StateInline, silent: bool, **args: Any) -> bool: +_TerminatorChars = { + "\n", + "!", + "#", + "$", + "%", + "&", + "*", + "+", + "-", + ":", + "<", + "=", + ">", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "}", + "~", +} + + +def text(state: StateInline, silent: bool) -> bool: pos = state.pos posMax = state.posMax - while (pos < posMax) and not isTerminatorChar(state.srcCharCode[pos]): + while (pos < posMax) and state.src[pos] not in _TerminatorChars: pos += 1 if pos == state.pos: From 19f3907529c8cbef241ce5df94397bcedcc28542 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 1 Jun 2023 02:47:56 +0200 Subject: [PATCH 2/3] Update profiler.py --- scripts/profiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/profiler.py b/scripts/profiler.py index 414a7727..a593baa1 100644 --- a/scripts/profiler.py +++ b/scripts/profiler.py @@ -9,7 +9,7 @@ from markdown_it import MarkdownIt commonmark_spec = ( - (Path(__file__).parent / "tests" / "test_cmark_spec" / "spec.md") + (Path(__file__).parent.parent / "tests" / "test_cmark_spec" / "spec.md") .read_bytes() .decode() ) From 4eeba11b45bf75fc86c12fac40170817b1e9bc4b Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Thu, 1 Jun 2023 03:28:32 +0200 Subject: [PATCH 3/3] Update port.yaml --- markdown_it/port.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml index a6718fda..945a19f6 100644 --- a/markdown_it/port.yaml +++ b/markdown_it/port.yaml @@ -23,8 +23,7 @@ to manipulate `Token.attrs`, which have an identical signature to those upstream. - Use python version of `charCodeAt` - | - Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state - objects and sharing those whenever possible + Use `str` units instead of `int`s to represent Unicode codepoints. This provides a significant performance boost - | In markdown_it/rules_block/reference.py,