diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 8f45e22e6..f80c0c6eb 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -71,7 +71,7 @@ jobs: fail-fast: false max-parallel: 4 matrix: - tox-env: [flake8, pep517check, checkspelling] + tox-env: [mypy, flake8, pep517check, checkspelling] env: TOXENV: ${{ matrix.tox-env }} diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index d2020b9b6..723ef7884 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -175,7 +175,7 @@ def test(self, parent: etree.Element, block: str) -> bool: return block.startswith(' '*self.tab_length) and \ not self.parser.state.isstate('detabbed') and \ (parent.tag in self.ITEM_TYPES or - (len(parent) and parent[-1] is not None and + (len(parent) > 0 and parent[-1] is not None and (parent[-1].tag in self.LIST_TYPES))) def run(self, parent: etree.Element, blocks: list[str]) -> None: diff --git a/markdown/core.py b/markdown/core.py index 6c7a21be9..47d3ba3b4 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -85,6 +85,14 @@ class Markdown: callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`. """ + tab_length: int + ESCAPED_CHARS: list[str] + block_level_elements: list[str] + registeredExtensions: list[Extension] + stripTopLevelTags: bool + references: dict[str, tuple[str, str]] + htmlStash: util.HtmlStash + def __init__(self, **kwargs): """ Creates a new Markdown instance. @@ -106,23 +114,23 @@ def __init__(self, **kwargs): """ - self.tab_length: int = kwargs.get('tab_length', 4) + self.tab_length = kwargs.get('tab_length', 4) - self.ESCAPED_CHARS: list[str] = [ + self.ESCAPED_CHARS = [ '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!' ] """ List of characters which get the backslash escape treatment. 
""" - self.block_level_elements: list[str] = BLOCK_LEVEL_ELEMENTS.copy() + self.block_level_elements = BLOCK_LEVEL_ELEMENTS.copy() - self.registeredExtensions: list[Extension] = [] + self.registeredExtensions = [] self.docType = "" # TODO: Maybe delete this. It does not appear to be used anymore. - self.stripTopLevelTags: bool = True + self.stripTopLevelTags = True self.build_parser() - self.references: dict[str, tuple[str, str]] = {} - self.htmlStash: util.HtmlStash = util.HtmlStash() + self.references = {} + self.htmlStash = util.HtmlStash() self.registerExtensions(extensions=kwargs.get('extensions', []), configs=kwargs.get('extension_configs', {})) self.set_output_format(kwargs.get('output_format', 'xhtml')) @@ -446,7 +454,7 @@ def convertFile( else: # Encode manually and write bytes to stdout. html = html.encode(encoding, "xmlcharrefreplace") - sys.stdout.buffer.write(html) + sys.stdout.buffer.write(html) # type: ignore return self diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index 738368afe..179768e75 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -94,7 +94,7 @@ def __init__(self, pattern: str, title: str): super().__init__(pattern) self.title = title - def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # type: ignore[override] abbr = etree.Element('abbr') abbr.text = AtomicString(m.group('abbr')) abbr.set('title', self.title) diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 7ce3f9925..e1d4b0fe9 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -56,7 +56,7 @@ def _handle_word(s, t): return t, t -_scanner = re.Scanner([ +_scanner = re.Scanner([ # type: ignore[attr-defined] (r'[^ =]+=".*?"', _handle_double_quote), (r"[^ =]+='.*?'", _handle_single_quote), (r'[^ =]+=[^ =]+', _handle_key_value), diff --git 
a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index 92e7d8f2b..6c32a9a2b 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -161,7 +161,7 @@ def hilite(self, shebang: bool = True) -> str: lexer = get_lexer_by_name('text', **self.options) if not self.lang: # Use the guessed lexer's language instead - self.lang = lexer.aliases[0] + self.lang = lexer.aliases[0] # type: ignore[attr-defined] lang_str = f'{self.lang_prefix}{self.lang}' if isinstance(self.pygments_formatter, str): try: diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index da1a9be1e..40e01018e 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -159,7 +159,7 @@ def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str] """ Return tuple: `(id, [list, of, classes], {configs})` """ id = '' classes = [] - configs = {} + configs: dict[str, Any] = {} for k, v in attrs: if k == 'id': id = v diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index 30c081138..c2d9a7034 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -38,6 +38,9 @@ class FootnoteExtension(Extension): """ Footnote Extension. """ + found_refs: dict[str, int] + used_refs: set[str] + def __init__(self, **kwargs): """ Setup configs. """ @@ -68,8 +71,8 @@ def __init__(self, **kwargs): # In multiple invocations, emit links that don't get tangled. self.unique_prefix = 0 - self.found_refs: dict[str, int] = {} - self.used_refs: set[str] = set() + self.found_refs = {} + self.used_refs = set() self.reset() @@ -290,7 +293,7 @@ def detectTabbed(self, blocks: list[str]) -> list[str]: break return fn_blocks - def detab(self, block: str) -> str: + def detab(self, block: str) -> str: # type: ignore[override] """ Remove one level of indent from a block. Preserve lazily indented blocks by only removing indent from indented lines. 
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 64b84a5f4..cb14b4818 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -40,6 +40,10 @@ class HTMLExtractorExtra(HTMLExtractor): Markdown. """ + mdstack: list[str] = [] # When markdown=1, stack contains a list of tags + treebuilder: etree.TreeBuilder + mdstate: list[Literal['block', 'span', 'off', None]] + def __init__(self, md: Markdown, *args, **kwargs): # All block-level tags. self.block_level_tags = set(md.block_level_elements.copy()) @@ -58,9 +62,9 @@ def __init__(self, md: Markdown, *args, **kwargs): def reset(self): """Reset this instance. Loses all unprocessed data.""" - self.mdstack: list[str] = [] # When markdown=1, stack contains a list of tags + self.mdstack = [] # When markdown=1, stack contains a list of tags self.treebuilder = etree.TreeBuilder() - self.mdstate: list[Literal['block', 'span', 'off', None]] = [] + self.mdstate = [] super().reset() def close(self): diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py index cb703399b..f26ed78d1 100644 --- a/markdown/extensions/meta.py +++ b/markdown/extensions/meta.py @@ -78,7 +78,7 @@ def run(self, lines: list[str]) -> list[str]: else: lines.insert(0, line) break # no meta data - done - self.md.Meta = meta + self.md.Meta = meta # type: ignore[attr-defined] return lines diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index 0ce7772a7..1e001b33f 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -179,6 +179,9 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: class SmartyExtension(Extension): """ Add Smarty to Markdown. """ + + substitutions: dict[str, str] + def __init__(self, **kwargs): self.config = { 'smart_quotes': [True, 'Educate quotes'], @@ -189,7 +192,7 @@ def __init__(self, **kwargs): } """ Default configuration options. 
""" super().__init__(**kwargs) - self.substitutions: dict[str, str] = dict(substitutions) + self.substitutions = dict(substitutions) self.substitutions.update(self.getConfig('substitutions', default={})) def _addPatterns( @@ -199,9 +202,8 @@ def _addPatterns( serie: str, priority: int, ): - for ind, pattern in enumerate(patterns): - pattern += (md,) - pattern = SubstituteTextPattern(*pattern) + for ind, pattern_args in enumerate(patterns): + pattern = SubstituteTextPattern(*pattern_args, md) name = 'smarty-%s-%d' % (serie, ind) self.inlinePatterns.register(pattern, name, priority-ind) @@ -253,7 +255,7 @@ def educateQuotes(self, md: Markdown) -> None: ) self._addPatterns(md, patterns, 'quotes', 30) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown): configs = self.getConfigs() self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry() if configs['smart_ellipses']: diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index a17d7241c..6d39ec834 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -80,7 +80,7 @@ def stashedHTML2text(text: str, md: Markdown, strip_entities: bool = True) -> st def _html_sub(m: re.Match[str]) -> str: """ Substitute raw html with plain text. 
""" try: - raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))] + raw: str = md.htmlStash.rawHtmlBlocks[int(m.group(1))] except (IndexError, TypeError): # pragma: no cover return m.group(0) # Strip out tags and/or entities - leaving text @@ -335,8 +335,8 @@ def run(self, doc: etree.Element) -> None: toc = self.md.serializer(div) for pp in self.md.postprocessors: toc = pp.run(toc) - self.md.toc_tokens = toc_tokens - self.md.toc = toc + self.md.toc_tokens = toc_tokens # type: ignore[attr-defined] + self.md.toc = toc # type: ignore[attr-defined] class TocExtension(Extension): diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py index 3f3cbe2dd..b3bcbfd8c 100644 --- a/markdown/extensions/wikilinks.py +++ b/markdown/extensions/wikilinks.py @@ -65,6 +65,7 @@ def __init__(self, pattern: str, config: dict[str, Any]): self.config = config def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]: + a: etree.Element | str if m.group(1).strip(): base_url, end_url, html_class = self._getMeta() label = m.group(1).strip() diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 5155ef69d..04e9911c1 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -28,7 +28,7 @@ import re import importlib.util import sys -from typing import TYPE_CHECKING, Sequence +from typing import TYPE_CHECKING, Any, Sequence if TYPE_CHECKING: # pragma: no cover from markdown import Markdown @@ -37,7 +37,7 @@ # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it. # Users can still do `from html import parser` and get the default behavior. spec = importlib.util.find_spec('html.parser') -htmlparser = importlib.util.module_from_spec(spec) +htmlparser: Any = importlib.util.module_from_spec(spec) spec.loader.exec_module(htmlparser) sys.modules['htmlparser'] = htmlparser @@ -80,6 +80,9 @@ class HTMLExtractor(htmlparser.HTMLParser): is stored in `cleandoc` as a list of strings. 
""" + stack: list[str] + cleandoc: list[str] + def __init__(self, md: Markdown, *args, **kwargs): if 'convert_charrefs' not in kwargs: kwargs['convert_charrefs'] = False @@ -97,9 +100,9 @@ def reset(self): """Reset this instance. Loses all unprocessed data.""" self.inraw = False self.intail = False - self.stack: list[str] = [] # When `inraw==True`, stack contains a list of tags - self._cache: list[str] = [] - self.cleandoc: list[str] = [] + self.stack = [] # When `inraw==True`, stack contains a list of tags + self._cache = [] + self.cleandoc = [] self.lineno_start_cache = [0] super().reset() diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 3d366ad96..dfd91a9b4 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -188,7 +188,7 @@ class EmStrongItem(NamedTuple): # ----------------------------------------------------------------------------- -class Pattern: # pragma: no cover +class _BasePattern: """ Base class that inline patterns subclass. @@ -238,19 +238,6 @@ def getCompiledRegExp(self) -> re.Pattern: """ Return a compiled regular expression. """ return self.compiled_re - def handleMatch(self, m: re.Match[str]) -> etree.Element | str: - """Return a ElementTree element from the given match. - - Subclasses should override this method. - - Arguments: - m: A match object containing a match of the pattern. - - Returns: An ElementTree Element object. - - """ - pass # pragma: no cover - def type(self) -> str: """ Return class name, to define pattern type """ return self.__class__.__name__ @@ -258,11 +245,11 @@ def type(self) -> str: def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. 
""" try: - stash = self.md.treeprocessors['inline'].stashed_nodes + stash = self.md.treeprocessors['inline'].stashed_nodes # type: ignore[attr-defined] except KeyError: # pragma: no cover return text - def get_stash(m): + def get_stash(m: re.Match[str]) -> str: id = m.group(1) if id in stash: value = stash.get(id) @@ -274,6 +261,27 @@ def get_stash(m): return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) +class LegacyPattern(_BasePattern): + def handleMatch(self, m: re.Match[str]) -> etree.Element | str: + """Return a ElementTree element from the given match. + + Subclasses should override this method. + + Arguments: + m: A match object containing a match of the pattern. + + Returns: An ElementTree Element object. + + """ + pass # pragma: no cover + + +if TYPE_CHECKING: # pragma: no cover + Pattern = _BasePattern +else: + Pattern = LegacyPattern + + class InlineProcessor(Pattern): """ Base class that inline processors subclass. @@ -505,13 +513,13 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ try: - stash = self.md.treeprocessors['inline'].stashed_nodes + stash = self.md.treeprocessors['inline'].stashed_nodes # type: ignore[attr-defined] except KeyError: # pragma: no cover return text def get_stash(m: re.Match[str]) -> str: id = m.group(1) - value = stash.get(id) + value: etree.Element | None = stash.get(id) if value is not None: try: return self.md.serializer(value) @@ -523,7 +531,7 @@ def get_stash(m: re.Match[str]) -> str: def backslash_unescape(self, text: str) -> str: """ Return text with backslash escapes undone (backslashes are restored). 
""" try: - RE = self.md.treeprocessors['unescape'].RE + RE = self.md.treeprocessors['unescape'].RE # type: ignore[attr-defined] except KeyError: # pragma: no cover return text diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 7f5ede90c..a620ab098 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -75,7 +75,7 @@ def run(self, text: str) -> str: """ Iterate over html stash and restore html. """ replacements = OrderedDict() for i in range(self.md.htmlStash.html_counter): - html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) + html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) # type: ignore[arg-type] if self.isblocklevel(html): replacements["
<p>{}</p>
".format( self.md.htmlStash.get_placeholder(i))] = html diff --git a/markdown/serializers.py b/markdown/serializers.py index 573b26483..67116b88a 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -45,8 +45,8 @@ from __future__ import annotations -from xml.etree.ElementTree import ProcessingInstruction -from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY +from xml.etree.ElementTree import ProcessingInstruction, Comment, ElementTree, Element, QName +from xml.etree.ElementTree import HTML_EMPTY # type: ignore[attr-defined] import re from typing import Callable, Literal, NoReturn diff --git a/markdown/test_tools.py b/markdown/test_tools.py index 895e44ec5..5f2cfbd8f 100644 --- a/markdown/test_tools.py +++ b/markdown/test_tools.py @@ -29,7 +29,7 @@ from . import markdown, Markdown, util try: - import tidylib + import tidylib # type: ignore except ImportError: tidylib = None diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 83630999e..52f2409e8 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -252,7 +252,7 @@ def linkText(text: str | None) -> None: def __applyPattern( self, - pattern: inlinepatterns.Pattern, + pattern: inlinepatterns.InlineProcessor | inlinepatterns.LegacyPattern, data: str, patternIndex: int, startIndex: int = 0 @@ -277,13 +277,15 @@ def __applyPattern( if exclude.lower() in self.ancestors: return data, False, 0 + start: int | None + end: int | None if new_style: match = None # Since `handleMatch` may reject our first match, # we iterate over the buffer looking for matches # until we can't find any more. 
for match in pattern.getCompiledRegExp().finditer(data, startIndex): - node, start, end = pattern.handleMatch(match, data) + node, start, end = pattern.handleMatch(match, data) # type: ignore if start is None or end is None: startIndex += match.end(0) match = None @@ -297,7 +299,7 @@ def __applyPattern( return data, False, 0 if not new_style: # pragma: no cover - node = pattern.handleMatch(match) + node = pattern.handleMatch(match) # type: ignore start = match.start(0) end = match.end(0) @@ -398,9 +400,9 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. child.tail = dumby.tail pos = list(currElement).index(child) + 1 tailResult.reverse() - for newChild in tailResult: - self.parent_map[newChild[0]] = currElement - currElement.insert(pos, newChild[0]) + for subChild in tailResult: + self.parent_map[subChild[0]] = currElement + currElement.insert(pos, subChild[0]) if len(child): self.parent_map[child] = currElement stack.append((child, self.ancestors[:])) diff --git a/markdown/util.py b/markdown/util.py index b4642023e..b7ad9b19a 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -218,12 +218,17 @@ class HtmlStash: in the beginning and replace with place-holders. """ + html_counter: int + rawHtmlBlocks: list[str | etree.Element] + tag_counter: int + tag_data: list[TagData] + def __init__(self): """ Create an `HtmlStash`. 
""" self.html_counter = 0 # for counting inline html segments - self.rawHtmlBlocks: list[str | etree.Element] = [] + self.rawHtmlBlocks = [] self.tag_counter = 0 - self.tag_data: list[TagData] = [] # list of dictionaries in the order tags appear + self.tag_data = [] # list of dictionaries in the order tags appear def store(self, html: str | etree.Element) -> str: """ @@ -310,8 +315,8 @@ class Registry(Generic[_T]): """ def __init__(self): - self._data: dict[str, _T] = {} - self._priority: list[_PriorityItem] = [] + self._data = {} + self._priority = [] self._is_sorted = False def __contains__(self, item: str | _T) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 8c9e9bcfe..feb7a2fa7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,3 +97,8 @@ packages = ['markdown', 'markdown.extensions'] [tool.setuptools.dynamic] version = {attr = 'markdown.__meta__.__version__'} + +[tool.mypy] +strict_optional = false +warn_no_return = false +disable_error_code = 'assignment, var-annotated' diff --git a/tox.ini b/tox.ini index d071054ea..7e528d478 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, flake8, checkspelling, pep517check, checklinks +envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, mypy, flake8, checkspelling, pep517check, checklinks isolated_build = True [testenv] @@ -19,6 +19,15 @@ deps = pytidylib pygments=={env:PYGMENTS_VERSION} +[testenv:mypy] +deps = + mypy + types-PyYAML + types-Pygments +allowlist_externals = mypy +commands = mypy {toxinidir}/markdown +skip_install = true + [testenv:flake8] deps = flake8 allowlist_externals = flake8