sphinx-contrib · hugovk · Oct 12, 2023 · Oct 12, 2023 · Oct 12, 2023 · Oct 12, 2023
diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py
@@ -63,6 +63,10 @@ def check_missing_backtick_after_role(file, lines, options=None):
             yield paragraph_lno + error_offset, f"role missing closing backtick: {error.group(0)!r}"
 
 
+_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL)
+_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX)
+
+
 @checker(".rst", ".po")
 def check_missing_space_after_literal(file, lines, options=None):
     r"""Search for inline literals immediately followed by a character.
@@ -74,8 +78,8 @@ def check_missing_space_after_literal(file, lines, options=None):
         if paragraph.count("|") > 4:
             return  # we don't handle tables yet.
         paragraph = clean_paragraph(paragraph)
-        for role in re.finditer("``.+?``(?!`).", paragraph, flags=re.DOTALL):
-            if not re.match(rst.END_STRING_SUFFIX, role.group(0)[-1]):
+        for role in _RST_ROLE_RE.finditer(paragraph):
+            if not _END_STRING_SUFFIX_RE.match(role[0][-1]):
                 error_offset = paragraph[: role.start()].count("\n")
                 yield (
                     paragraph_lno + error_offset,
@@ -84,6 +88,9 @@ def check_missing_space_after_literal(file, lines, options=None):
                 )
 
 
+_LONE_DOUBLE_BACKTICK_RE = re.compile("(?<!`)``(?!`)")
+
+
 @checker(".rst", ".po")
 def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
     r"""Search for unbalanced inline literals delimiters.
@@ -95,14 +102,18 @@ def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
         if paragraph.count("|") > 4:
             return  # we don't handle tables yet.
         paragraph = clean_paragraph(paragraph)
-        for lone_double_backtick in re.finditer("(?<!`)``(?!`)", paragraph):
+        for lone_double_backtick in _LONE_DOUBLE_BACKTICK_RE.finditer(paragraph):
             error_offset = paragraph[: lone_double_backtick.start()].count("\n")
             yield (
                 paragraph_lno + error_offset,
                 "found an unbalanced inline literal markup.",
             )
 
 
+_ends_with_role_tag = re.compile(rst.ROLE_TAG + "$").search
+_starts_with_role_tag = re.compile("^" + rst.ROLE_TAG).search
+
+
 @checker(".rst", ".po", enabled=False)
 def check_default_role(file, lines, options=None):
     """Search for default roles (but they are allowed in many projects).
@@ -121,12 +132,12 @@ def check_default_role(file, lines, options=None):
             if (stripped_line.startswith("|") and stripped_line.endswith("|") and
                 stripped_line.count("|") >= 4 and "|" in match.group(0)):
                 return  # we don't handle tables yet.
-            if re.search(rst.ROLE_TAG + "$", before_match):
+            if _ends_with_role_tag(before_match):
                 # It's not a default role: it starts with a tag.
                 continue
-            if re.search("^" + rst.ROLE_TAG, after_match):
+            if _starts_with_role_tag(after_match):
                 # It's not a default role: it ends with a tag.
                 continue
             if match.group(0).startswith("``") and match.group(0).endswith("``"):
                 # It's not a default role: it's an inline literal.
                 continue
@@ -274,7 +285,7 @@ def check_role_with_double_backticks(file, lines, options=None):
             if inline_literal is None:
                 break
             before = paragraph[: inline_literal.start()]
-            if re.search(rst.ROLE_TAG + "$", before):
+            if _ends_with_role_tag(before):
                 error_offset = paragraph[: inline_literal.start()].count("\n")
                 yield paragraph_lno + error_offset, "role use a single backtick, double backtick found."
             paragraph = (
@@ -325,6 +336,9 @@ def check_missing_space_before_default_role(file, lines, options=None):
             )
 
 
+_HYPERLINK_REFERENCE_RE = re.compile(r"\S* <https?://[^ ]+>`_")
+
+
 @checker(".rst", ".po")
 def check_hyperlink_reference_missing_backtick(file, lines, options=None):
     """Search for missing backticks in front of hyperlink references.
@@ -337,7 +351,7 @@ def check_hyperlink_reference_missing_backtick(file, lines, options=None):
             return  # we don't handle tables yet.
         paragraph = clean_paragraph(paragraph)
         paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph)
-        for hyperlink_reference in re.finditer(r"\S* <https?://[^ ]+>`_", paragraph):
+        for hyperlink_reference in _HYPERLINK_REFERENCE_RE.finditer(paragraph):
             error_offset = paragraph[: hyperlink_reference.start()].count("\n")
             context = hyperlink_reference.group(0)
             yield (
@@ -391,6 +405,12 @@ def check_missing_final_newline(file, lines, options=None):
         yield len(lines), "No newline at end of file."
 
 
+_is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match
+_starts_with_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match
+_starts_with_anonymous_hyperlink = re.compile(r"^\s*__ ").match
+_is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match
+
+
 @checker(".rst", ".po", enabled=False, rst_only=True)
 def check_line_too_long(file, lines, options=None):
     """Check for line length; this checker is not run by default."""
@@ -399,13 +419,13 @@ def check_line_too_long(file, lines, options=None):
         if len(line) - 1 > options.max_line_length:
             if line.lstrip()[0] in "+|":
                 continue  # ignore wide tables
-            if re.match(r"^\s*\W*(:(\w+:)+)?`.*`\W*$", line):
+            if _is_long_interpreted_text(line):
                 continue  # ignore long interpreted text
-            if re.match(r"^\s*\.\. ", line):
+            if _starts_with_directive_or_hyperlink(line):
                 continue  # ignore directives and hyperlink targets
-            if re.match(r"^\s*__ ", line):
+            if _starts_with_anonymous_hyperlink(line):
                 continue  # ignore anonymous hyperlink targets
-            if re.match(r"^\s*``[^`]+``$", line):
+            if _is_very_long_string_literal(line):
                 continue  # ignore a very long literal string
             yield lno + 1, f"Line too long ({len(line)-1}/{options.max_line_length})"
 
@@ -438,6 +458,9 @@ def check_triple_backticks(file, lines, options=None):
             yield lno + 1, "There's no rst syntax using triple backticks"
 
 
+_has_bad_dedent = re.compile(" [^ ].*::$").match
+
+
 @checker(".rst", ".po", rst_only=False)
 def check_bad_dedent(file, lines, options=None):
     """Check for mis-alignment in indentation in code blocks.
@@ -455,19 +478,20 @@ def check_bad_dedent(file, lines, options=None):
 
     def check_block(block_lineno, block):
         for lineno, line in enumerate(block.splitlines()):
-            if re.match(" [^ ].*::$", line):
+            if _has_bad_dedent(line):
                 errors.append((block_lineno + lineno, "Bad dedent in block"))
 
     list(hide_non_rst_blocks(lines, hidden_block_cb=check_block))
     yield from errors
 
 
-_DANGLING_HYPHEN_RE = re.compile(r".*[a-z]-$")
+_has_dangling_hyphen = re.compile(r".*[a-z]-$").match
+
 
 @checker(".rst", rst_only=True)
 def check_dangling_hyphen(file, lines, options):
     """Check for lines ending in a hyphen."""
     for lno, line in enumerate(lines):
         stripped_line = line.rstrip("\n")
-        if _DANGLING_HYPHEN_RE.match(stripped_line):
+        if _has_dangling_hyphen(stripped_line):
             yield lno + 1, f"Line ends with dangling hyphen"
diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py
@@ -151,6 +151,7 @@ def is_multiline_non_rst_block(line):
 
 
 _NON_RST_BLOCKS_CACHE = {}
+_ZERO_OR_MORE_SPACES_RE = re.compile(" *")
 
 
 def hide_non_rst_blocks(lines, hidden_block_cb=None):
@@ -172,7 +173,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
     output = []
     for lineno, line in enumerate(lines, start=1):
         if in_literal is not None:
-            current_indentation = len(re.match(" *", line).group(0))
+            current_indentation = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0])
             if current_indentation > in_literal or line == "\n":
                 excluded_lines.append(line if line == "\n" else line[in_literal:])
                 line = "\n"  # Hiding line
@@ -182,12 +183,12 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
                     hidden_block_cb(block_line_start, "".join(excluded_lines))
                 excluded_lines = []
         if in_literal is None and is_multiline_non_rst_block(line):
-            in_literal = len(re.match(" *", line).group(0))
+            in_literal = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0])
             block_line_start = lineno
             assert not excluded_lines
             if (
-                _COMMENT_RE.search(line)
-                and type_of_explicit_markup(line) == "comment"
+                type_of_explicit_markup(line) == "comment"
+                and _COMMENT_RE.search(line)
             ):
                 line = "\n"
         output.append(line)
@@ -199,19 +200,26 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
     return output
 
 
+_starts_with_directive_marker = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match
+_starts_with_footnote_marker = re.compile(r"\.\. \[[0-9]+\] ").match
+_starts_with_citation_marker = re.compile(r"\.\. \[[^\]]+\] ").match
+_starts_with_target = re.compile(r"\.\. _.*[^_]: ").match
+_starts_with_substitution_definition = re.compile(r"\.\. \|[^\|]*\| ").match
+
+
 @lru_cache()
 def type_of_explicit_markup(line):
     """Tell apart various explicit markup blocks."""
     line = line.lstrip()
-    if re.match(rf"\.\. {rst.ALL_DIRECTIVES}::", line):
+    if _starts_with_directive_marker(line):
         return "directive"
-    if re.match(r"\.\. \[[0-9]+\] ", line):
+    if _starts_with_footnote_marker(line):
         return "footnote"
-    if re.match(r"\.\. \[[^\]]+\] ", line):
+    if _starts_with_citation_marker(line):
         return "citation"
-    if re.match(r"\.\. _.*[^_]: ", line):
+    if _starts_with_target(line):
         return "target"
-    if re.match(r"\.\. \|[^\|]*\| ", line):
+    if _starts_with_substitution_definition(line):
         return "substitution_definition"
     return "comment"
 

diff --git a/tests/test_sphinxlint.py b/tests/test_sphinxlint.py
@@ -64,7 +64,7 @@ def test_sphinxlint_shall_not_pass(file, expected_errors, capsys):
         assert expected_error in out
     number_of_expected_errors = len(expected_errors)
     number_of_reported_errors = len(out.splitlines())
-    assert number_of_expected_errors == number_of_reported_errors
+    assert number_of_expected_errors == number_of_reported_errors, f"{number_of_reported_errors=}, {out=}"
 
 
 @pytest.mark.parametrize("file", [str(FIXTURE_DIR / "paragraphs.rst")])