Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pre-compile nearly all regex patterns #82

Merged
merged 6 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 40 additions & 16 deletions sphinxlint/checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def check_missing_backtick_after_role(file, lines, options=None):
yield paragraph_lno + error_offset, f"role missing closing backtick: {error.group(0)!r}"


_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL)
_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX)


@checker(".rst", ".po")
def check_missing_space_after_literal(file, lines, options=None):
r"""Search for inline literals immediately followed by a character.
Expand All @@ -74,8 +78,8 @@ def check_missing_space_after_literal(file, lines, options=None):
if paragraph.count("|") > 4:
return # we don't handle tables yet.
paragraph = clean_paragraph(paragraph)
for role in re.finditer("``.+?``(?!`).", paragraph, flags=re.DOTALL):
if not re.match(rst.END_STRING_SUFFIX, role.group(0)[-1]):
for role in _RST_ROLE_RE.finditer(paragraph):
if not _END_STRING_SUFFIX_RE.match(role[0][-1]):
error_offset = paragraph[: role.start()].count("\n")
yield (
paragraph_lno + error_offset,
Expand All @@ -84,6 +88,9 @@ def check_missing_space_after_literal(file, lines, options=None):
)


_LONE_DOUBLE_BACKTICK_RE = re.compile("(?<!`)``(?!`)")


@checker(".rst", ".po")
def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
r"""Search for unbalanced inline literals delimiters.
Expand All @@ -95,14 +102,18 @@ def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
if paragraph.count("|") > 4:
return # we don't handle tables yet.
paragraph = clean_paragraph(paragraph)
for lone_double_backtick in re.finditer("(?<!`)``(?!`)", paragraph):
for lone_double_backtick in _LONE_DOUBLE_BACKTICK_RE.finditer(paragraph):
error_offset = paragraph[: lone_double_backtick.start()].count("\n")
yield (
paragraph_lno + error_offset,
"found an unbalanced inline literal markup.",
)


_ends_with_role_tag = re.compile(rst.ROLE_TAG + "$").search
_starts_with_role_tag = re.compile("^" + rst.ROLE_TAG).search


@checker(".rst", ".po", enabled=False)
def check_default_role(file, lines, options=None):
"""Search for default roles (but they are allowed in many projects).
Expand All @@ -121,12 +132,12 @@ def check_default_role(file, lines, options=None):
if (stripped_line.startswith("|") and stripped_line.endswith("|") and
stripped_line.count("|") >= 4 and "|" in match.group(0)):
return # we don't handle tables yet.
if re.search(rst.ROLE_TAG + "$", before_match):
# It's not a default role: it starts with a tag.
continue
if re.search("^" + rst.ROLE_TAG, after_match):
if _ends_with_role_tag(before_match):
# It's not a default role: it ends with a tag.
continue
if _starts_with_role_tag(after_match):
# It's not a default role: it starts with a tag.
continue
if match.group(0).startswith("``") and match.group(0).endswith("``"):
# It's not a default role: it's an inline literal.
continue
Expand Down Expand Up @@ -274,7 +285,7 @@ def check_role_with_double_backticks(file, lines, options=None):
if inline_literal is None:
break
before = paragraph[: inline_literal.start()]
if re.search(rst.ROLE_TAG + "$", before):
if _ends_with_role_tag(before):
error_offset = paragraph[: inline_literal.start()].count("\n")
yield paragraph_lno + error_offset, "role use a single backtick, double backtick found."
paragraph = (
Expand Down Expand Up @@ -325,6 +336,9 @@ def check_missing_space_before_default_role(file, lines, options=None):
)


_HYPERLINK_REFERENCE_RE = re.compile(r"\S* <https?://[^ ]+>`_")


@checker(".rst", ".po")
def check_hyperlink_reference_missing_backtick(file, lines, options=None):
"""Search for missing backticks in front of hyperlink references.
Expand All @@ -337,7 +351,7 @@ def check_hyperlink_reference_missing_backtick(file, lines, options=None):
return # we don't handle tables yet.
paragraph = clean_paragraph(paragraph)
paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph)
for hyperlink_reference in re.finditer(r"\S* <https?://[^ ]+>`_", paragraph):
for hyperlink_reference in _HYPERLINK_REFERENCE_RE.finditer(paragraph):
error_offset = paragraph[: hyperlink_reference.start()].count("\n")
context = hyperlink_reference.group(0)
yield (
Expand Down Expand Up @@ -391,6 +405,12 @@ def check_missing_final_newline(file, lines, options=None):
yield len(lines), "No newline at end of file."


_is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match
_starts_with_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match
_starts_with_anonymous_hyperlink = re.compile(r"^\s*__ ").match
_is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match


@checker(".rst", ".po", enabled=False, rst_only=True)
def check_line_too_long(file, lines, options=None):
"""Check for line length; this checker is not run by default."""
Expand All @@ -399,13 +419,13 @@ def check_line_too_long(file, lines, options=None):
if len(line) - 1 > options.max_line_length:
if line.lstrip()[0] in "+|":
continue # ignore wide tables
if re.match(r"^\s*\W*(:(\w+:)+)?`.*`\W*$", line):
if _is_long_interpreted_text(line):
continue # ignore long interpreted text
if re.match(r"^\s*\.\. ", line):
if _starts_with_directive_or_hyperlink(line):
continue # ignore directives and hyperlink targets
if re.match(r"^\s*__ ", line):
if _starts_with_anonymous_hyperlink(line):
continue # ignore anonymous hyperlink targets
if re.match(r"^\s*``[^`]+``$", line):
if _is_very_long_string_literal(line):
continue # ignore a very long literal string
yield lno + 1, f"Line too long ({len(line)-1}/{options.max_line_length})"

Expand Down Expand Up @@ -438,6 +458,9 @@ def check_triple_backticks(file, lines, options=None):
yield lno + 1, "There's no rst syntax using triple backticks"


_has_bad_dedent = re.compile(" [^ ].*::$").match


@checker(".rst", ".po", rst_only=False)
def check_bad_dedent(file, lines, options=None):
"""Check for mis-alignment in indentation in code blocks.
Expand All @@ -455,19 +478,20 @@ def check_bad_dedent(file, lines, options=None):

def check_block(block_lineno, block):
for lineno, line in enumerate(block.splitlines()):
if re.match(" [^ ].*::$", line):
if _has_bad_dedent(line):
errors.append((block_lineno + lineno, "Bad dedent in block"))

list(hide_non_rst_blocks(lines, hidden_block_cb=check_block))
yield from errors


_DANGLING_HYPHEN_RE = re.compile(r".*[a-z]-$")
_has_dangling_hyphen = re.compile(r".*[a-z]-$").match


@checker(".rst", rst_only=True)
def check_dangling_hyphen(file, lines, options):
"""Check for lines ending in a hyphen."""
for lno, line in enumerate(lines):
stripped_line = line.rstrip("\n")
if _DANGLING_HYPHEN_RE.match(stripped_line):
if _has_dangling_hyphen(stripped_line):
yield lno + 1, f"Line ends with dangling hyphen"
26 changes: 17 additions & 9 deletions sphinxlint/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def is_multiline_non_rst_block(line):


_NON_RST_BLOCKS_CACHE = {}
_ZERO_OR_MORE_SPACES_RE = re.compile(" *")


def hide_non_rst_blocks(lines, hidden_block_cb=None):
Expand All @@ -172,7 +173,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
output = []
for lineno, line in enumerate(lines, start=1):
if in_literal is not None:
current_indentation = len(re.match(" *", line).group(0))
current_indentation = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0])
if current_indentation > in_literal or line == "\n":
excluded_lines.append(line if line == "\n" else line[in_literal:])
line = "\n" # Hiding line
Expand All @@ -182,12 +183,12 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
hidden_block_cb(block_line_start, "".join(excluded_lines))
excluded_lines = []
if in_literal is None and is_multiline_non_rst_block(line):
in_literal = len(re.match(" *", line).group(0))
in_literal = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0])
block_line_start = lineno
assert not excluded_lines
if (
_COMMENT_RE.search(line)
and type_of_explicit_markup(line) == "comment"
type_of_explicit_markup(line) == "comment"
and _COMMENT_RE.search(line)
):
line = "\n"
output.append(line)
Expand All @@ -199,19 +200,26 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
return output


_starts_with_directive_marker = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match
_starts_with_footnote_marker = re.compile(r"\.\. \[[0-9]+\] ").match
_starts_with_citation_marker = re.compile(r"\.\. \[[^\]]+\] ").match
_starts_with_target = re.compile(r"\.\. _.*[^_]: ").match
_starts_with_substitution_definition = re.compile(r"\.\. \|[^\|]*\| ").match


@lru_cache()
def type_of_explicit_markup(line):
"""Tell apart various explicit markup blocks."""
line = line.lstrip()
if re.match(rf"\.\. {rst.ALL_DIRECTIVES}::", line):
if _starts_with_directive_marker(line):
return "directive"
if re.match(r"\.\. \[[0-9]+\] ", line):
if _starts_with_footnote_marker(line):
return "footnote"
if re.match(r"\.\. \[[^\]]+\] ", line):
if _starts_with_citation_marker(line):
return "citation"
if re.match(r"\.\. _.*[^_]: ", line):
if _starts_with_target(line):
return "target"
if re.match(r"\.\. \|[^\|]*\| ", line):
if _starts_with_substitution_definition(line):
return "substitution_definition"
return "comment"

Expand Down
2 changes: 1 addition & 1 deletion tests/test_sphinxlint.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_sphinxlint_shall_not_pass(file, expected_errors, capsys):
assert expected_error in out
number_of_expected_errors = len(expected_errors)
number_of_reported_errors = len(out.splitlines())
assert number_of_expected_errors == number_of_reported_errors
assert number_of_expected_errors == number_of_reported_errors, f"{number_of_reported_errors=}, {out=}"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not an optimisation, but I found this quite helpful for debugging test failures on my first commit in this PR! Happy to take this out if you prefer, though; I accidentally committed it here 😆



@pytest.mark.parametrize("file", [str(FIXTURE_DIR / "paragraphs.rst")])
Expand Down