Improve the caching strategy employed in utils.py
AlexWaygood committed Oct 12, 2023
1 parent 3e99049 commit a901d6a
Showing 2 changed files with 30 additions and 17 deletions.
sphinxlint/sphinxlint.py (30 changes: 17 additions & 13 deletions)
@@ -2,7 +2,7 @@
 from dataclasses import dataclass
 from os.path import splitext
 
-from sphinxlint.utils import hide_non_rst_blocks, po2rst
+from sphinxlint.utils import PER_FILE_CACHES, hide_non_rst_blocks, po2rst
 
 
 @dataclass(frozen=True)
@@ -50,16 +50,20 @@ def check_text(filename, text, checkers, options=None):
 
 
 def check_file(filename, checkers, options: CheckersOptions = None):
-    ext = splitext(filename)[1]
-    if not any(ext in checker.suffixes for checker in checkers):
-        return Counter()
     try:
-        with open(filename, encoding="utf-8") as f:
-            text = f.read()
-        if filename.endswith(".po"):
-            text = po2rst(text)
-    except OSError as err:
-        return [f"{filename}: cannot open: {err}"]
-    except UnicodeDecodeError as err:
-        return [f"{filename}: cannot decode as UTF-8: {err}"]
-    return check_text(filename, text, checkers, options)
+        ext = splitext(filename)[1]
+        if not any(ext in checker.suffixes for checker in checkers):
+            return Counter()
+        try:
+            with open(filename, encoding="utf-8") as f:
+                text = f.read()
+            if filename.endswith(".po"):
+                text = po2rst(text)
+        except OSError as err:
+            return [f"{filename}: cannot open: {err}"]
+        except UnicodeDecodeError as err:
+            return [f"{filename}: cannot decode as UTF-8: {err}"]
+        return check_text(filename, text, checkers, options)
+    finally:
+        for memoized_function in PER_FILE_CACHES:
+            memoized_function.cache_clear()
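
The try/finally wrapper is what makes the cache clearing reliable: the finally block runs whether check_file exits through the early return Counter(), through one of the error-message returns in the except branches, through the normal return from check_text, or by letting an unexpected exception propagate. A minimal sketch of that behaviour, using an illustrative memoized helper rather than anything from sphinx-lint:

    from functools import lru_cache

    @lru_cache(maxsize=None)
    def classify(line):
        # Illustrative stand-in for one of the cached sphinx-lint helpers.
        return line.startswith("..")

    def check_one_file(lines):
        try:
            if not lines:
                return []  # an early return still reaches the finally block
            return [classify(line) for line in lines]
        finally:
            classify.cache_clear()  # runs on every exit path, errors included
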
sphinxlint/utils.py (17 changes: 13 additions & 4 deletions)
@@ -7,6 +7,15 @@
 from sphinxlint import rst
 
 
+PER_FILE_CACHES = []
+
+
+def _per_file_cache(func):
+    memoized_func = lru_cache(maxsize=None)(func)
+    PER_FILE_CACHES.append(memoized_func)
+    return memoized_func
+
+
 def match_size(re_match):
     return re_match.end() - re_match.start()
 
@@ -29,7 +38,7 @@ def _clean_heuristic(paragraph, regex):
         paragraph = paragraph[: candidate.start()] + paragraph[candidate.end() :]
 
 
-@lru_cache()
+@_per_file_cache
 def clean_paragraph(paragraph):
     """Removes all good constructs, so detectors can focus on bad ones.
@@ -45,7 +54,7 @@ def clean_paragraph(paragraph):
     return paragraph.replace("\x00", "\\")
 
 
-@lru_cache()
+@_per_file_cache
 def escape2null(text):
     r"""Return a string with escape-backslashes converted to nulls.
@@ -79,7 +88,7 @@ def escape2null(text):
         start = found + 2  # skip character after escape
 
 
-@lru_cache()
+@_per_file_cache
 def paragraphs(lines):
     """Yield (paragraph_line_no, paragraph_text) pairs describing
     paragraphs of the given lines.
@@ -207,7 +216,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None):
 _starts_with_substitution_definition = re.compile(r"\.\. \|[^\|]*\| ").match
 
 
-@lru_cache()
+@_per_file_cache
 def type_of_explicit_markup(line):
     """Tell apart various explicit markup blocks."""
     line = line.lstrip()
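
Taken together, the two files implement a small per-file memoization registry: _per_file_cache still memoizes a helper with functools.lru_cache (now unbounded), but it also records the wrapped function in PER_FILE_CACHES so that check_file can empty every cache once a file has been linted. Because the cached helpers are keyed on text from the file currently being checked, their entries are useless for the next file, and clearing them keeps memory bounded by a single document rather than a whole documentation tree. A self-contained sketch of the pattern, with hypothetical helper names (normalize_paragraph, lint_file) that are not part of sphinx-lint:

    from functools import lru_cache

    PER_FILE_CACHES = []

    def _per_file_cache(func):
        # Memoize the helper and remember it so its cache can be emptied later.
        memoized_func = lru_cache(maxsize=None)(func)
        PER_FILE_CACHES.append(memoized_func)
        return memoized_func

    @_per_file_cache
    def normalize_paragraph(paragraph):
        # Hypothetical stand-in for helpers such as clean_paragraph or escape2null.
        return " ".join(paragraph.split())

    def lint_file(paragraphs):
        try:
            return [normalize_paragraph(p) for p in paragraphs]
        finally:
            # Cached entries belong to this file only; drop them before the next one.
            for memoized_func in PER_FILE_CACHES:
                memoized_func.cache_clear()

After lint_file returns, normalize_paragraph.cache_info() reports currsize=0, so repeated paragraphs are still deduplicated within one file while the caches never grow across files.
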
