diff --git a/docs/changelog.md b/docs/changelog.md
index 786b75d..3c03394 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1 +1,10 @@
--8<-- "CHANGELOG.md"
+
+[](#hello){#hello2}
+
+## Hello
+
+Hello.
+
+Link to [Hello 1][hello1].
+Link to [Hello 2][hello2].
diff --git a/docs/index.md b/docs/index.md
index 612c7a5..78b84ee 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1 +1,10 @@
--8<-- "README.md"
+
+[](#hello){#hello1}
+
+## Hello
+
+Hello.
+
+Link to [Hello 1][hello1].
+Link to [Hello 2][hello2].
diff --git a/src/mkdocs_autorefs/plugin.py b/src/mkdocs_autorefs/plugin.py
index 899975a..2649899 100644
--- a/src/mkdocs_autorefs/plugin.py
+++ b/src/mkdocs_autorefs/plugin.py
@@ -19,16 +19,23 @@
from typing import TYPE_CHECKING, Any, Callable, Sequence
from urllib.parse import urlsplit
-from mkdocs.plugins import BasePlugin
+from markdown.extensions import Extension
+from markdown.core import Markdown
from mkdocs.config.base import Config
from mkdocs.config.config_options import Type
+from mkdocs.config.defaults import MkDocsConfig
+from mkdocs.plugins import BasePlugin
+from markdown.treeprocessors import Treeprocessor
-from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
-
+from mkdocs.structure.files import Files
+from mkdocs.structure.pages import Page
+from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url, AnchorScannerTreeProcessor
+from functools import partial
if TYPE_CHECKING:
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.pages import Page
from mkdocs.structure.toc import AnchorLink
+ from xml.etree.ElementTree import Element
try:
from mkdocs.plugins import get_plugin_logger
@@ -40,7 +47,10 @@
class AutorefsConfig(Config):
+ """Configuration options for the Autorefs plugin.
+
+ Attributes:
+ scan_anchors: Boolean option, `False` by default. When it is enabled,
+ the plugin's `on_config` hook passes an anchor scanner factory to the
+ Markdown extension so that HTML anchors get registered as references.
+ """
+
scan_anchors = Type(bool, default=False)
+ """Whether to scan HTML pages for anchors defining references."""
class AutorefsPlugin(BasePlugin[AutorefsConfig]):
@@ -56,16 +66,20 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
for more information about its plugin system.
"""
- scan_anchors: bool = False
scan_toc: bool = True
+ scan_anchors: bool = False
current_page: str | None = None
+ _re_anchors = re.compile(r'')
+
def __init__(self) -> None:
"""Initialize the object."""
super().__init__()
self._url_map: dict[str, str] = {}
self._abs_url_map: dict[str, str] = {}
+ # Markdown extension instance; stays None until `on_config` creates it.
+ self._extension: AutorefsExtension | None = None
self.get_fallback_anchor: Callable[[str], str | None] | None = None
+ # URL of the page being processed; set by `on_page_markdown`.
+ self.current_page: str | None = None
def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None:
"""Register that an anchor corresponding to an identifier was encountered when rendering the page.
@@ -141,20 +155,13 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
The modified config.
"""
log.debug("Adding AutorefsExtension to the list")
- config["markdown_extensions"].append(AutorefsExtension())
+ anchor_scanner_factory = partial(AnchorScannerTreeProcessor, self) if self.scan_anchors or self.config.scan_anchors else None
+ # anchor_scanner_factory = None
+ self._extension = AutorefsExtension(anchor_scanner_factory=anchor_scanner_factory)
+ config["markdown_extensions"].append(self._extension)
return config
- def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
- """Remember which page is the current one.
-
- Arguments:
- markdown: Input Markdown.
- page: The related MkDocs page instance.
- kwargs: Additional arguments passed by MkDocs.
-
- Returns:
- The same Markdown. We only use this hook to map anchors to URLs.
- """
+ def on_page_markdown(self, markdown: str, *, page: Page, **kwargs: Any) -> str | None:
+ """Remember which page is the current one.
+
+ Arguments:
+ markdown: Input Markdown.
+ page: The related MkDocs page instance (keyword-only).
+ kwargs: Additional arguments passed by MkDocs.
+
+ Returns:
+ The same Markdown, unmodified. This hook only records `page.url` as the current page.
+ """
self.current_page = page.url
return markdown
@@ -179,9 +186,9 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
for item in page.toc.items:
self.map_urls(page.url, item)
- if self.config.scan_anchors or self.scan_anchors:
- for href, id in re.findall(r'', html):
- self.register_anchor(page.url, identifier=id, anchor=href.lstrip("#"))
+ # if self.scan_anchors or self.config.scan_anchors:
+ # for href, hid in re.findall(self._re_anchors, html):
+ # self.register_anchor(page.url, identifier=hid, anchor=href.lstrip("#"))
return html
diff --git a/src/mkdocs_autorefs/references.py b/src/mkdocs_autorefs/references.py
index 66b4931..d187201 100644
--- a/src/mkdocs_autorefs/references.py
+++ b/src/mkdocs_autorefs/references.py
@@ -8,12 +8,15 @@
from urllib.parse import urlsplit
from xml.etree.ElementTree import Element
+from markdown.core import Markdown
from markdown.extensions import Extension
+from markdown.treeprocessors import Treeprocessor
from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor
from markdown.util import INLINE_PLACEHOLDER_RE
if TYPE_CHECKING:
from markdown import Markdown
+ from mkdocs_autorefs.plugin import AutorefsPlugin
AUTO_REF_RE = re.compile(
r"autorefs-identifier|autorefs-optional|autorefs-optional-hover)="
@@ -197,9 +200,31 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
return html, unmapped
+class AnchorScannerTreeProcessor(Treeprocessor):
+    """Tree processor that registers the anchors found in the element tree.
+
+    Each `a` element carrying a non-empty `id` attribute is reported to the
+    plugin through `register_anchor`, for the plugin's current page.
+    """
+
+    def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
+        """Initialize the object.
+
+        Arguments:
+            plugin: The plugin that receives the discovered anchors.
+            md: The Markdown instance, forwarded to the parent class.
+        """
+        super().__init__(md)
+        self.plugin = plugin
+
+    def run(self, root: Element) -> None:
+        """Scan the tree for anchors, unless the plugin has no current page.
+
+        Arguments:
+            root: The root element of the tree.
+        """
+        if self.plugin.current_page is None:
+            return
+        self._scan_anchors(root)
+
+    def _scan_anchors(self, parent: Element) -> None:
+        """Register the anchors found among the descendants of an element.
+
+        Arguments:
+            parent: The element whose children are inspected.
+        """
+        for child in parent:
+            anchor_id = child.get("id") if child.tag == "a" else None
+            if not anchor_id:
+                # Not an identified anchor: keep looking deeper in the tree.
+                self._scan_anchors(child)
+                continue
+            target = child.get("href", "").lstrip("#")
+            self.plugin.register_anchor(self.plugin.current_page, anchor_id, target)
+
+
class AutorefsExtension(Extension):
"""Extension that inserts auto-references in Markdown."""
+    def __init__(
+        self,
+        anchor_scanner_factory: Callable[[Markdown], AnchorScannerTreeProcessor] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize the object.
+
+        Arguments:
+            anchor_scanner_factory: Optional callable that receives the Markdown
+                instance and returns the anchor scanner to register. It defaults
+                to `None` (no anchor scanning), so that instantiating the
+                extension without arguments keeps working.
+            **kwargs: Additional arguments forwarded to the parent class.
+        """
+        super().__init__(**kwargs)
+        self.anchor_scanner_factory = anchor_scanner_factory
+        # Stays None until `extendMarkdown` calls the factory.
+        self.anchor_scanner: AnchorScannerTreeProcessor | None = None
+
def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent method's name)
"""Register the extension.
@@ -213,3 +238,10 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me
"mkdocs-autorefs",
priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor
)
+ if self.anchor_scanner_factory:
+ self.anchor_scanner = self.anchor_scanner_factory(md)
+ md.treeprocessors.register(
+ self.anchor_scanner,
+ "mkdocs-autorefs-anchors-scanner",
+ priority=0,
+ )
diff --git a/tests/test_plugin.py b/tests/test_plugin.py
index 5fb0906..8fcae75 100644
--- a/tests/test_plugin.py
+++ b/tests/test_plugin.py
@@ -5,8 +5,6 @@
import pytest
from mkdocs_autorefs.plugin import AutorefsPlugin
-from mkdocs.structure.pages import Page
-from mkdocs.structure.files import File
def test_url_registration() -> None:
@@ -68,7 +66,7 @@ def test_register_html_anchors() -> None:
"""Check that HT?ML anchors are registered when enabled."""
plugin = AutorefsPlugin()
plugin.scan_toc = False
- plugin.config["scan_anchors"] = plugin.scan_anchors = True
+ plugin.scan_anchors = True
class Page:
url = "/page/url"
@@ -76,9 +74,13 @@ class Page:
plugin.on_page_content(
"""
-
+
+
+
""",
- page=Page(),
+ page=Page(), # type: ignore[arg-type]
)
assert "foo.bar" in plugin._url_map
- assert "foo.baz" in plugin._url_map
+ assert "foo.baz" not in plugin._url_map
+ assert "foo.qux" in plugin._url_map
+ assert "qux.foo" in plugin._url_map