Skip to content

Commit

Permalink
feat: Add backlinks feature
Browse files Browse the repository at this point in the history
This feature only allows recording and retrieving backlinks, for other plugins or systems to render them. Backlinks are not rendered directly by autorefs (though we could consider offering such a feature in the future).

PR-65: #65
Issue-mkdocstrings-723: mkdocstrings/mkdocstrings#723
Issue-mkdocstrings-python-153: mkdocstrings/python#153
  • Loading branch information
pawamoy committed Feb 23, 2025
1 parent 08ea5fe commit 3341add
Show file tree
Hide file tree
Showing 5 changed files with 297 additions and 10 deletions.
45 changes: 45 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,48 @@ You can also change the actual identifier of a heading, thanks again to the `att
```

...though note that this will impact the URL anchor too (and therefore the permalink to the heading).

### Backlinks

The autorefs plugin supports recording backlinks, which other plugins or systems can then use to render backlinks into pages.

For example, when linking from page `foo/`, section `Section` to a heading with identifier `heading` thanks to a cross-reference `[Some heading][heading]`, the plugin will record that `foo/#section` references `heading`.

```md
# Page foo
This is page foo.
## Section
This section references [some heading][heading].
```

Other plugins or systems integrating with the autorefs plugin can then retrieve backlinks for a specific identifier:

```python
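# `from_url` is required: it is the URL of the page where the backlinks are rendered.
backlinks = autorefs_plugin.get_backlinks("heading", from_url=page.url)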
```

The `get_backlinks` method returns a map of backlink types to sets of backlinks. A backlink is a tuple of navigation breadcrumbs, each breadcrumb having a title and URL.

```python
print(backlinks)
# {
#     "referenced-by": {
#         Backlink(
#             crumbs=(
#                 BacklinkCrumb(title="Foo", url="foo/"),
#                 BacklinkCrumb(title="Section", url="foo/#section"),
#             ),
#         ),
#     },
# }
```

The default backlink type is `referenced-by`, but can be customized by other plugins or systems thanks to the `backlink-type` HTML data attribute on `autoref` elements. Such plugins and systems can also specify the anchor on the current page to use for the backlink with the `backlink-anchor` HTML data attribute on `autoref` elements.

```html
<autoref identifier="heading" backlink-type="mentioned-by" backlink-anchor="section-paragraph">
```

This feature is typically designed for use in [mkdocstrings](https://mkdocstrings.github.io/) handlers.
81 changes: 81 additions & 0 deletions src/mkdocs_autorefs/backlinks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""Backlinks module."""

from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, ClassVar

from markdown.core import Markdown
from markdown.treeprocessors import Treeprocessor

if TYPE_CHECKING:
from xml.etree.ElementTree import Element

from markdown import Markdown

from mkdocs_autorefs.plugin import AutorefsPlugin

# Use the MkDocs plugin logger factory when it can be imported; otherwise build
# a standard logger under the same `mkdocs.plugins.` namespace.
try:
    from mkdocs.plugins import get_plugin_logger
except ImportError:
    # TODO: remove once support for MkDocs <1.5 is dropped
    log = logging.getLogger(f"mkdocs.plugins.{__name__}")
else:
    log = get_plugin_logger(__name__)  # type: ignore[assignment]


@dataclass(frozen=True, order=True)
class BacklinkCrumb:
    """A single navigation breadcrumb of a backlink.

    Instances are immutable, hashable and orderable (compared field by field,
    `title` first), so they can be stored in sets and sorted.

    Attributes:
        title: The title of the breadcrumb.
        url: The URL of the breadcrumb.
    """

    title: str
    url: str


@dataclass(frozen=True, order=True)
class Backlink:
    """A backlink, expressed as an ordered trail of navigation breadcrumbs.

    Instances are immutable, hashable and orderable, so they can be stored in sets and sorted.

    Attributes:
        crumbs: The breadcrumbs making up the backlink.
    """

    crumbs: tuple[BacklinkCrumb, ...]


class BacklinksTreeProcessor(Treeprocessor):
    """Tree processor that annotates `autoref` elements for backlink recording.

    Every `autoref` element lacking a `backlink-type` attribute receives the default
    `referenced-by` type. Every `autoref` element lacking a `backlink-anchor` attribute
    receives the ID of the most recently seen heading or Markdown anchor, when one is known.
    These attributes are then used later to register backlinks.
    """

    name: str = "mkdocs-autorefs-backlinks"
    # ID the "last heading" is reset to at the start of each run, before any heading is seen.
    initial_id: str | None = None
    _htags: ClassVar[set[str]] = {"h1", "h2", "h3", "h4", "h5", "h6"}

    def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
        """Initialize the tree processor.

        Parameters:
            plugin: A reference to the autorefs plugin, whose `current_page` attribute is checked in `run`.
            md: The Markdown instance, forwarded to the parent `Treeprocessor`.
        """
        super().__init__(md)
        self.plugin = plugin
        self.last_heading_id: str | None = None

    def run(self, root: Element) -> None:
        """Annotate the autorefs found under `root`, unless the plugin has no current page."""
        if self.plugin.current_page is None:
            return
        # Start each run from the configured initial ID rather than a stale heading.
        self.last_heading_id = self.initial_id
        self._enhance_autorefs(root)

    def _enhance_autorefs(self, parent: Element) -> None:
        """Walk the children of `parent`, tracking heading IDs and annotating autorefs."""
        for child in parent:
            tag = child.tag
            if tag == "autoref":
                attributes = child.attrib
                if "backlink-type" not in attributes:
                    child.set("backlink-type", "referenced-by")
                if self.last_heading_id and "backlink-anchor" not in attributes:
                    child.set("backlink-anchor", self.last_heading_id)
            elif tag == "a":  # Markdown anchor.
                # Only an empty link (no text, no href, no trailing text) with an ID counts as an anchor.
                has_content = child.text or child.get("href") or (child.tail and child.tail.strip())
                if not has_content:
                    anchor_id = child.get("id")
                    if anchor_id:
                        self.last_heading_id = anchor_id
            elif tag in self._htags:  # Heading.
                self.last_heading_id = child.get("id")
            else:
                # Any other element may contain headings, anchors or autorefs: recurse into it.
                self._enhance_autorefs(child)
67 changes: 66 additions & 1 deletion src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import contextlib
import functools
import logging
from collections import defaultdict
from pathlib import PurePosixPath as URL # noqa: N814
from typing import TYPE_CHECKING, Any, Callable, Literal
from urllib.parse import urlsplit
Expand All @@ -22,6 +23,7 @@
from mkdocs.plugins import BasePlugin, event_priority
from mkdocs.structure.pages import Page

from mkdocs_autorefs.backlinks import Backlink, BacklinkCrumb
from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url

if TYPE_CHECKING:
Expand All @@ -30,6 +32,7 @@
from jinja2.environment import Environment
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.files import Files
from mkdocs.structure.nav import Section
from mkdocs.structure.pages import Page
from mkdocs.structure.toc import AnchorLink

Expand Down Expand Up @@ -99,6 +102,7 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
"""

scan_toc: bool = True
record_backlinks: bool = False
current_page: Page | None = None
# YORE: Bump 2: Remove line.
legacy_refs: bool = True
Expand Down Expand Up @@ -135,7 +139,9 @@ def __init__(self) -> None:
self._primary_url_map: dict[str, list[str]] = {}
self._secondary_url_map: dict[str, list[str]] = {}
self._title_map: dict[str, str] = {}
self._backlink_page_map: dict[str, Page] = {}
self._abs_url_map: dict[str, str] = {}
self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
# YORE: Bump 2: Remove line.
self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
# YORE: Bump 2: Remove line.
Expand All @@ -162,6 +168,56 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
stacklevel=2,
)

def _record_backlink(self, identifier: str, backlink_type: str, backlink_anchor: str, page_url: str) -> None:
"""Record a backlink.
Arguments:
identifier: The target identifier.
backlink_type: The type of backlink.
backlink_anchor: The backlink target anchor.
page_url: The URL of the page containing the backlink.
"""
# When we record backlinks, all identifiers have been registered.
# If an identifier is not found in the primary or secondary URL maps, it's an absolute URL,
# meaning it comes from an external source (typically an object inventory),
# and we don't need to record backlinks for it.
if identifier in self._primary_url_map or identifier in self._secondary_url_map:
self._backlinks[identifier][backlink_type].add(f"{page_url}#{backlink_anchor}")

def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[Backlink]]:
    """Collect the backlinks recorded for the given identifiers, relative to a URL.

    Arguments:
        *identifiers: The identifiers to get backlinks for.
        from_url: The URL of the page where backlinks are rendered.

    Returns:
        A dictionary of backlinks, with the type of reference as key and a set of backlinks as value.
        Backlinks of the same type recorded for different identifiers are merged into the same set.
    """
    grouped: dict[str, set[Backlink]] = defaultdict(set)
    # Deduplicate the identifiers so that each one is looked up only once.
    for ident in set(identifiers):
        recorded = self._backlinks.get(ident)
        if not recorded:
            continue
        for kind, urls in recorded.items():
            for url in urls:
                grouped[kind].add(self._crumbs(from_url, url))
    return grouped

def _crumbs(self, from_url: str, backlink_url: str) -> Backlink:
    """Build the breadcrumb trail of a backlink, with URLs made relative to `from_url`.

    Arguments:
        from_url: The URL that breadcrumb URLs are made relative to.
        backlink_url: The recorded URL of the backlink, used to look up its page and title.

    Returns:
        A backlink whose breadcrumbs go from the outermost navigation ancestor
        down to the page and, last, to the backlink location itself.
    """
    source_page: Page = self._backlink_page_map[backlink_url]
    section_title = self._title_map.get(backlink_url, "")

    # The trail is assembled innermost-first, then flipped before being returned.
    trail: list[BacklinkCrumb] = [
        BacklinkCrumb(section_title, relative_url(from_url, backlink_url)),
        BacklinkCrumb(source_page.title, relative_url(from_url, source_page.url + "#")),
    ]

    ancestor: Page | Section | None = source_page.parent
    while ancestor:
        # Navigation items without a (non-empty) `url` attribute keep their falsy URL as is.
        raw_url = getattr(ancestor, "url", "")
        crumb_url = relative_url(from_url, raw_url + "#") if raw_url else raw_url
        trail.append(BacklinkCrumb(ancestor.title, crumb_url))
        ancestor = ancestor.parent

    return Backlink(tuple(trail[::-1]))

def register_anchor(
self,
page: Page,
Expand Down Expand Up @@ -196,6 +252,8 @@ def register_anchor(
url_map[identifier] = [url]
if title and url not in self._title_map:
self._title_map[url] = title
if self.record_backlinks and url not in self._backlink_page_map:
self._backlink_page_map[url] = page

def register_url(self, identifier: str, url: str) -> None:
"""Register that the identifier should be turned into a link to this URL.
Expand Down Expand Up @@ -406,7 +464,7 @@ def map_urls(self, page: Page, anchor: AnchorLink) -> None:

@event_priority(-50) # Late, after mkdocstrings has finished loading inventories.
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment: # noqa: ARG002
"""Apply cross-references.
"""Apply cross-references and collect backlinks.
Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
Expand All @@ -415,6 +473,9 @@ def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) ->
We log a warning for each reference that we couldn't map to an URL.
We also collect backlinks at the same time. We fix cross-refs and collect backlinks in a single pass
for performance reasons (we don't want to run the regular expression on each page twice).
Arguments:
env: The MkDocs environment.
config: The MkDocs config object.
Expand All @@ -433,10 +494,14 @@ def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) ->
from_url=file.page.url,
fallback=self.get_fallback_anchor,
)
backlink_recorder = (
functools.partial(self._record_backlink, page_url=file.page.url) if self.record_backlinks else None
)
# YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
file.page.content, unmapped = fix_refs(
file.page.content,
url_mapper,
record_backlink=backlink_recorder,
link_titles=self._link_titles,
strip_title_tags=self._strip_title_tags,
_legacy_refs=self.legacy_refs,
Expand Down
50 changes: 41 additions & 9 deletions src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from markdown.util import HTML_PLACEHOLDER_RE, INLINE_PLACEHOLDER_RE
from markupsafe import Markup

from mkdocs_autorefs.backlinks import BacklinksTreeProcessor

if TYPE_CHECKING:
from collections.abc import Iterable
from pathlib import Path
Expand Down Expand Up @@ -328,6 +330,8 @@ class _AutorefsAttrs(dict):
"filepath",
"lineno",
"slug",
"backlink-type",
"backlink-anchor",
}

@property
Expand Down Expand Up @@ -416,6 +420,7 @@ def _strip_tags(html: str) -> str:
def fix_ref(
url_mapper: Callable[[str], tuple[str, str | None]],
unmapped: list[tuple[str, AutorefsHookInterface.Context | None]],
record_backlink: Callable[[str, str, str], None] | None = None,
*,
link_titles: bool | Literal["external"] = True,
strip_title_tags: bool = False,
Expand All @@ -432,6 +437,7 @@ def fix_ref(
url_mapper: A callable that gets an object's site URL by its identifier,
such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
unmapped: A list to store unmapped identifiers.
record_backlink: A callable to record backlinks.
link_titles: How to set HTML titles on links. Always (`True`), never (`False`), or external-only (`"external"`).
strip_title_tags: Whether to strip HTML tags from link titles.
Expand All @@ -449,6 +455,13 @@ def inner(match: Match) -> str:

identifiers = (identifier, slug) if slug else (identifier,)

if (
record_backlink
and (backlink_type := attrs.get("backlink-type"))
and (backlink_anchor := attrs.get("backlink-anchor"))
):
record_backlink(identifier, backlink_type, backlink_anchor)

try:
url, original_title = _find_url(identifiers, url_mapper)
except KeyError:
Expand Down Expand Up @@ -495,6 +508,7 @@ def fix_refs(
html: str,
url_mapper: Callable[[str], tuple[str, str | None]],
*,
record_backlink: Callable[[str, str, str], None] | None = None,
link_titles: bool | Literal["external"] = True,
strip_title_tags: bool = False,
# YORE: Bump 2: Remove line.
Expand All @@ -506,6 +520,7 @@ def fix_refs(
html: The text to fix.
url_mapper: A callable that gets an object's site URL by its identifier,
such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
record_backlink: A callable to record backlinks.
link_titles: How to set HTML titles on links. Always (`True`), never (`False`), or external-only (`"external"`).
strip_title_tags: Whether to strip HTML tags from link titles.
Expand All @@ -514,7 +529,7 @@ def fix_refs(
"""
unmapped: list[tuple[str, AutorefsHookInterface.Context | None]] = []
html = AUTOREF_RE.sub(
fix_ref(url_mapper, unmapped, link_titles=link_titles, strip_title_tags=strip_title_tags),
fix_ref(url_mapper, unmapped, record_backlink, link_titles=link_titles, strip_title_tags=strip_title_tags),
html,
)

Expand Down Expand Up @@ -599,6 +614,11 @@ def _log_enabling_markdown_anchors() -> None:
log.debug("Enabling Markdown anchors feature")


@lru_cache(maxsize=None)
def _log_enabling_backlinks() -> None:
    """Log, at debug level, that the backlinks feature is being enabled.

    The call takes no arguments and is memoized, so the message is emitted on the first call only.
    """
    log.debug("Enabling backlinks feature")


class AutorefsExtension(Extension):
"""Markdown extension that transforms unresolved references into auto-references.
Expand Down Expand Up @@ -627,7 +647,8 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me
Add an instance of our [`AutorefsInlineProcessor`][mkdocs_autorefs.references.AutorefsInlineProcessor] to the Markdown parser.
Also optionally add an instance of our [`AnchorScannerTreeProcessor`][mkdocs_autorefs.references.AnchorScannerTreeProcessor]
to the Markdown parser if a reference to the autorefs plugin was passed to this extension.
and [`BacklinksTreeProcessor`][mkdocs_autorefs.references.BacklinksTreeProcessor] to the Markdown parser
if a reference to the autorefs plugin was passed to this extension.
Arguments:
md: A `markdown.Markdown` instance.
Expand All @@ -637,10 +658,21 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me
AutorefsInlineProcessor.name,
priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor
)
if self.plugin is not None and self.plugin.scan_toc and "attr_list" in md.treeprocessors:
_log_enabling_markdown_anchors()
md.treeprocessors.register(
AnchorScannerTreeProcessor(self.plugin, md),
AnchorScannerTreeProcessor.name,
priority=0,
)
if self.plugin is not None:
# Markdown anchors require the `attr_list` extension.
if self.plugin.scan_toc and "attr_list" in md.treeprocessors:
_log_enabling_markdown_anchors()
md.treeprocessors.register(
AnchorScannerTreeProcessor(self.plugin, md),
AnchorScannerTreeProcessor.name,
priority=0,
)
# Backlinks require IDs on headings, which are either set by `toc`,
# or manually by the user with `attr_list`.
if self.plugin.record_backlinks and ("attr_list" in md.treeprocessors or "toc" in md.treeprocessors):
_log_enabling_backlinks()
md.treeprocessors.register(
BacklinksTreeProcessor(self.plugin, md),
BacklinksTreeProcessor.name,
priority=0,
)
Loading

0 comments on commit 3341add

Please sign in to comment.