feat: Update ToC with generated headings

pawamoy · Apr 17, 2023 · 5ea2263 · 5ea2263
1 parent 5fb5a30
commit 5ea2263
Show file tree

Hide file tree

Showing 7 changed files with 283 additions and 42 deletions.
diff --git a/docs/gallery.md b/docs/gallery.md
@@ -157,6 +157,6 @@ In this example, we inspect the `argparse` parser to build better-looking
 Markdown/HTML contents. We simply use the description and iterate on options,
 but more complex stuff is possible of course.
 
-```python exec="true" source="tabbed-right" title="CLI help using argparse parser"
+```python exec="true" source="tabbed-right" updatetoc="no" title="CLI help using argparse parser"
 --8<-- "gallery/argparse.py"
 ```
diff --git a/docs/usage/index.md b/docs/usage/index.md
@@ -38,6 +38,23 @@ System information:
 ```
 ````
 
+## Generated headings in Table of Contents
+
+If you are using Python Markdown's `toc` extension,
+or writing docs with MkDocs, you will notice that the headings
+you generated by executing a code block appear in the table of contents.
+If you don't want those headings to appear in the ToC, you can use
+the `updatetoc="no"` boolean option:
+
+````md
+```python exec="1" updatetoc="no"
+print("# XL heading\n")
+print("## L heading\n")
+print("### M heading\n")
+print("#### S heading\n")
+```
+````
+
 ## Render the source code as well
 
 It's possible to render both the result of the executed code block

diff --git a/src/markdown_exec/__init__.py b/src/markdown_exec/__init__.py
@@ -70,6 +70,7 @@ def validator(
     result_value = inputs.pop("result", "")
     returncode_value = int(inputs.pop("returncode", "0"))
     session_value = inputs.pop("session", "")
+    update_toc_value = _to_bool(inputs.pop("updatetoc", "yes"))
     tabs_value = inputs.pop("tabs", "|".join(default_tabs))
     tabs = tuple(_tabs_re.split(tabs_value, maxsplit=1))
     options["id"] = id_value
@@ -78,6 +79,7 @@ def validator(
     options["result"] = result_value
     options["returncode"] = returncode_value
     options["session"] = session_value
+    options["update_toc"] = update_toc_value
     options["tabs"] = tabs
     options["extra"] = inputs
     return True

diff --git a/src/markdown_exec/formatters/base.py b/src/markdown_exec/formatters/base.py
@@ -53,6 +53,7 @@ def base_format(
     returncode: int = 0,
     transform_source: Callable[[str], tuple[str, str]] | None = None,
     session: str | None = None,
+    update_toc: bool = True,
     **options: Any,
 ) -> Markup:
     """Execute code and return HTML.
@@ -72,12 +73,14 @@ def base_format(
             The input source is the one that is ran, the output source is the one that is
             rendered (when the source option is enabled).
         session: A session name, to persist state between executed code blocks.
+        update_toc: Whether to include generated headings
+            into the Markdown table of contents (toc extension).
         **options: Additional options passed from the formatter.
 
     Returns:
         HTML contents.
     """
-    markdown = MarkdownConverter(md)
+    markdown = MarkdownConverter(md, update_toc=update_toc)
     extra = options.get("extra", {})
 
     if transform_source:

diff --git a/src/markdown_exec/processors.py b/src/markdown_exec/processors.py
@@ -0,0 +1,118 @@
+"""This module contains Markdown tree processors used to integrate generated headings into the ToC."""
+
+from __future__ import annotations
+
+import copy
+import re
+from typing import TYPE_CHECKING
+from xml.etree.ElementTree import Element
+
+from markdown.treeprocessors import Treeprocessor
+from markdown.util import HTML_PLACEHOLDER_RE
+
+if TYPE_CHECKING:
+    from markdown import Markdown
+    from markupsafe import Markup
+
+
+# code taken from mkdocstrings, credits to @oprypin
+class IdPrependingTreeprocessor(Treeprocessor):
+    """Tree processor that namespaces element identifiers with a prefix.
+
+    When `id_prefix` is not empty, it is prepended to every `id` and `name` attribute,
+    to same-page `href` anchors (values starting with `#`),
+    and to the `for` attribute of `label` elements.
+    """
+
+    name = "markdown_exec_ids"
+
+    def __init__(self, md: Markdown, id_prefix: str) -> None:
+        """Initialize the object.
+
+        Arguments:
+            md: A `markdown.Markdown` instance.
+            id_prefix: The prefix to prepend. An empty string turns the processor into a no-op.
+        """
+        super().__init__(md)
+        self.id_prefix = id_prefix
+
+    def run(self, root: Element) -> None:
+        """Prefix the identifiers of all elements under `root`, in place.
+
+        Arguments:
+            root: The root element of the tree to process.
+        """
+        prefix = self.id_prefix
+        if not prefix:
+            return
+
+        for element in root.iter():
+            # Plain identifiers: the prefix goes in front of the existing value.
+            for attribute in ("id", "name"):
+                value = element.get(attribute)
+                if value:
+                    element.set(attribute, prefix + value)
+
+            # Only same-page anchors are rewritten; the prefix goes after the leading '#'.
+            target = element.get("href")
+            if target and target.startswith("#"):
+                element.set("href", "#" + prefix + target[1:])
+
+            if element.tag != "label":
+                continue
+            labelled = element.get("for")
+            if labelled:
+                element.set("for", prefix + labelled)
+
+
+# code taken from mkdocstrings, credits to @oprypin
+class HeadingReportingTreeprocessor(Treeprocessor):
+    """Records the heading elements encountered in the document.
+
+    A shallow copy of every `h1` to `h6` element found in the tree is appended
+    to the list given at construction time, so the caller can read them afterwards.
+    """
+
+    # Named after this package, like the other processors of this module.
+    name = "markdown_exec_record_headings"
+    regex = re.compile("[Hh][1-6]")
+
+    def __init__(self, md: Markdown, headings: list[Element]):
+        """Initialize the object.
+
+        Arguments:
+            md: A `markdown.Markdown` instance.
+            headings: The list to which recorded headings are appended.
+        """
+        super().__init__(md)
+        self.headings = headings
+
+    def run(self, root: Element) -> None:
+        """Append a copy of each heading under `root` to `self.headings`.
+
+        Arguments:
+            root: The root element of the tree to inspect.
+        """
+        # The 'toc' extension is optional: without it there is no permalink to undo,
+        # and looking it up unconditionally would raise a KeyError.
+        permalink_class = None
+        if "toc" in self.md.treeprocessors:
+            permalink_class = self.md.treeprocessors["toc"].permalink_class
+
+        for el in root.iter():
+            # Comments and processing instructions have a non-string tag: skip them.
+            if not isinstance(el.tag, str) or not self.regex.fullmatch(el.tag):
+                continue
+            # Work on a copy so that the rendered tree itself is left untouched.
+            heading = copy.copy(el)
+            # 'toc' extension's first pass (which we require to build heading stubs/ids) also edits the HTML.
+            # Undo the permalink edit so we can pass this heading to the outer pass of the 'toc' extension.
+            if permalink_class is not None and len(heading) > 0 and heading[-1].get("class") == permalink_class:
+                del heading[-1]
+            self.headings.append(heading)
+
+
+class InsertHeadings(Treeprocessor):
+    """Our headings insertor.
+
+    For each element whose text is a raw-HTML placeholder pointing at markup
+    known to `self.headings`, the matching headings are appended to that element,
+    wrapped in a `div` with the `markdown-exec` class.
+    """
+
+    name = "markdown_exec_insert_headings"
+
+    def __init__(self, md: Markdown):
+        """Initialize the object.
+
+        Arguments:
+            md: A `markdown.Markdown` instance.
+        """
+        super().__init__(md)
+        # Maps stashed markup to the heading elements to insert next to it.
+        self.headings: dict[Markup, list[Element]] = {}
+
+    def run(self, root: Element) -> None:
+        """Insert the registered headings next to their placeholder.
+
+        Arguments:
+            root: The root element of the tree to process.
+        """
+        if not self.headings:
+            return
+
+        for element in root.iter():
+            placeholder = HTML_PLACEHOLDER_RE.match(element.text or "")
+            if placeholder is None:
+                continue
+
+            # The placeholder carries the index of the stashed raw HTML block.
+            index = int(placeholder.group(1))
+            stashed: Markup = self.md.htmlStash.rawHtmlBlocks[index]  # type: ignore[assignment]
+            if stashed not in self.headings:
+                continue
+
+            container = Element("div", {"class": "markdown-exec"})
+            container.extend(self.headings[stashed])
+            element.append(container)
+
+
+class RemoveHeadings(Treeprocessor):
+    """Our headings remover.
+
+    Deletes the `div` containers with the `markdown-exec` class found among
+    the children of the root's children, while keeping any text they hold.
+    """
+
+    name = "markdown_exec_remove_headings"
+
+    def run(self, root: Element) -> None:
+        """Remove the heading containers and re-attach their text.
+
+        Arguments:
+            root: The root element of the tree to process.
+        """
+        # NOTE(review): only grandchildren of `root` are inspected; containers nested deeper are kept.
+        for el in reversed(root):
+            # Text carried over from removed containers belongs to this parent only:
+            # start afresh for each one so it cannot leak into (and be duplicated in) a sibling.
+            carry_text = ""
+            for subel in reversed(el):  # Reversed mainly for the ability to mutate during iteration.
+                if subel.tag == "div" and subel.get("class") == "markdown-exec":
+                    # Delete the duplicated headings along with their container, but keep the text (i.e. the actual HTML).
+                    carry_text = (subel.text or "") + carry_text
+                    el.remove(subel)
+                elif carry_text:
+                    # Hand the text over to the closest preceding sibling that is kept.
+                    subel.tail = (subel.tail or "") + carry_text
+                    carry_text = ""
+            if carry_text:
+                # No preceding sibling was left to receive the text: it becomes the parent's own text.
+                el.text = (el.text or "") + carry_text
diff --git a/src/markdown_exec/rendering.py b/src/markdown_exec/rendering.py
@@ -2,14 +2,21 @@
 
 from __future__ import annotations
 
+from functools import lru_cache
 from itertools import chain
 from textwrap import indent
 from typing import TYPE_CHECKING
 
 from markdown import Markdown
-from markdown.treeprocessors import Treeprocessor
 from markupsafe import Markup
 
+from markdown_exec.processors import (
+    HeadingReportingTreeprocessor,
+    IdPrependingTreeprocessor,
+    InsertHeadings,
+    RemoveHeadings,
+)
+
 if TYPE_CHECKING:
     from xml.etree.ElementTree import Element
 
@@ -99,49 +106,40 @@ def add_source(
     raise ValueError(f"unsupported location for sources: {location}")
 
 
-# code taken from mkdocstrings, credits to @oprypin
-class _IdPrependingTreeprocessor(Treeprocessor):
-    """Prepend the configured prefix to IDs of all HTML elements."""
-
-    name = "markdown_exec_ids"
-
-    def __init__(self, md: Markdown, id_prefix: str) -> None:
-        super().__init__(md)
-        self.id_prefix = id_prefix
-
-    def run(self, root: Element) -> None:
-        if not self.id_prefix:
-            return
-        for el in root.iter():
-            id_attr = el.get("id")
-            if id_attr:
-                el.set("id", self.id_prefix + id_attr)
-
-            href_attr = el.get("href")
-            if href_attr and href_attr.startswith("#"):
-                el.set("href", "#" + self.id_prefix + href_attr[1:])
-
-            name_attr = el.get("name")
-            if name_attr:
-                el.set("name", self.id_prefix + name_attr)
-
-            if el.tag == "label":
-                for_attr = el.get("for")
-                if for_attr:
-                    el.set("for", self.id_prefix + for_attr)
+def _register_headings_processors(md: Markdown) -> None:
+    """Register the headings insertor and remover on `md`, once per instance.
+
+    The registry itself is checked rather than memoizing on `md`:
+    an unbounded cache keyed on the instance would keep every `Markdown` object
+    (and the headings stored on its processors) alive for the whole process.
+
+    Arguments:
+        md: The `markdown.Markdown` instance to register the processors on.
+    """
+    if InsertHeadings.name not in md.treeprocessors:
+        md.treeprocessors.register(
+            InsertHeadings(md),
+            InsertHeadings.name,
+            priority=75,  # higher than the built-in 'inline' treeprocessor (20), so it runs first
+        )
+    if RemoveHeadings.name not in md.treeprocessors:
+        md.treeprocessors.register(
+            RemoveHeadings(md),
+            RemoveHeadings.name,
+            priority=4,  # right after toc
+        )
 
 
-def _mimic(md: Markdown) -> Markdown:
+def _mimic(md: Markdown, headings: list[Element], *, update_toc: bool = True) -> Markdown:
+    """Create a new `Markdown` instance configured like the given one.
+
+    The new instance gets the extensions registered on the original instance,
+    plus `tables` and `md_in_html`, and an ID-prepending tree processor with an empty prefix.
+    The original instance is stored on the new one as `_original_md`,
+    so that mimicking a mimic resolves to the same original.
+
+    Arguments:
+        md: The instance to mimic (or a previous mimic of it).
+        headings: The list the heading-reporting processor appends to.
+        update_toc: Whether to register the heading processors:
+            the insertor/remover on the original instance, the reporter on the new one.
+
+    Returns:
+        The new `Markdown` instance.
+    """
     md = getattr(md, "_original_md", md)
     new_md = Markdown()
     extensions = list(chain(md.registeredExtensions, ["tables", "md_in_html"]))
     new_md.registerExtensions(extensions, {})
     new_md.treeprocessors.register(
-        _IdPrependingTreeprocessor(md, ""),
-        _IdPrependingTreeprocessor.name,
-        priority=4,  # right after 'toc' (needed because that extension adds ids to headers)
+        IdPrependingTreeprocessor(md, ""),
+        IdPrependingTreeprocessor.name,
+        priority=4,  # right after 'toc' (needed because that extension adds ids to headings)
     )
     new_md._original_md = md  # type: ignore[attr-defined]
+
+    if update_toc:
+        # NOTE(review): both processors are constructed with the original `md`, not `new_md`;
+        # the reporter therefore reads the 'toc' settings of the original instance.
+        _register_headings_processors(md)
+        new_md.treeprocessors.register(
+            HeadingReportingTreeprocessor(md, headings),
+            HeadingReportingTreeprocessor.name,
+            priority=1,  # Close to the end.
+        )
+
     return new_md
 
 
@@ -150,8 +148,10 @@ class MarkdownConverter:
 
     counter: int = 0
 
-    def __init__(self, md: Markdown) -> None:  # noqa: D107
+    def __init__(self, md: Markdown, *, update_toc: bool = True) -> None:  # noqa: D107
+        # The Markdown instance this converter mimics for each conversion.
         self._md_ref: Markdown = md
+        # Headings recorded during conversions, until they are read through `headings`.
+        self._headings: list[Element] = []
+        # Whether recorded headings are handed over to the referenced instance after a conversion.
+        self._update_toc = update_toc
 
     def convert(self, text: str, stash: dict[str, str] | None = None) -> Markup:
         """Convert Markdown text to safe HTML.
@@ -163,19 +163,31 @@ def convert(self, text: str, stash: dict[str, str] | None = None) -> Markup:
         Returns:
             Safe HTML.
         """
-        md = _mimic(self._md_ref)
+        md = _mimic(self._md_ref, self._headings, update_toc=self._update_toc)
 
         # prepare for conversion
-        md.treeprocessors[_IdPrependingTreeprocessor.name].id_prefix = f"exec-{MarkdownConverter.counter}--"
+        md.treeprocessors[IdPrependingTreeprocessor.name].id_prefix = f"exec-{MarkdownConverter.counter}--"
         MarkdownConverter.counter += 1
 
         try:
             converted = md.convert(text)
         finally:
-            md.treeprocessors[_IdPrependingTreeprocessor.name].id_prefix = ""
+            md.treeprocessors[IdPrependingTreeprocessor.name].id_prefix = ""
 
         # restore html from stash
         for placeholder, stashed in (stash or {}).items():
             converted = converted.replace(placeholder, stashed)
 
-        return Markup(converted)
+        markup = Markup(converted)
+
+        # pass headings to upstream conversion layer
+        if self._update_toc:
+            self._md_ref.treeprocessors[InsertHeadings.name].headings[markup] = self.headings
+
+        return markup
+
+    @property
+    def headings(self) -> list[Element]:
+        """Headings recorded since the last access.
+
+        Reading this property hands over the accumulated list
+        and replaces it with a new, empty one.
+        """
+        collected, self._headings = self._headings, []
+        return collected