Skip to content

Commit

Permalink
feat: Update ToC with generated headings
Browse files Browse the repository at this point in the history
  • Loading branch information
pawamoy committed Apr 17, 2023
1 parent 5fb5a30 commit 5ea2263
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 42 deletions.
2 changes: 1 addition & 1 deletion docs/gallery.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,6 @@ In this example, we inspect the `argparse` parser to build better-looking
Markdown/HTML contents. We simply use the description and iterate on options,
but more complex stuff is possible of course.

```python exec="true" source="tabbed-right" title="CLI help using argparse parser"
```python exec="true" source="tabbed-right" updatetoc="no" title="CLI help using argparse parser"
--8<-- "gallery/argparse.py"
```
17 changes: 17 additions & 0 deletions docs/usage/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@ System information:
```
````

## Generated headings in Table of Contents

If you are using Python Markdown's `toc` extension,
or writing docs with MkDocs, you will notice that headings
generated by executing a code block appear in the table of contents.
If you don't want those headings to appear in the ToC, set
the boolean option `updatetoc="no"`:

````md
```python exec="1" updatetoc="no"
print("# XL heading\n")
print("## L heading\n")
print("### M heading\n")
print("#### S heading\n")
```
````

## Render the source code as well

It's possible to render both the result of the executed code block
Expand Down
2 changes: 2 additions & 0 deletions src/markdown_exec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def validator(
result_value = inputs.pop("result", "")
returncode_value = int(inputs.pop("returncode", "0"))
session_value = inputs.pop("session", "")
update_toc_value = _to_bool(inputs.pop("updatetoc", "yes"))
tabs_value = inputs.pop("tabs", "|".join(default_tabs))
tabs = tuple(_tabs_re.split(tabs_value, maxsplit=1))
options["id"] = id_value
Expand All @@ -78,6 +79,7 @@ def validator(
options["result"] = result_value
options["returncode"] = returncode_value
options["session"] = session_value
options["update_toc"] = update_toc_value
options["tabs"] = tabs
options["extra"] = inputs
return True
Expand Down
5 changes: 4 additions & 1 deletion src/markdown_exec/formatters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def base_format(
returncode: int = 0,
transform_source: Callable[[str], tuple[str, str]] | None = None,
session: str | None = None,
update_toc: bool = True,
**options: Any,
) -> Markup:
"""Execute code and return HTML.
Expand All @@ -72,12 +73,14 @@ def base_format(
The input source is the one that is ran, the output source is the one that is
rendered (when the source option is enabled).
session: A session name, to persist state between executed code blocks.
update_toc: Whether to include generated headings
into the Markdown table of contents (toc extension).
**options: Additional options passed from the formatter.
Returns:
HTML contents.
"""
markdown = MarkdownConverter(md)
markdown = MarkdownConverter(md, update_toc=update_toc)
extra = options.get("extra", {})

if transform_source:
Expand Down
118 changes: 118 additions & 0 deletions src/markdown_exec/processors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""This module contains a Markdown extension allowing to integrate generated headings into the ToC."""

from __future__ import annotations

import copy
import re
from typing import TYPE_CHECKING
from xml.etree.ElementTree import Element

from markdown.treeprocessors import Treeprocessor
from markdown.util import HTML_PLACEHOLDER_RE

if TYPE_CHECKING:
from markdown import Markdown
from markupsafe import Markup


# code taken from mkdocstrings, credits to @oprypin
class IdPrependingTreeprocessor(Treeprocessor):
    """Tree processor that namespaces identifiers with a configurable prefix.

    Every element of the tree gets the prefix prepended to its non-empty
    `id` and `name` attributes, to its `href` attribute when it is an
    in-page anchor (starts with `#`), and, for `label` elements only,
    to its `for` attribute.
    """

    name = "markdown_exec_ids"

    def __init__(self, md: Markdown, id_prefix: str) -> None:
        """Initialize the object.

        Arguments:
            md: A `markdown.Markdown` instance.
            id_prefix: The prefix to prepend; an empty string disables the processor.
        """
        super().__init__(md)
        self.id_prefix = id_prefix

    def run(self, root: Element) -> None:
        """Prefix identifier-like attributes in place; do nothing when the prefix is empty.

        Arguments:
            root: The root element of the tree to process.
        """
        prefix = self.id_prefix
        if not prefix:
            return

        for element in root.iter():
            # Attributes holding a bare identifier. `for` is only meaningful on labels.
            plain_attrs = ("id", "name", "for") if element.tag == "label" else ("id", "name")
            for attr in plain_attrs:
                value = element.get(attr)
                if value:
                    element.set(attr, prefix + value)

            # Only rewrite in-page anchors; other links are left untouched.
            target = element.get("href")
            if target and target.startswith("#"):
                element.set("href", "#" + prefix + target[1:])


# code taken from mkdocstrings, credits to @oprypin
class HeadingReportingTreeprocessor(Treeprocessor):
    """Record copies of the heading elements (`h1` to `h6`) encountered in the document.

    The copies are appended to the list given at construction time,
    so the owner of that list can read them after the conversion.
    """

    # NOTE(review): the name keeps a `mkdocstrings` prefix from the code's origin while the
    # sibling processors use `markdown_exec_`; left unchanged since callers look it up by name.
    name = "mkdocstrings_headings_list"
    regex = re.compile("[Hh][1-6]")

    def __init__(self, md: Markdown, headings: list[Element]):
        """Initialize the object.

        Arguments:
            md: A `markdown.Markdown` instance.
            headings: The list that recorded headings are appended to.
        """
        super().__init__(md)
        self.headings = headings

    def run(self, root: Element) -> None:
        """Append a shallow copy of each heading found under `root` to `self.headings`.

        Arguments:
            root: The root element of the tree to inspect.
        """
        # Resolved once: the value does not change while the tree is walked.
        permalink_class = self._permalink_class()
        for el in root.iter():
            # Comments and processing instructions have a non-string tag: skip them.
            if not isinstance(el.tag, str) or not self.regex.fullmatch(el.tag):
                continue
            # Shallow copy: removing a child below must not alter the rendered tree.
            heading = copy.copy(el)
            # 'toc' extension's first pass (which we require to build heading stubs/ids) also edits the HTML.
            # Undo the permalink edit so we can pass this heading to the outer pass of the 'toc' extension.
            if permalink_class is not None and len(heading) > 0 and heading[-1].get("class") == permalink_class:
                del heading[-1]
            self.headings.append(heading)

    def _permalink_class(self) -> str | None:
        """Return the permalink CSS class of the `toc` processor, or `None` when it is unavailable."""
        processors = self.md.treeprocessors
        # Without the 'toc' extension there is no permalink to strip; do not fail on the lookup.
        if "toc" not in processors:
            return None
        return getattr(processors["toc"], "permalink_class", None)


class InsertHeadings(Treeprocessor):
    """Tree processor that inserts recorded headings next to stashed HTML placeholders.

    The `headings` mapping associates a stashed markup value with the heading
    elements recorded for it; it starts empty and is filled from outside this class.
    """

    name = "markdown_exec_insert_headings"

    def __init__(self, md: Markdown):
        """Initialize the object.

        Arguments:
            md: A `markdown.Markdown` instance.
        """
        super().__init__(md)
        self.headings: dict[Markup, list[Element]] = {}

    def run(self, root: Element) -> None:
        """Append a `div.markdown-exec` holding the headings to each matching placeholder element.

        Arguments:
            root: The root element of the tree to process.
        """
        if not self.headings:
            return

        for node in root.iter():
            found = HTML_PLACEHOLDER_RE.match(node.text or "")
            if found is None:
                continue

            # The placeholder carries the index of the stashed raw HTML block.
            stash_index = int(found.group(1))
            stashed: Markup = self.md.htmlStash.rawHtmlBlocks[stash_index]  # type: ignore[assignment]
            if stashed not in self.headings:
                continue

            wrapper = Element("div", {"class": "markdown-exec"})
            wrapper.extend(self.headings[stashed])
            node.append(wrapper)


class RemoveHeadings(Treeprocessor):
    """Tree processor that removes `div.markdown-exec` containers from the tree.

    Containers are removed at any depth. The text of a removed container
    is kept by moving it to the surrounding element.
    """

    name = "markdown_exec_remove_headings"

    def run(self, root: Element) -> None:
        """Remove every `div.markdown-exec` container found under `root`.

        Arguments:
            root: The root element of the tree to process.
        """
        self._remove_duplicated_headings(root)

    def _remove_duplicated_headings(self, parent: Element) -> None:
        """Remove containers among the children of `parent`, then recurse into the remaining children.

        Arguments:
            parent: The element whose direct children are inspected.
        """
        # Text of removed containers, waiting to be attached to a surviving node.
        # Local to this call so that it never leaks from one parent to another.
        carry_text = ""
        for el in reversed(parent):  # Reversed mainly for the ability to mutate during iteration.
            if el.tag == "div" and el.get("class") == "markdown-exec":
                # Delete the duplicated headings along with their container, but keep the text (i.e. the actual HTML).
                carry_text = (el.text or "") + carry_text
                parent.remove(el)
            else:
                if carry_text:
                    # The carried text followed this sibling in document order: it becomes its tail.
                    el.tail = (el.tail or "") + carry_text
                    carry_text = ""
                self._remove_duplicated_headings(el)

        if carry_text:
            # No preceding sibling was left to receive the text: attach it to the parent itself.
            parent.text = (parent.text or "") + carry_text
92 changes: 52 additions & 40 deletions src/markdown_exec/rendering.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,21 @@

from __future__ import annotations

from functools import lru_cache
from itertools import chain
from textwrap import indent
from typing import TYPE_CHECKING

from markdown import Markdown
from markdown.treeprocessors import Treeprocessor
from markupsafe import Markup

from markdown_exec.processors import (
HeadingReportingTreeprocessor,
IdPrependingTreeprocessor,
InsertHeadings,
RemoveHeadings,
)

if TYPE_CHECKING:
from xml.etree.ElementTree import Element

Expand Down Expand Up @@ -99,49 +106,40 @@ def add_source(
raise ValueError(f"unsupported location for sources: {location}")


# code taken from mkdocstrings, credits to @oprypin
class _IdPrependingTreeprocessor(Treeprocessor):
"""Prepend the configured prefix to IDs of all HTML elements."""

name = "markdown_exec_ids"

def __init__(self, md: Markdown, id_prefix: str) -> None:
super().__init__(md)
self.id_prefix = id_prefix

def run(self, root: Element) -> None:
if not self.id_prefix:
return
for el in root.iter():
id_attr = el.get("id")
if id_attr:
el.set("id", self.id_prefix + id_attr)

href_attr = el.get("href")
if href_attr and href_attr.startswith("#"):
el.set("href", "#" + self.id_prefix + href_attr[1:])

name_attr = el.get("name")
if name_attr:
el.set("name", self.id_prefix + name_attr)

if el.tag == "label":
for_attr = el.get("for")
if for_attr:
el.set("for", self.id_prefix + for_attr)
def _register_headings_processors(md: Markdown) -> None:
    """Register the heading insertion and removal tree processors on `md`, once per instance.

    Registration is skipped for a processor whose name is already in the registry,
    so repeated calls neither replace an existing `InsertHeadings` instance
    (and the headings it holds) nor keep a reference to `md` in a module-level cache.

    Arguments:
        md: A `markdown.Markdown` instance.
    """
    processors = md.treeprocessors
    if InsertHeadings.name not in processors:
        processors.register(
            InsertHeadings(md),
            InsertHeadings.name,
            priority=75,  # right before markdown.blockprocessors.HashHeaderProcessor
        )
    if RemoveHeadings.name not in processors:
        processors.register(
            RemoveHeadings(md),
            RemoveHeadings.name,
            priority=4,  # right after toc
        )


def _mimic(md: Markdown) -> Markdown:
def _mimic(md: Markdown, headings: list[Element], *, update_toc: bool = True) -> Markdown:
md = getattr(md, "_original_md", md)
new_md = Markdown()
extensions = list(chain(md.registeredExtensions, ["tables", "md_in_html"]))
new_md.registerExtensions(extensions, {})
new_md.treeprocessors.register(
_IdPrependingTreeprocessor(md, ""),
_IdPrependingTreeprocessor.name,
priority=4, # right after 'toc' (needed because that extension adds ids to headers)
IdPrependingTreeprocessor(md, ""),
IdPrependingTreeprocessor.name,
priority=4, # right after 'toc' (needed because that extension adds ids to headings)
)
new_md._original_md = md # type: ignore[attr-defined]

if update_toc:
_register_headings_processors(md)
new_md.treeprocessors.register(
HeadingReportingTreeprocessor(md, headings),
HeadingReportingTreeprocessor.name,
priority=1, # Close to the end.
)

return new_md


Expand All @@ -150,8 +148,10 @@ class MarkdownConverter:

counter: int = 0

def __init__(self, md: Markdown) -> None: # noqa: D107
def __init__(self, md: Markdown, *, update_toc: bool = True) -> None: # noqa: D107
self._md_ref: Markdown = md
self._headings: list[Element] = []
self._update_toc = update_toc

def convert(self, text: str, stash: dict[str, str] | None = None) -> Markup:
"""Convert Markdown text to safe HTML.
Expand All @@ -163,19 +163,31 @@ def convert(self, text: str, stash: dict[str, str] | None = None) -> Markup:
Returns:
Safe HTML.
"""
md = _mimic(self._md_ref)
md = _mimic(self._md_ref, self._headings, update_toc=self._update_toc)

# prepare for conversion
md.treeprocessors[_IdPrependingTreeprocessor.name].id_prefix = f"exec-{MarkdownConverter.counter}--"
md.treeprocessors[IdPrependingTreeprocessor.name].id_prefix = f"exec-{MarkdownConverter.counter}--"
MarkdownConverter.counter += 1

try:
converted = md.convert(text)
finally:
md.treeprocessors[_IdPrependingTreeprocessor.name].id_prefix = ""
md.treeprocessors[IdPrependingTreeprocessor.name].id_prefix = ""

# restore html from stash
for placeholder, stashed in (stash or {}).items():
converted = converted.replace(placeholder, stashed)

return Markup(converted)
markup = Markup(converted)

# pass headings to upstream conversion layer
if self._update_toc:
self._md_ref.treeprocessors[InsertHeadings.name].headings[markup] = self.headings

return markup

@property
def headings(self) -> list[Element]: # noqa: D102
headings = self._headings
self._headings = []
return headings
Loading

0 comments on commit 5ea2263

Please sign in to comment.