From 2b242494b0b407fa6d795fec3189359309511b98 Mon Sep 17 00:00:00 2001
From: Eric Huss
Date: Tue, 16 Sep 2025 20:14:00 -0700
Subject: [PATCH] Add a new HTML rendering pipeline

This rewrites the HTML rendering pipeline to use a tree data structure,
and implements a custom HTML serializer. The intent is to make it easier
to make changes and to manipulate the output. This should make some
future changes much easier.

This is a large change, but I'll try to briefly summarize what's
changing:

- All of the HTML rendering support has been moved out of
  mdbook-markdown into mdbook-html. For now, all of the API surface is
  private, though we may consider ways to safely expose it in the
  future.
- Instead of using pulldown-cmark's html serializer, this takes the
  pulldown-cmark events and translates them into a tree data structure
  (using the ego-tree crate to define the tree). See `tree.rs`.
- HTML in the markdown document is parsed using html5ever, and then
  lives inside the same tree data structure. See `tokenizer.rs`.
- Transformations are then applied to the tree data structure. For
  example, adding header links or hiding code lines.
- Serialization is a simple process of writing out the nodes to a
  string. See `serialize.rs`.
- The search indexer works on the tree structure instead of re-rendering
  every chapter twice. See `html_handlebars/search.rs`.
- The print page now takes a very different approach of taking the same
  tree structure built for rendering the chapters, and applies
  transformations to it. This avoids re-parsing everything again. See
  `print.rs`.
    - I changed the linking behavior so that links on the print page
      link to items on the print page instead of outside the print page.
- There are a variety of small changes to how it serializes as can be
  seen in the changes to the tests. Some highlights:
    - Code blocks no longer have a second layer of `<pre>` tags wrapping
      it.
    - Fixed a minor issue where a rust code block with a specific
      edition was having the wrong classes when there was a default
      edition.
- Drops the ammonia dependency, which significantly reduces the number
  of dependencies. It was only being used for a very minor task, and
  we can handle it much more easily now.
- Drops `pretty_assertions`, they are no longer used (mostly being
  migrated to the testsuite).

There's obviously a lot of risk trying to parse everything to such a low
level, but I think the benefits are worth it. Also, the API isn't super
ergonomic compared to say javascript (there are no selectors), but it
works well enough so far.

I have not run this through rigorous benchmarking, but it does have a
very noticeable performance improvement, especially in a debug build.

I expect in the future that we'll want to expose some kind of
integration with extensions so they have access to this tree structure
(or some kind of tree structure).

Closes https://github.com/rust-lang/mdBook/issues/1736
---
 .github/workflows/main.yml                    |    2 +-
 Cargo.lock                                    |  365 +-----
 Cargo.toml                                    |    6 +-
 crates/mdbook-core/src/config.rs              |    7 +
 crates/mdbook-core/src/utils/html.rs          |   78 ++
 crates/mdbook-core/src/utils/mod.rs           |  174 +--
 crates/mdbook-driver/src/load.rs              |    5 +-
 crates/mdbook-html/Cargo.toml                 |    7 +-
 crates/mdbook-html/src/html/hide_lines.rs     |  182 +++
 crates/mdbook-html/src/html/mod.rs            |  105 ++
 crates/mdbook-html/src/html/print.rs          |  217 ++++
 crates/mdbook-html/src/html/serialize.rs      |  112 ++
 crates/mdbook-html/src/html/tests.rs          |   53 +
 crates/mdbook-html/src/html/tokenizer.rs      |   83 ++
 crates/mdbook-html/src/html/tree.rs           | 1054 +++++++++++++++++
 .../src/html_handlebars/hbs_renderer.rs       |  744 +-----------
 .../src/html_handlebars/helpers/toc.rs        |   11 +-
 .../mdbook-html/src/html_handlebars/search.rs |  277 ++---
 crates/mdbook-html/src/lib.rs                 |   14 +-
 crates/mdbook-html/src/utils.rs               |  107 +-
 crates/mdbook-markdown/src/lib.rs             |  376 +-----
 crates/mdbook-markdown/src/tests.rs           |  153 ---
 guide/src/guide/installation.md               |    2 +-
 tests/testsuite/includes.rs                   |   16 +-
 tests/testsuite/markdown.rs                   |   36 +-
 .../basic_markdown/expected/blockquotes.html  |    7 +-
 .../basic_markdown/expected/code-blocks.html  |   14 +-
 .../basic_markdown/expected/html.html         |   16 +-
 .../basic_markdown/expected/images.html       |    4 +-
 .../basic_markdown/expected/inlines.html      |    6 +-
 .../basic_markdown/expected/lists.html        |   10 +-
 .../footnotes/expected/footnotes.html         |    3 +-
 tests/testsuite/playground.rs                 |    4 +-
 .../print/chapter_no_h1/expected/print.html   |   10 +-
 .../print/duplicate_ids/expected/print.html   |   17 +-
 .../print/relative_links/expected/print.html  |   24 +-
 tests/testsuite/rendering.rs                  |   40 +-
 .../expected/default-rust-edition.html        |   12 +-
 .../expected/editable-rust.html               |    8 +-
 .../rendering/fontawesome/expected/fa.html    |    2 +-
 .../hidelines/expected/hide-lines.html        |    4 +-
 tests/testsuite/search.rs                     |    2 +-
 .../reasonable_search_index/expected_index.js |    2 +-
 43 files changed, 2307 insertions(+), 2064 deletions(-)
 create mode 100644 crates/mdbook-core/src/utils/html.rs
 create mode 100644 crates/mdbook-html/src/html/hide_lines.rs
 create mode 100644 crates/mdbook-html/src/html/mod.rs
 create mode 100644 crates/mdbook-html/src/html/print.rs
 create mode 100644 crates/mdbook-html/src/html/serialize.rs
 create mode 100644 crates/mdbook-html/src/html/tests.rs
 create mode 100644 crates/mdbook-html/src/html/tokenizer.rs
 create mode 100644 crates/mdbook-html/src/html/tree.rs
 delete mode 100644 crates/mdbook-markdown/src/tests.rs

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 18b4b82ec2..ac21ae7eda 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -40,7 +40,7 @@ jobs:
           - name: msrv
             os: ubuntu-22.04
             # sync MSRV with docs: guide/src/guide/installation.md and Cargo.toml
-            rust: 1.85.0
+            rust: 1.88.0
             target: x86_64-unknown-linux-gnu
     name: ${{ matrix.name }}
     steps:
diff --git a/Cargo.lock b/Cargo.lock
index b32495c33f..e913aa3040 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -26,19 +26,6 @@ dependencies = [
  "memchr",
 ]
 
-[[package]]
-name = "ammonia"
-version = "4.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6b346764dd0814805de8abf899fe03065bcee69bb1a4771c785817e39f3978f"
-dependencies = [
- "cssparser",
- "html5ever 0.35.0",
- "maplit",
- "tendril",
- "url",
-]
-
 [[package]]
 name = "anstream"
 version = "0.6.19"
@@ -356,29 +343,6 @@ dependencies = [
  "typenum",
 ]
 
-[[package]]
-name = "cssparser"
-version = "0.35.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa"
-dependencies = [
- "cssparser-macros",
- "dtoa-short",
- "itoa",
- "phf 0.11.3",
- "smallvec",
-]
-
-[[package]]
-name = "cssparser-macros"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
-dependencies = [
- "quote",
- "syn 2.0.104",
-]
-
 [[package]]
 name = "darling"
 version = "0.20.11"
@@ -451,12 +415,6 @@ dependencies = [
  "syn 2.0.104",
 ]
 
-[[package]]
-name = "diff"
-version = "0.1.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
-
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -467,38 +425,18 @@ dependencies = [
  "crypto-common",
 ]
 
-[[package]]
-name = "displaydoc"
-version = "0.2.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.104",
-]
-
-[[package]]
-name = "dtoa"
-version = "1.0.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04"
-
-[[package]]
-name = "dtoa-short"
-version = "0.3.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
-dependencies = [
- "dtoa",
-]
-
 [[package]]
 name = "dunce"
 version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
 
+[[package]]
+name = "ego-tree"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
+
 [[package]]
 name = "elasticlunr-rs"
 version = "3.0.2"
@@ -855,119 +793,12 @@ dependencies = [
  "tower-service",
 ]
 
-[[package]]
-name = "icu_collections"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47"
-dependencies = [
- "displaydoc",
- "potential_utf",
- "yoke",
- "zerofrom",
- "zerovec",
-]
-
-[[package]]
-name = "icu_locale_core"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
-dependencies = [
- "displaydoc",
- "litemap",
- "tinystr",
- "writeable",
- "zerovec",
-]
-
-[[package]]
-name = "icu_normalizer"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979"
-dependencies = [
- "displaydoc",
- "icu_collections",
- "icu_normalizer_data",
- "icu_properties",
- "icu_provider",
- "smallvec",
- "zerovec",
-]
-
-[[package]]
-name = "icu_normalizer_data"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
-
-[[package]]
-name = "icu_properties"
-version = "2.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b"
-dependencies = [
- "displaydoc",
- "icu_collections",
- "icu_locale_core",
- "icu_properties_data",
- "icu_provider",
- "potential_utf",
- "zerotrie",
- "zerovec",
-]
-
-[[package]]
-name = "icu_properties_data"
-version = "2.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632"
-
-[[package]]
-name = "icu_provider"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af"
-dependencies = [
- "displaydoc",
- "icu_locale_core",
- "stable_deref_trait",
- "tinystr",
- "writeable",
- "yoke",
- "zerofrom",
- "zerotrie",
- "zerovec",
-]
-
 [[package]]
 name = "ident_case"
 version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
 
-[[package]]
-name = "idna"
-version = "1.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
-dependencies = [
- "idna_adapter",
- "smallvec",
- "utf8_iter",
-]
-
-[[package]]
-name = "idna_adapter"
-version = "1.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
-dependencies = [
- "icu_normalizer",
- "icu_properties",
-]
-
 [[package]]
 name = "ignore"
 version = "0.4.23"
@@ -1086,12 +917,6 @@ version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
 
-[[package]]
-name = "litemap"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
-
 [[package]]
 name = "lock_api"
 version = "0.4.13"
@@ -1114,12 +939,6 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
 
-[[package]]
-name = "maplit"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
-
 [[package]]
 name = "markup5ever"
 version = "0.11.0"
@@ -1262,16 +1081,17 @@ dependencies = [
 name = "mdbook-html"
 version = "0.5.0-alpha.1"
 dependencies = [
- "ammonia",
  "anyhow",
+ "ego-tree",
  "elasticlunr-rs",
  "font-awesome-as-a-crate",
  "handlebars",
  "hex",
+ "html5ever 0.35.0",
+ "indexmap",
  "mdbook-core",
  "mdbook-markdown",
  "mdbook-renderer",
- "pretty_assertions",
  "pulldown-cmark",
  "regex",
  "serde",
@@ -1583,7 +1403,6 @@ version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
 dependencies = [
- "phf_macros",
  "phf_shared 0.11.3",
 ]
 
@@ -1627,19 +1446,6 @@ dependencies = [
  "rand 0.8.5",
 ]
 
-[[package]]
-name = "phf_macros"
-version = "0.11.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
-dependencies = [
- "phf_generator 0.11.3",
- "phf_shared 0.11.3",
- "proc-macro2",
- "quote",
- "syn 2.0.104",
-]
-
 [[package]]
 name = "phf_shared"
 version = "0.10.0"
@@ -1670,15 +1476,6 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
-[[package]]
-name = "potential_utf"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585"
-dependencies = [
- "zerovec",
-]
-
 [[package]]
 name = "ppv-lite86"
 version = "0.2.21"
@@ -1694,16 +1491,6 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
 
-[[package]]
-name = "pretty_assertions"
-version = "1.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d"
-dependencies = [
- "diff",
- "yansi",
-]
-
 [[package]]
 name = "proc-macro2"
 version = "1.0.95"
@@ -2086,12 +1873,6 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
-[[package]]
-name = "stable_deref_trait"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
-
 [[package]]
 name = "string_cache"
 version = "0.8.9"
@@ -2151,17 +1932,6 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
 
-[[package]]
-name = "synstructure"
-version = "0.13.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.104",
-]
-
 [[package]]
 name = "tempfile"
 version = "3.20.0"
@@ -2225,16 +1995,6 @@ dependencies = [
  "cfg-if",
 ]
 
-[[package]]
-name = "tinystr"
-version = "0.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b"
-dependencies = [
- "displaydoc",
- "zerovec",
-]
-
 [[package]]
 name = "tokio"
 version = "1.46.1"
@@ -2497,17 +2257,6 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
 
-[[package]]
-name = "url"
-version = "2.5.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
-dependencies = [
- "form_urlencoded",
- "idna",
- "percent-encoding",
-]
-
 [[package]]
 name = "utf-8"
 version = "0.7.6"
@@ -2520,12 +2269,6 @@ version = "0.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
 
-[[package]]
-name = "utf8_iter"
-version = "1.0.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
-
 [[package]]
 name = "utf8parse"
 version = "0.2.2"
@@ -2760,12 +2503,6 @@ dependencies = [
  "bitflags 2.9.1",
 ]
 
-[[package]]
-name = "writeable"
-version = "0.6.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
-
 [[package]]
 name = "xml5ever"
 version = "0.17.0"
@@ -2777,36 +2514,6 @@ dependencies = [
  "markup5ever 0.11.0",
 ]
 
-[[package]]
-name = "yansi"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
-
-[[package]]
-name = "yoke"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc"
-dependencies = [
- "serde",
- "stable_deref_trait",
- "yoke-derive",
- "zerofrom",
-]
-
-[[package]]
-name = "yoke-derive"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.104",
- "synstructure",
-]
-
 [[package]]
 name = "zerocopy"
 version = "0.8.26"
@@ -2826,57 +2533,3 @@ dependencies = [
  "quote",
  "syn 2.0.104",
 ]
-
-[[package]]
-name = "zerofrom"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
-dependencies = [
- "zerofrom-derive",
-]
-
-[[package]]
-name = "zerofrom-derive"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.104",
- "synstructure",
-]
-
-[[package]]
-name = "zerotrie"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595"
-dependencies = [
- "displaydoc",
- "yoke",
- "zerofrom",
-]
-
-[[package]]
-name = "zerovec"
-version = "0.11.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428"
-dependencies = [
- "yoke",
- "zerofrom",
- "zerovec-derive",
-]
-
-[[package]]
-name = "zerovec-derive"
-version = "0.11.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.104",
-]
diff --git a/Cargo.toml b/Cargo.toml
index d36bc3be70..4804376fd3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,20 +22,21 @@ unreachable_pub = "warn"
 edition = "2024"
 license = "MPL-2.0"
 repository = "https://github.com/rust-lang/mdBook"
-rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflows/main.yml
+rust-version = "1.88.0" # Keep in sync with installation.md and .github/workflows/main.yml
 
 [workspace.dependencies]
-ammonia = "4.1.1"
 anyhow = "1.0.98"
 axum = "0.8.4"
 clap = { version = "4.5.41", features = ["cargo", "wrap_help"] }
 clap_complete = "4.5.55"
+ego-tree = "0.10.0"
 elasticlunr-rs = "3.0.2"
 font-awesome-as-a-crate = "0.3.0"
 futures-util = "0.3.31"
 glob = "0.3.3"
 handlebars = "6.3.2"
 hex = "0.4.3"
+html5ever = "0.35.0"
 indexmap = "2.10.0"
 ignore = "0.4.23"
 mdbook-core = { path = "crates/mdbook-core" }
@@ -50,7 +51,6 @@ notify = "8.1.0"
 notify-debouncer-mini = "0.6.0"
 opener = "0.8.2"
 pathdiff = "0.2.3"
-pretty_assertions = "1.4.1"
 pulldown-cmark = { version = "0.13.0", default-features = false, features = ["html"] } # Do not update, part of the public api.
 regex = "1.11.1"
 select = "0.6.1"
diff --git a/crates/mdbook-core/src/config.rs b/crates/mdbook-core/src/config.rs
index 98ca169dd5..a452702254 100644
--- a/crates/mdbook-core/src/config.rs
+++ b/crates/mdbook-core/src/config.rs
@@ -1135,4 +1135,11 @@ mod tests {
         assert!(html_config.print.enable);
         assert!(!html_config.print.page_break);
     }
+
+    #[test]
+    fn test_json_direction() {
+        use serde_json::json;
+        assert_eq!(json!(TextDirection::RightToLeft), json!("rtl"));
+        assert_eq!(json!(TextDirection::LeftToRight), json!("ltr"));
+    }
 }
diff --git a/crates/mdbook-core/src/utils/html.rs b/crates/mdbook-core/src/utils/html.rs
new file mode 100644
index 0000000000..0bbeffb5aa
--- /dev/null
+++ b/crates/mdbook-core/src/utils/html.rs
@@ -0,0 +1,78 @@
+//! Utilities for dealing with HTML.
+
+use std::borrow::Cow;
+
+/// Escape characters to make it safe for an HTML string.
+pub fn escape_html_attribute(text: &str) -> Cow<'_, str> {
+    let needs_escape: &[char] = &['<', '>', '\'', '"', '\\', '&'];
+    let mut s = text;
+    let mut output = String::new();
+    while let Some(next) = s.find(needs_escape) {
+        output.push_str(&s[..next]);
+        match s.as_bytes()[next] {
+            b'<' => output.push_str("&lt;"),
+            b'>' => output.push_str("&gt;"),
+            b'\'' => output.push_str("&#39;"),
+            b'"' => output.push_str("&quot;"),
+            b'\\' => output.push_str("&#92;"),
+            b'&' => output.push_str("&amp;"),
+            _ => unreachable!(),
+        }
+        s = &s[next + 1..];
+    }
+    if output.is_empty() {
+        Cow::Borrowed(text)
+    } else {
+        output.push_str(s);
+        Cow::Owned(output)
+    }
+}
+
+/// Escape `<`, `>`, and '&' for HTML.
+pub fn escape_html(text: &str) -> Cow<'_, str> {
+    let needs_escape: &[char] = &['<', '>', '&'];
+    let mut s = text;
+    let mut output = String::new();
+    while let Some(next) = s.find(needs_escape) {
+        output.push_str(&s[..next]);
+        match s.as_bytes()[next] {
+            b'<' => output.push_str("&lt;"),
+            b'>' => output.push_str("&gt;"),
+            b'&' => output.push_str("&amp;"),
+            _ => unreachable!(),
+        }
+        s = &s[next + 1..];
+    }
+    if output.is_empty() {
+        Cow::Borrowed(text)
+    } else {
+        output.push_str(s);
+        Cow::Owned(output)
+    }
+}
+
+#[test]
+fn attributes_are_escaped() {
+    assert_eq!(escape_html_attribute(""), "");
+    assert_eq!(escape_html_attribute("<"), "&lt;");
+    assert_eq!(escape_html_attribute(">"), "&gt;");
+    assert_eq!(escape_html_attribute("<>"), "&lt;&gt;");
+    assert_eq!(escape_html_attribute("<test>"), "&lt;test&gt;");
+    assert_eq!(escape_html_attribute("a<test>b"), "a&lt;test&gt;b");
+    assert_eq!(escape_html_attribute("'"), "&#39;");
+    assert_eq!(escape_html_attribute("\\"), "&#92;");
+    assert_eq!(escape_html_attribute("&"), "&amp;");
+}
+
+#[test]
+fn html_is_escaped() {
+    assert_eq!(escape_html(""), "");
+    assert_eq!(escape_html("<"), "&lt;");
+    assert_eq!(escape_html(">"), "&gt;");
+    assert_eq!(escape_html("&"), "&amp;");
+    assert_eq!(escape_html("<>"), "&lt;&gt;");
+    assert_eq!(escape_html("<test>"), "&lt;test&gt;");
+    assert_eq!(escape_html("a<test>b"), "a&lt;test&gt;b");
+    assert_eq!(escape_html("'"), "'");
+    assert_eq!(escape_html("\\"), "\\");
+}
diff --git a/crates/mdbook-core/src/utils/mod.rs b/crates/mdbook-core/src/utils/mod.rs
index a6ab262726..5e78d5e3f0 100644
--- a/crates/mdbook-core/src/utils/mod.rs
+++ b/crates/mdbook-core/src/utils/mod.rs
@@ -1,17 +1,17 @@
 //! Various helpers and utilities.
 
 use anyhow::Error;
-use std::borrow::Cow;
-use std::collections::HashMap;
 use std::fmt::Write;
 use tracing::error;
 
 pub mod fs;
+mod html;
 mod string;
 mod toml_ext;
 
 pub(crate) use self::toml_ext::TomlExt;
 
+pub use self::html::{escape_html, escape_html_attribute};
 pub use self::string::{
     take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines,
     take_rustdoc_include_lines,
@@ -30,65 +30,6 @@ macro_rules! static_regex {
     };
 }
 
-/// Replaces multiple consecutive whitespace characters with a single space character.
-pub fn collapse_whitespace(text: &str) -> Cow<'_, str> {
-    static_regex!(WS, r"\s\s+");
-    WS.replace_all(text, " ")
-}
-
-/// Convert the given string to a valid HTML element ID.
-/// The only restriction is that the ID must not contain any ASCII whitespace.
-pub fn normalize_id(content: &str) -> String {
-    content
-        .chars()
-        .filter_map(|ch| {
-            if ch.is_alphanumeric() || ch == '_' || ch == '-' {
-                Some(ch.to_ascii_lowercase())
-            } else if ch.is_whitespace() {
-                Some('-')
-            } else {
-                None
-            }
-        })
-        .collect::<String>()
-}
-
-/// Generate an ID for use with anchors which is derived from a "normalised"
-/// string.
-fn id_from_content(content: &str) -> String {
-    let mut content = content.to_string();
-
-    // Skip any tags or html-encoded stuff
-    static_regex!(HTML, r"(<.*?>)");
-    content = HTML.replace_all(&content, "").into();
-    const REPL_SUB: &[&str] = &["&lt;", "&gt;", "&amp;", "&#39;", "&quot;"];
-    for sub in REPL_SUB {
-        content = content.replace(sub, "");
-    }
-
-    // Remove spaces and hashes indicating a header
-    let trimmed = content.trim().trim_start_matches('#').trim();
-    normalize_id(trimmed)
-}
-
-/// Generate an ID for use with anchors which is derived from a "normalised"
-/// string.
-///
-/// Each ID returned will be unique, if the same `id_counter` is provided on
-/// each call.
-pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
-    let id = id_from_content(content);
-
-    // If we have headers with the same normalized id, append an incrementing counter
-    let id_count = id_counter.entry(id.clone()).or_insert(0);
-    let unique_id = match *id_count {
-        0 => id,
-        id_count => format!("{id}-{id_count}"),
-    };
-    *id_count += 1;
-    unique_id
-}
-
 /// Prints a "backtrace" of some `Error`.
 pub fn log_backtrace(e: &Error) {
     let mut message = format!("{e}");
@@ -99,114 +40,3 @@ pub fn log_backtrace(e: &Error) {
 
     error!("{message}");
 }
-
-/// Escape `<` and `>` for HTML.
-pub fn bracket_escape(mut s: &str) -> String {
-    let mut escaped = String::with_capacity(s.len());
-    let needs_escape: &[char] = &['<', '>'];
-    while let Some(next) = s.find(needs_escape) {
-        escaped.push_str(&s[..next]);
-        match s.as_bytes()[next] {
-            b'<' => escaped.push_str("&lt;"),
-            b'>' => escaped.push_str("&gt;"),
-            _ => unreachable!(),
-        }
-        s = &s[next + 1..];
-    }
-    escaped.push_str(s);
-    escaped
-}
-
-#[cfg(test)]
-mod tests {
-    use super::bracket_escape;
-
-    #[allow(deprecated)]
-    mod id_from_content {
-        use super::super::id_from_content;
-
-        #[test]
-        fn it_generates_anchors() {
-            assert_eq!(
-                id_from_content("## Method-call expressions"),
-                "method-call-expressions"
-            );
-            assert_eq!(id_from_content("## **Bold** title"), "bold-title");
-            assert_eq!(id_from_content("## `Code` title"), "code-title");
-            assert_eq!(
-                id_from_content("## title <span dir=rtl>foo</span>"),
-                "title-foo"
-            );
-        }
-
-        #[test]
-        fn it_generates_anchors_from_non_ascii_initial() {
-            assert_eq!(
-                id_from_content("## `--passes`: add more rustdoc passes"),
-                "--passes-add-more-rustdoc-passes"
-            );
-            assert_eq!(
-                id_from_content("## 中文標題 CJK title"),
-                "中文標題-cjk-title"
-            );
-            assert_eq!(id_from_content("## Über"), "Über");
-        }
-    }
-
-    mod html_munging {
-        use super::super::{normalize_id, unique_id_from_content};
-
-        #[test]
-        fn it_normalizes_ids() {
-            assert_eq!(
-                normalize_id("`--passes`: add more rustdoc passes"),
-                "--passes-add-more-rustdoc-passes"
-            );
-            assert_eq!(
-                normalize_id("Method-call 🐙 expressions \u{1f47c}"),
-                "method-call--expressions-"
-            );
-            assert_eq!(normalize_id("_-_12345"), "_-_12345");
-            assert_eq!(normalize_id("12345"), "12345");
-            assert_eq!(normalize_id("中文"), "中文");
-            assert_eq!(normalize_id("にほんご"), "にほんご");
-            assert_eq!(normalize_id("한국어"), "한국어");
-            assert_eq!(normalize_id(""), "");
-        }
-
-        #[test]
-        fn it_generates_unique_ids_from_content() {
-            // Same id if not given shared state
-            assert_eq!(
-                unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
-                "中文標題-cjk-title"
-            );
-            assert_eq!(
-                unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
-                "中文標題-cjk-title"
-            );
-
-            // Different id if given shared state
-            let mut id_counter = Default::default();
-            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
-            assert_eq!(
-                unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
-                "中文標題-cjk-title"
-            );
-            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
-            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
-        }
-    }
-
-    #[test]
-    fn escaped_brackets() {
-        assert_eq!(bracket_escape(""), "");
-        assert_eq!(bracket_escape("<"), "&lt;");
-        assert_eq!(bracket_escape(">"), "&gt;");
-        assert_eq!(bracket_escape("<>"), "&lt;&gt;");
-        assert_eq!(bracket_escape("<test>"), "&lt;test&gt;");
-        assert_eq!(bracket_escape("a<test>b"), "a&lt;test&gt;b");
-        assert_eq!(bracket_escape("'"), "'");
-        assert_eq!(bracket_escape("\\"), "\\");
-    }
-}
diff --git a/crates/mdbook-driver/src/load.rs b/crates/mdbook-driver/src/load.rs
index ab7b13b49e..9a69579cea 100644
--- a/crates/mdbook-driver/src/load.rs
+++ b/crates/mdbook-driver/src/load.rs
@@ -1,7 +1,7 @@
 use anyhow::{Context, Result};
 use mdbook_core::book::{Book, BookItem, Chapter};
 use mdbook_core::config::BuildConfig;
-use mdbook_core::utils::bracket_escape;
+use mdbook_core::utils::escape_html;
 use mdbook_summary::{Link, Summary, SummaryItem, parse_summary};
 use std::fs::{self, File};
 use std::io::{Read, Write};
@@ -51,7 +51,8 @@ fn create_missing(src_dir: &Path, summary: &Summary) -> Result<()> {
                     let mut f = File::create(&filename).with_context(|| {
                         format!("Unable to create missing file: {}", filename.display())
                     })?;
-                    writeln!(f, "# {}", bracket_escape(&link.name))?;
+                    let title = escape_html(&link.name);
+                    writeln!(f, "# {title}")?;
                 }
             }
 
diff --git a/crates/mdbook-html/Cargo.toml b/crates/mdbook-html/Cargo.toml
index 0b5e5fcb1a..fef26bbfa9 100644
--- a/crates/mdbook-html/Cargo.toml
+++ b/crates/mdbook-html/Cargo.toml
@@ -8,12 +8,14 @@ repository.workspace = true
 rust-version.workspace = true
 
 [dependencies]
-ammonia = { workspace = true, optional = true }
 anyhow.workspace = true
+ego-tree.workspace = true
 elasticlunr-rs = { workspace = true, optional = true }
 font-awesome-as-a-crate.workspace = true
 handlebars.workspace = true
 hex.workspace = true
+html5ever.workspace = true
+indexmap.workspace = true
 mdbook-core.workspace = true
 mdbook-markdown.workspace = true
 mdbook-renderer.workspace = true
@@ -25,7 +27,6 @@ sha2.workspace = true
 tracing.workspace = true
 
 [dev-dependencies]
-pretty_assertions.workspace = true
 tempfile.workspace = true
 toml.workspace = true
 
@@ -33,4 +34,4 @@ toml.workspace = true
 workspace = true
 
 [features]
-search = ["dep:ammonia", "dep:elasticlunr-rs"]
+search = ["dep:elasticlunr-rs"]
diff --git a/crates/mdbook-html/src/html/hide_lines.rs b/crates/mdbook-html/src/html/hide_lines.rs
new file mode 100644
index 0000000000..6852b6c4c5
--- /dev/null
+++ b/crates/mdbook-html/src/html/hide_lines.rs
@@ -0,0 +1,182 @@
+//! Support for hiding code lines.
+
+use crate::html::{Element, Node};
+use ego_tree::{NodeId, Tree};
+use html5ever::tendril::StrTendril;
+use mdbook_core::static_regex;
+use std::collections::HashMap;
+
+/// Wraps hidden lines in a `<span class="boring">` for the given code block.
+///
+/// Panics if `code_id` does not refer to an element node in `tree`.
+pub(crate) fn hide_lines(
+    tree: &mut Tree<Node>,
+    code_id: NodeId,
+    hidelines: &HashMap<String, String>,
+) {
+    let mut node = tree.get_mut(code_id).unwrap();
+    let el = node.value().as_element().unwrap();
+
+    let classes: Vec<_> = el.attr("class").unwrap_or_default().split(' ').collect();
+    let language = classes
+        .iter()
+        .find_map(|cls| cls.strip_prefix("language-"))
+        .unwrap_or_default()
+        .to_string();
+    let hideline_info = classes
+        .iter()
+        .find_map(|cls| cls.strip_prefix("hidelines="))
+        .map(|prefix| prefix.to_string());
+
+    if let Some(mut child) = node.first_child()
+        && let Node::Text(text) = child.value()
+    {
+        if language == "rust" {
+            let new_nodes = hide_lines_rust(text);
+            child.detach(); // The original text node is replaced by the new nodes.
+            let root = tree.extend_tree(new_nodes);
+            let root_id = root.id();
+            let mut node = tree.get_mut(code_id).unwrap();
+            node.reparent_from_id_append(root_id);
+        } else {
+            // Use the prefix from the code block, else the prefix from config.
+            let hidelines_prefix = hideline_info
+                .as_deref()
+                .or_else(|| hidelines.get(&language).map(|p| p.as_str()));
+            if let Some(prefix) = hidelines_prefix {
+                let new_nodes = hide_lines_with_prefix(text, prefix);
+                child.detach(); // The original text node is replaced by the new nodes.
+                let root = tree.extend_tree(new_nodes);
+                let root_id = root.id();
+                let mut node = tree.get_mut(code_id).unwrap();
+                node.reparent_from_id_append(root_id);
+            }
+        }
+    }
+}
+
+/// Wraps hidden lines in a `<span class="boring">` specifically for Rust code blocks.
+fn hide_lines_rust(text: &StrTendril) -> Tree<Node> {
+    static_regex!(BORING_LINES_REGEX, r"^(\s*)#(.?)(.*)$");
+
+    let mut tree = Tree::new(Node::Fragment);
+    let mut root = tree.root_mut();
+    let mut lines = text.lines().peekable();
+    while let Some(line) = lines.next() {
+        // Don't include newline on the last line.
+        let newline = if lines.peek().is_none() { "" } else { "\n" };
+        if let Some(caps) = BORING_LINES_REGEX.captures(line) {
+            if &caps[2] == "#" { // `##` is an escape: emit the line with one `#` removed.
+                root.append(Node::Text(
+                    format!("{}{}{}{newline}", &caps[1], &caps[2], &caps[3]).into(),
+                ));
+                continue;
+            } else if matches!(&caps[2], "" | " ") { // A bare `#` or `# ` marks a hidden line.
+                let mut span = Element::new("span");
+                span.insert_attr("class", "boring".into());
+                let mut span = root.append(Node::Element(span));
+                span.append(Node::Text(
+                    format!("{}{}{newline}", &caps[1], &caps[3]).into(),
+                ));
+                continue;
+            }
+        }
+        root.append(Node::Text(format!("{line}{newline}").into()));
+    }
+    tree
+}
+
+/// Wraps lines starting with the given prefix in a `<span class="boring">` tag.
+fn hide_lines_with_prefix(content: &str, prefix: &str) -> Tree<Node> {
+    let mut tree = Tree::new(Node::Fragment);
+    let mut root = tree.root_mut();
+    for line in content.lines() {
+        if line.trim_start().starts_with(prefix) {
+            let pos = line.find(prefix).unwrap();
+            let (ws, rest) = (&line[..pos], &line[pos + prefix.len()..]); // Drop the prefix.
+            let mut span = Element::new("span");
+            span.insert_attr("class", "boring".into());
+            let mut span = root.append(Node::Element(span));
+            span.append(Node::Text(format!("{ws}{rest}\n").into()));
+        } else {
+            root.append(Node::Text(format!("{line}\n").into()));
+        }
+    }
+    tree
+}
+
+/// If this code text is missing an `fn main`, then wrap it with `fn main` in a
+/// fashion similar to rustdoc, with the wrapper hidden. Otherwise returns `None`.
+pub(crate) fn wrap_rust_main(text: &str) -> Option<String> {
+    if !text.contains("fn main") && !text.contains("quick_main!") {
+        let (attrs, code) = partition_rust_source(text);
+        let newline = if code.is_empty() || code.ends_with('\n') {
+            ""
+        } else {
+            "\n"
+        };
+        Some(format!(
+            "# #![allow(unused)]\n{attrs}# fn main() {{\n{code}{newline}# }}"
+        ))
+    } else {
+        None
+    }
+}
+
+/// Splits leading Rust inner attributes (`#![...]` lines) and blank lines from the source.
+///
+/// Returns `(inner_attrs, rest_of_code)`; concatenating the two yields the original string.
+fn partition_rust_source(s: &str) -> (&str, &str) {
+    static_regex!(
+        HEADER_RE,
+        r"^(?mx)
+        (
+            (?:
+                ^[ \t]*\#!\[.* (?:\r?\n)?
+                |
+                ^\s* (?:\r?\n)?
+            )*
+        )"
+    );
+    let split_idx = match HEADER_RE.captures(s) {
+        Some(caps) => {
+            let attributes = &caps[1];
+            attributes.len() // Byte length of the matched header is the split point.
+        }
+        None => 0, // No match: treat the whole input as code.
+    };
+    s.split_at(split_idx)
+}
+
+#[test]
+fn it_partitions_rust_source() {
+    assert_eq!(partition_rust_source(""), ("", "")); // Empty input: both halves are empty.
+    assert_eq!(partition_rust_source("let x = 1;"), ("", "let x = 1;")); // No attributes.
+    assert_eq!(
+        partition_rust_source("fn main()\n{ let x = 1; }\n"),
+        ("", "fn main()\n{ let x = 1; }\n")
+    );
+    assert_eq!(
+        partition_rust_source("#![allow(foo)]"),
+        ("#![allow(foo)]", "")
+    );
+    assert_eq!(
+        partition_rust_source("#![allow(foo)]\n"),
+        ("#![allow(foo)]\n", "")
+    );
+    assert_eq!(
+        partition_rust_source("#![allow(foo)]\nlet x = 1;"),
+        ("#![allow(foo)]\n", "let x = 1;")
+    );
+    assert_eq!(
+        partition_rust_source(
+            "\n\
+        #![allow(foo)]\n\
+        \n\
+        #![allow(bar)]\n\
+        \n\
+        let x = 1;"
+        ),
+        ("\n#![allow(foo)]\n\n#![allow(bar)]\n\n", "let x = 1;")
+    ); // Blank lines around and between attributes stay in the attribute half.
+}
diff --git a/crates/mdbook-html/src/html/mod.rs b/crates/mdbook-html/src/html/mod.rs
new file mode 100644
index 0000000000..65f28fa102
--- /dev/null
+++ b/crates/mdbook-html/src/html/mod.rs
@@ -0,0 +1,105 @@
+//! HTML rendering support.
+//!
+//! This module's primary entry point is [`render_markdown`] which will take
+//! markdown text and render it to HTML. In summary, the general procedure of
+//! that function is:
+//!
+//! 1. Use [`pulldown_cmark`] to parse the markdown and generate events.
+//! 2. [`tree`] converts those events to a tree data structure.
+//!      1. Parse HTML inside the markdown using [`tokenizer`].
+//!      2. Apply various transformations to the tree data structure, such as adding header links.
+//! 3. Serialize the tree to HTML in [`serialize()`].
+
+use ego_tree::Tree;
+use mdbook_core::book::{Book, Chapter};
+use mdbook_core::config::{HtmlConfig, RustEdition};
+use mdbook_markdown::{MarkdownOptions, new_cmark_parser};
+use std::path::{Path, PathBuf};
+
+mod hide_lines;
+mod print;
+mod serialize;
+#[cfg(test)]
+mod tests;
+mod tokenizer;
+mod tree;
+
+pub(crate) use hide_lines::{hide_lines, wrap_rust_main};
+pub(crate) use print::render_print_page;
+pub(crate) use serialize::serialize;
+pub(crate) use tree::{Element, Node};
+
+/// Options for converting a single chapter's markdown to HTML.
+pub(crate) struct HtmlRenderOptions<'a> {
+    /// Options for parsing markdown.
+    pub markdown_options: MarkdownOptions,
+    /// The chapter's location, relative to the `SUMMARY.md` file.
+    pub path: &'a Path,
+    /// The default Rust edition, used to set the proper class on the code blocks.
+    pub edition: Option<RustEdition>,
+    /// The [`HtmlConfig`], whose options affect how the HTML is generated.
+    pub config: &'a HtmlConfig,
+}
+
+impl<'a> HtmlRenderOptions<'a> {
+    /// Creates a new [`HtmlRenderOptions`], taking `smart_punctuation` from `config`.
+    pub(crate) fn new(
+        path: &'a Path,
+        config: &'a HtmlConfig,
+        edition: Option<RustEdition>,
+    ) -> HtmlRenderOptions<'a> {
+        let mut markdown_options = MarkdownOptions::default();
+        markdown_options.smart_punctuation = config.smart_punctuation;
+        HtmlRenderOptions {
+            markdown_options,
+            path,
+            edition,
+            config,
+        }
+    }
+}
+
+/// Renders markdown `text` to an HTML string using the given `options`.
+pub(crate) fn render_markdown(text: &str, options: &HtmlRenderOptions<'_>) -> String {
+    let tree = build_tree(text, options); // Build the node tree from the markdown.
+    let mut output = String::new();
+    serialize::serialize(&tree, &mut output); // Write the tree out as HTML text.
+    output
+}
+
+/// Renders markdown to a [`Tree`] by feeding the parser's events to the tree builder.
+fn build_tree(text: &str, options: &HtmlRenderOptions<'_>) -> Tree<Node> {
+    let events = new_cmark_parser(text, &options.markdown_options);
+    tree::MarkdownTreeBuilder::build(options, events)
+}
+
+/// The parsed chapter, and some information about the chapter.
+pub(crate) struct ChapterTree<'book> {
+    pub(crate) chapter: &'book Chapter,
+    /// The path to the chapter relative to the root with the `.html` extension.
+    pub(crate) html_path: PathBuf,
+    /// The chapter tree.
+    pub(crate) tree: Tree<Node>,
+}
+
+/// Creates all of the [`ChapterTree`]s for the book.
+pub(crate) fn build_trees<'book>(
+    book: &'book Book,
+    html_config: &HtmlConfig,
+    edition: Option<RustEdition>,
+) -> Vec<ChapterTree<'book>> {
+    book.chapters()
+        .map(|ch| {
+            let path = ch.path.as_ref().unwrap(); // NOTE(review): panics on a pathless chapter.
+            let html_path = path.with_extension("html");
+            let options = HtmlRenderOptions::new(path, html_config, edition);
+            let tree = build_tree(&ch.content, &options);
+
+            ChapterTree {
+                chapter: ch,
+                html_path,
+                tree,
+            }
+        })
+        .collect()
+}
diff --git a/crates/mdbook-html/src/html/print.rs b/crates/mdbook-html/src/html/print.rs
new file mode 100644
index 0000000000..5996ef2b19
--- /dev/null
+++ b/crates/mdbook-html/src/html/print.rs
@@ -0,0 +1,217 @@
+//! Support for generating the print page.
+//!
+//! The print page takes all the individual chapters (as `Tree<Node>`
+//! elements) and modifies the chapters so that they work on a consolidated
+//! print page, and then serializes it all as one HTML page.
+
+use super::Node;
+use crate::html::{ChapterTree, Element, serialize};
+use crate::utils::{ToUrlPath, id_from_content, normalize_path, unique_id};
+use mdbook_core::static_regex;
+use std::collections::HashMap;
+use std::path::{Component, PathBuf};
+
+/// Takes all the chapter trees, modifies them to be suitable to render for
+/// the print page, and returns a string of all the chapters rendered to a
+/// single HTML page.
+pub(crate) fn render_print_page(mut chapter_trees: Vec>) -> String {
+    let (id_remap, mut id_counter) = make_ids_unique(&mut chapter_trees);
+    let path_to_root_id = make_root_id_map(&mut chapter_trees, &mut id_counter);
+    rewrite_links(&mut chapter_trees, &id_remap, &path_to_root_id);
+
+    let mut print_content = String::new();
+    for ChapterTree { tree, .. } in chapter_trees {
+        if !print_content.is_empty() {
+            // Add page break between chapters
+            // See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
+            // Add both two CSS properties because of the compatibility issue
+            print_content
+                .push_str(r#"
"#); + } + serialize(&tree, &mut print_content); + } + print_content +} + +/// Make all IDs unique, and create a map from old to new IDs. +/// +/// The first map is a map of the chapter path to the IDs that were rewritten +/// in that chapter (old ID to new ID). +/// +/// The second map is a map of every ID seen to the number of times it has +/// been seen. This is used to generate unique IDs. +fn make_ids_unique( + chapter_trees: &mut [ChapterTree<'_>], +) -> ( + HashMap>, + HashMap, +) { + let mut id_remap = HashMap::new(); + let mut id_counter = HashMap::new(); + for ChapterTree { + html_path, tree, .. + } in chapter_trees + { + for value in tree.values_mut() { + if let Node::Element(el) = value + && let Some(id) = el.attr("id") + { + let new_id = unique_id(id, &mut id_counter); + if new_id != id { + let id = id.to_string(); + el.insert_attr("id", new_id.clone().into()); + + let map: &mut HashMap<_, _> = id_remap.entry(html_path.clone()).or_default(); + map.insert(id, new_id); + } + } + } + } + (id_remap, id_counter) +} + +/// Generates a map of a chapter path to the ID of the top of the chapter. +/// +/// If a chapter is missing an `h1` tag, then one is synthesized so that the +/// print output has something to link to. +fn make_root_id_map( + chapter_trees: &mut [ChapterTree<'_>], + id_counter: &mut HashMap, +) -> HashMap { + let mut path_to_root_id = HashMap::new(); + for ChapterTree { + chapter, + html_path, + tree, + .. + } in chapter_trees + { + let mut h1_found = false; + for value in tree.values_mut() { + if let Node::Element(el) = value { + if el.name() == "h1" { + if let Some(id) = el.attr("id") { + h1_found = true; + path_to_root_id.insert(html_path.clone(), id.to_string()); + } + break; + } else if matches!(el.name(), "h2" | "h3" | "h4" | "h5" | "h6") { + // h1 not found. + break; + } + } + } + if !h1_found { + // Synthesize a root id to be able to link to the start of the page. + // TODO: This might want to be a warning? 
Chapters generally + // should start with an h1. + let mut h1 = Element::new("h1"); + let id = id_from_content(&chapter.name); + let id = unique_id(&id, id_counter); + h1.insert_attr("id", id.clone().into()); + let mut root = tree.root_mut(); + let mut h1 = root.prepend(Node::Element(h1)); + let mut a = Element::new("a"); + a.insert_attr("href", format!("#{id}").into()); + a.insert_attr("class", "header".into()); + let mut a = h1.append(Node::Element(a)); + a.append(Node::Text(chapter.name.clone().into())); + path_to_root_id.insert(html_path.clone(), id); + } + } + + path_to_root_id +} + +/// Rewrite links so that they point to IDs on the print page. +fn rewrite_links( + chapter_trees: &mut [ChapterTree<'_>], + id_remap: &HashMap>, + path_to_root_id: &HashMap, +) { + static_regex!( + LINK, + r"(?x) + (?P^[a-z][a-z0-9+.-]*:)? + (?P[^\#]+)? + (?:\#(?P.*))?" + ); + + // Rewrite path links to go to the appropriate place. + for ChapterTree { + html_path, tree, .. + } in chapter_trees + { + let base = html_path.parent().expect("path can't be empty"); + + for value in tree.values_mut() { + let Node::Element(el) = value else { + continue; + }; + if !matches!(el.name(), "a" | "img") { + continue; + } + for attr in ["href", "src", "xlink:href"] { + let Some(dest) = el.attr(attr) else { + continue; + }; + let Some(caps) = LINK.captures(&dest) else { + continue; + }; + if caps.name("scheme").is_some() { + continue; + } + // The lookup_key is the key to look up in the remap table. + let mut lookup_key = html_path.clone(); + if let Some(href_path) = caps.name("path") + && let href_path = href_path.as_str() + && !href_path.is_empty() + { + lookup_key.pop(); + lookup_key.push(href_path); + let normalized = normalize_path(&lookup_key); + // If this points outside of the book, don't modify it. 
+ let is_outside = matches!( + normalized.components().next(), + Some(Component::ParentDir | Component::RootDir) + ); + if is_outside || !href_path.ends_with(".html") { + // Make the link relative to the print page location. + let mut rel_path = normalize_path(&base.join(href_path)).to_url_path(); + if let Some(anchor) = caps.name("anchor") { + rel_path.push('#'); + rel_path.push_str(anchor.as_str()); + } + el.insert_attr(attr, rel_path.into()); + continue; + } + } + + let lookup_key = normalize_path(&lookup_key); + + let anchor = caps.name("anchor"); + let id = match anchor { + Some(anchor_id) => { + let anchor_id = anchor_id.as_str().to_string(); + match id_remap.get(&lookup_key) { + Some(id_map) => match id_map.get(&anchor_id) { + Some(new_id) => new_id.clone(), + None => anchor_id, + }, + None => { + // Assume the anchor goes to some non-remapped + // ID that already exists. + anchor_id + } + } + } + None => match path_to_root_id.get(&lookup_key) { + Some(id) => id.to_string(), + None => continue, + }, + }; + el.insert_attr(attr, format!("#{id}").into()); + } + } + } +} diff --git a/crates/mdbook-html/src/html/serialize.rs b/crates/mdbook-html/src/html/serialize.rs new file mode 100644 index 0000000000..3917585465 --- /dev/null +++ b/crates/mdbook-html/src/html/serialize.rs @@ -0,0 +1,112 @@ +//! Serializes the [`Node`] tree to an HTML string. + +use super::tree::is_void_element; +use super::tree::{Element, Node}; +use ego_tree::{Tree, iter::Edge}; +use html5ever::{local_name, ns}; +use mdbook_core::utils::{escape_html, escape_html_attribute}; +use std::ops::Deref; + +/// Serializes the given tree of [`Node`] elements to an HTML string. 
+pub(crate) fn serialize(tree: &Tree, output: &mut String) { + for edge in tree.root().traverse() { + match edge { + Edge::Open(node) => match node.value() { + Node::Element(el) => serialize_start(el, output), + Node::Text(text) => { + output.push_str(&escape_html(text)); + } + Node::Comment(comment) => { + output.push_str(""); + } + Node::Fragment => {} + Node::RawData(html) => { + output.push_str(html); + } + }, + Edge::Close(node) => { + if let Node::Element(el) = node.value() { + serialize_end(el, output); + } + } + } + } +} + +/// Returns true if this HTML element wants a newline to keep the emitted +/// output more readable. +fn wants_pretty_html_newline(name: &str) -> bool { + matches!(name, |"blockquote"| "dd" + | "div" + | "dl" + | "dt" + | "h1" + | "h2" + | "h3" + | "h4" + | "h5" + | "h6" + | "hr" + | "li" + | "ol" + | "p" + | "pre" + | "table" + | "tbody" + | "thead" + | "tr" + | "ul") +} + +/// Emit the start tag of an element. +fn serialize_start(el: &Element, output: &mut String) { + let el_name = el.name(); + if wants_pretty_html_newline(el_name) { + if !output.is_empty() { + if !output.ends_with('\n') { + output.push('\n'); + } + } + } + output.push('<'); + output.push_str(el_name); + for (attr_name, value) in &el.attrs { + output.push(' '); + match attr_name.ns { + ns!() => (), + ns!(xml) => output.push_str("xml:"), + ns!(xmlns) => { + if el.name.local != local_name!("xmlns") { + output.push_str("xmlns:"); + } + } + ns!(xlink) => output.push_str("xlink:"), + _ => (), // TODO what should it do here? + } + output.push_str(attr_name.local.deref()); + output.push_str("=\""); + output.push_str(&escape_html_attribute(&value)); + output.push('"'); + } + if el.self_closing { + output.push_str(" /"); + } + output.push('>'); +} + +/// Emit the end tag of an element. +fn serialize_end(el: &Element, output: &mut String) { + // Void elements do not have an end tag. 
+ if el.self_closing || is_void_element(el.name()) { + return; + } + let name = el.name(); + output.push_str("'); + if wants_pretty_html_newline(name) { + output.push('\n'); + } +} diff --git a/crates/mdbook-html/src/html/tests.rs b/crates/mdbook-html/src/html/tests.rs new file mode 100644 index 0000000000..58825c9c81 --- /dev/null +++ b/crates/mdbook-html/src/html/tests.rs @@ -0,0 +1,53 @@ +use crate::html::tokenizer::parse_html; +use html5ever::tokenizer::{Tag, TagKind, Token}; + +// Basic tokenizer behavior of a script. +#[test] +fn parse_html_script() { + let script = r#" +if (3 < 5 > 10) +{ + alert("The sky is falling!"); +} +"#; + let t = format!(""); + let ts = parse_html(&t); + eprintln!("{ts:#?}",); + let mut output = String::new(); + let mut in_script = false; + for t in ts { + match t { + Token::ParseError(e) => panic!("{e:?}"), + Token::CharacterTokens(s) => { + if in_script { + output.push_str(&s) + } + } + Token::TagToken(Tag { + kind: TagKind::StartTag, + .. + }) => in_script = true, + Token::TagToken(Tag { + kind: TagKind::EndTag, + .. + }) => in_script = false, + _ => {} + } + } + assert_eq!(output, script); +} + +// What happens if a script doesn't end. +#[test] +fn parse_html_script_unclosed() { + let t = r#"`. The ` - +