diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 18b4b82ec2..ac21ae7eda 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -40,7 +40,7 @@ jobs: - name: msrv os: ubuntu-22.04 # sync MSRV with docs: guide/src/guide/installation.md and Cargo.toml - rust: 1.85.0 + rust: 1.88.0 target: x86_64-unknown-linux-gnu name: ${{ matrix.name }} steps: diff --git a/Cargo.lock b/Cargo.lock index b32495c33f..e913aa3040 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,19 +26,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ammonia" -version = "4.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6b346764dd0814805de8abf899fe03065bcee69bb1a4771c785817e39f3978f" -dependencies = [ - "cssparser", - "html5ever 0.35.0", - "maplit", - "tendril", - "url", -] - [[package]] name = "anstream" version = "0.6.19" @@ -356,29 +343,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "cssparser" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e901edd733a1472f944a45116df3f846f54d37e67e68640ac8bb69689aca2aa" -dependencies = [ - "cssparser-macros", - "dtoa-short", - "itoa", - "phf 0.11.3", - "smallvec", -] - -[[package]] -name = "cssparser-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" -dependencies = [ - "quote", - "syn 2.0.104", -] - [[package]] name = "darling" version = "0.20.11" @@ -451,12 +415,6 @@ dependencies = [ "syn 2.0.104", ] -[[package]] -name = "diff" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" - [[package]] name = "digest" version = "0.10.7" @@ -467,38 +425,18 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "displaydoc" -version = "0.2.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - -[[package]] -name = "dtoa" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" - -[[package]] -name = "dtoa-short" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" -dependencies = [ - "dtoa", -] - [[package]] name = "dunce" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "ego-tree" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" + [[package]] name = "elasticlunr-rs" version = "3.0.2" @@ -855,119 +793,12 @@ dependencies = [ "tower-service", ] -[[package]] -name = "icu_collections" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" -dependencies = [ - "displaydoc", - "icu_collections", - 
"icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" - -[[package]] -name = "icu_properties" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "potential_utf", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" - -[[package]] -name = "icu_provider" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" -dependencies = [ - "displaydoc", - "icu_locale_core", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] - [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" -[[package]] -name = "idna" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - [[package]] name = "ignore" version 
= "0.4.23" @@ -1086,12 +917,6 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" -[[package]] -name = "litemap" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" - [[package]] name = "lock_api" version = "0.4.13" @@ -1114,12 +939,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - [[package]] name = "markup5ever" version = "0.11.0" @@ -1262,16 +1081,17 @@ dependencies = [ name = "mdbook-html" version = "0.5.0-alpha.1" dependencies = [ - "ammonia", "anyhow", + "ego-tree", "elasticlunr-rs", "font-awesome-as-a-crate", "handlebars", "hex", + "html5ever 0.35.0", + "indexmap", "mdbook-core", "mdbook-markdown", "mdbook-renderer", - "pretty_assertions", "pulldown-cmark", "regex", "serde", @@ -1583,7 +1403,6 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_macros", "phf_shared 0.11.3", ] @@ -1627,19 +1446,6 @@ dependencies = [ "rand 0.8.5", ] -[[package]] -name = "phf_macros" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" -dependencies = [ - "phf_generator 0.11.3", - "phf_shared 0.11.3", - "proc-macro2", - "quote", - "syn 2.0.104", -] - [[package]] name = "phf_shared" version = "0.10.0" @@ -1670,15 +1476,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "potential_utf" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" -dependencies = [ - "zerovec", -] - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1694,16 +1491,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" -[[package]] -name = "pretty_assertions" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" -dependencies = [ - "diff", - "yansi", -] - [[package]] name = "proc-macro2" version = "1.0.95" @@ -2086,12 +1873,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - [[package]] name = "string_cache" version = "0.8.9" @@ -2151,17 +1932,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - [[package]] name = "tempfile" version = "3.20.0" @@ -2225,16 +1995,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "tinystr" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" -dependencies = [ - "displaydoc", - "zerovec", -] - [[package]] name = 
"tokio" version = "1.46.1" @@ -2497,17 +2257,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" -[[package]] -name = "url" -version = "2.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - [[package]] name = "utf-8" version = "0.7.6" @@ -2520,12 +2269,6 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - [[package]] name = "utf8parse" version = "0.2.2" @@ -2760,12 +2503,6 @@ dependencies = [ "bitflags 2.9.1", ] -[[package]] -name = "writeable" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" - [[package]] name = "xml5ever" version = "0.17.0" @@ -2777,36 +2514,6 @@ dependencies = [ "markup5ever 0.11.0", ] -[[package]] -name = "yansi" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" - -[[package]] -name = "yoke" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", - "synstructure", -] - [[package]] name = "zerocopy" version = "0.8.26" @@ -2826,57 +2533,3 @@ dependencies = [ "quote", "syn 2.0.104", ] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", - "synstructure", -] - -[[package]] -name = "zerotrie" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] diff --git a/Cargo.toml b/Cargo.toml index d36bc3be70..4804376fd3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,20 +22,21 @@ unreachable_pub = "warn" edition = "2024" license = "MPL-2.0" repository = "https://github.com/rust-lang/mdBook" -rust-version = "1.85.0" # Keep in sync with installation.md and .github/workflows/main.yml +rust-version = "1.88.0" # Keep in sync with installation.md and 
.github/workflows/main.yml [workspace.dependencies] -ammonia = "4.1.1" anyhow = "1.0.98" axum = "0.8.4" clap = { version = "4.5.41", features = ["cargo", "wrap_help"] } clap_complete = "4.5.55" +ego-tree = "0.10.0" elasticlunr-rs = "3.0.2" font-awesome-as-a-crate = "0.3.0" futures-util = "0.3.31" glob = "0.3.3" handlebars = "6.3.2" hex = "0.4.3" +html5ever = "0.35.0" indexmap = "2.10.0" ignore = "0.4.23" mdbook-core = { path = "crates/mdbook-core" } @@ -50,7 +51,6 @@ notify = "8.1.0" notify-debouncer-mini = "0.6.0" opener = "0.8.2" pathdiff = "0.2.3" -pretty_assertions = "1.4.1" pulldown-cmark = { version = "0.13.0", default-features = false, features = ["html"] } # Do not update, part of the public api. regex = "1.11.1" select = "0.6.1" diff --git a/crates/mdbook-core/src/config.rs b/crates/mdbook-core/src/config.rs index 98ca169dd5..a452702254 100644 --- a/crates/mdbook-core/src/config.rs +++ b/crates/mdbook-core/src/config.rs @@ -1135,4 +1135,11 @@ mod tests { assert!(html_config.print.enable); assert!(!html_config.print.page_break); } + + #[test] + fn test_json_direction() { + use serde_json::json; + assert_eq!(json!(TextDirection::RightToLeft), json!("rtl")); + assert_eq!(json!(TextDirection::LeftToRight), json!("ltr")); + } } diff --git a/crates/mdbook-core/src/utils/html.rs b/crates/mdbook-core/src/utils/html.rs new file mode 100644 index 0000000000..0bbeffb5aa --- /dev/null +++ b/crates/mdbook-core/src/utils/html.rs @@ -0,0 +1,78 @@ +//! Utilities for dealing with HTML. + +use std::borrow::Cow; + +/// Escape characters to make it safe for an HTML string. 
+pub fn escape_html_attribute(text: &str) -> Cow<'_, str> { + let needs_escape: &[char] = &['<', '>', '\'', '"', '\\', '&']; + let mut s = text; + let mut output = String::new(); + while let Some(next) = s.find(needs_escape) { + output.push_str(&s[..next]); + match s.as_bytes()[next] { + b'<' => output.push_str("<"), + b'>' => output.push_str(">"), + b'\'' => output.push_str("'"), + b'"' => output.push_str("""), + b'\\' => output.push_str("\"), + b'&' => output.push_str("&"), + _ => unreachable!(), + } + s = &s[next + 1..]; + } + if output.is_empty() { + Cow::Borrowed(text) + } else { + output.push_str(s); + Cow::Owned(output) + } +} + +/// Escape `<`, `>`, and '&' for HTML. +pub fn escape_html(text: &str) -> Cow<'_, str> { + let needs_escape: &[char] = &['<', '>', '&']; + let mut s = text; + let mut output = String::new(); + while let Some(next) = s.find(needs_escape) { + output.push_str(&s[..next]); + match s.as_bytes()[next] { + b'<' => output.push_str("<"), + b'>' => output.push_str(">"), + b'&' => output.push_str("&"), + _ => unreachable!(), + } + s = &s[next + 1..]; + } + if output.is_empty() { + Cow::Borrowed(text) + } else { + output.push_str(s); + Cow::Owned(output) + } +} + +#[test] +fn attributes_are_escaped() { + assert_eq!(escape_html_attribute(""), ""); + assert_eq!(escape_html_attribute("<"), "<"); + assert_eq!(escape_html_attribute(">"), ">"); + assert_eq!(escape_html_attribute("<>"), "<>"); + assert_eq!(escape_html_attribute(""), "<test>"); + assert_eq!(escape_html_attribute("ab"), "a<test>b"); + assert_eq!(escape_html_attribute("'"), "'"); + assert_eq!(escape_html_attribute("\\"), "\"); + assert_eq!(escape_html_attribute("&"), "&"); +} + +#[test] +fn html_is_escaped() { + assert_eq!(escape_html(""), ""); + assert_eq!(escape_html("<"), "<"); + assert_eq!(escape_html(">"), ">"); + assert_eq!(escape_html("&"), "&"); + assert_eq!(escape_html("<>"), "<>"); + assert_eq!(escape_html(""), "<test>"); + assert_eq!(escape_html("ab"), "a<test>b"); + 
assert_eq!(escape_html("'"), "'"); + assert_eq!(escape_html("\\"), "\\"); +} diff --git a/crates/mdbook-core/src/utils/mod.rs b/crates/mdbook-core/src/utils/mod.rs index a6ab262726..5e78d5e3f0 100644 --- a/crates/mdbook-core/src/utils/mod.rs +++ b/crates/mdbook-core/src/utils/mod.rs @@ -1,17 +1,17 @@ //! Various helpers and utilities. use anyhow::Error; -use std::borrow::Cow; -use std::collections::HashMap; use std::fmt::Write; use tracing::error; pub mod fs; +mod html; mod string; mod toml_ext; pub(crate) use self::toml_ext::TomlExt; +pub use self::html::{escape_html, escape_html_attribute}; pub use self::string::{ take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines, take_rustdoc_include_lines, @@ -30,65 +30,6 @@ macro_rules! static_regex { }; } -/// Replaces multiple consecutive whitespace characters with a single space character. -pub fn collapse_whitespace(text: &str) -> Cow<'_, str> { - static_regex!(WS, r"\s\s+"); - WS.replace_all(text, " ") -} - -/// Convert the given string to a valid HTML element ID. -/// The only restriction is that the ID must not contain any ASCII whitespace. -pub fn normalize_id(content: &str) -> String { - content - .chars() - .filter_map(|ch| { - if ch.is_alphanumeric() || ch == '_' || ch == '-' { - Some(ch.to_ascii_lowercase()) - } else if ch.is_whitespace() { - Some('-') - } else { - None - } - }) - .collect::() -} - -/// Generate an ID for use with anchors which is derived from a "normalised" -/// string. 
-fn id_from_content(content: &str) -> String { - let mut content = content.to_string(); - - // Skip any tags or html-encoded stuff - static_regex!(HTML, r"(<.*?>)"); - content = HTML.replace_all(&content, "").into(); - const REPL_SUB: &[&str] = &["<", ">", "&", "'", """]; - for sub in REPL_SUB { - content = content.replace(sub, ""); - } - - // Remove spaces and hashes indicating a header - let trimmed = content.trim().trim_start_matches('#').trim(); - normalize_id(trimmed) -} - -/// Generate an ID for use with anchors which is derived from a "normalised" -/// string. -/// -/// Each ID returned will be unique, if the same `id_counter` is provided on -/// each call. -pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap) -> String { - let id = id_from_content(content); - - // If we have headers with the same normalized id, append an incrementing counter - let id_count = id_counter.entry(id.clone()).or_insert(0); - let unique_id = match *id_count { - 0 => id, - id_count => format!("{id}-{id_count}"), - }; - *id_count += 1; - unique_id -} - /// Prints a "backtrace" of some `Error`. pub fn log_backtrace(e: &Error) { let mut message = format!("{e}"); @@ -99,114 +40,3 @@ pub fn log_backtrace(e: &Error) { error!("{message}"); } - -/// Escape `<` and `>` for HTML. 
-pub fn bracket_escape(mut s: &str) -> String { - let mut escaped = String::with_capacity(s.len()); - let needs_escape: &[char] = &['<', '>']; - while let Some(next) = s.find(needs_escape) { - escaped.push_str(&s[..next]); - match s.as_bytes()[next] { - b'<' => escaped.push_str("<"), - b'>' => escaped.push_str(">"), - _ => unreachable!(), - } - s = &s[next + 1..]; - } - escaped.push_str(s); - escaped -} - -#[cfg(test)] -mod tests { - use super::bracket_escape; - - #[allow(deprecated)] - mod id_from_content { - use super::super::id_from_content; - - #[test] - fn it_generates_anchors() { - assert_eq!( - id_from_content("## Method-call expressions"), - "method-call-expressions" - ); - assert_eq!(id_from_content("## **Bold** title"), "bold-title"); - assert_eq!(id_from_content("## `Code` title"), "code-title"); - assert_eq!( - id_from_content("## title foo"), - "title-foo" - ); - } - - #[test] - fn it_generates_anchors_from_non_ascii_initial() { - assert_eq!( - id_from_content("## `--passes`: add more rustdoc passes"), - "--passes-add-more-rustdoc-passes" - ); - assert_eq!( - id_from_content("## 中文標題 CJK title"), - "中文標題-cjk-title" - ); - assert_eq!(id_from_content("## Über"), "Über"); - } - } - - mod html_munging { - use super::super::{normalize_id, unique_id_from_content}; - - #[test] - fn it_normalizes_ids() { - assert_eq!( - normalize_id("`--passes`: add more rustdoc passes"), - "--passes-add-more-rustdoc-passes" - ); - assert_eq!( - normalize_id("Method-call 🐙 expressions \u{1f47c}"), - "method-call--expressions-" - ); - assert_eq!(normalize_id("_-_12345"), "_-_12345"); - assert_eq!(normalize_id("12345"), "12345"); - assert_eq!(normalize_id("中文"), "中文"); - assert_eq!(normalize_id("にほんご"), "にほんご"); - assert_eq!(normalize_id("한국어"), "한국어"); - assert_eq!(normalize_id(""), ""); - } - - #[test] - fn it_generates_unique_ids_from_content() { - // Same id if not given shared state - assert_eq!( - unique_id_from_content("## 中文標題 CJK title", &mut Default::default()), - 
"中文標題-cjk-title" - ); - assert_eq!( - unique_id_from_content("## 中文標題 CJK title", &mut Default::default()), - "中文標題-cjk-title" - ); - - // Different id if given shared state - let mut id_counter = Default::default(); - assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über"); - assert_eq!( - unique_id_from_content("## 中文標題 CJK title", &mut id_counter), - "中文標題-cjk-title" - ); - assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1"); - assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2"); - } - } - - #[test] - fn escaped_brackets() { - assert_eq!(bracket_escape(""), ""); - assert_eq!(bracket_escape("<"), "<"); - assert_eq!(bracket_escape(">"), ">"); - assert_eq!(bracket_escape("<>"), "<>"); - assert_eq!(bracket_escape(""), "<test>"); - assert_eq!(bracket_escape("ab"), "a<test>b"); - assert_eq!(bracket_escape("'"), "'"); - assert_eq!(bracket_escape("\\"), "\\"); - } -} diff --git a/crates/mdbook-driver/src/load.rs b/crates/mdbook-driver/src/load.rs index ab7b13b49e..9a69579cea 100644 --- a/crates/mdbook-driver/src/load.rs +++ b/crates/mdbook-driver/src/load.rs @@ -1,7 +1,7 @@ use anyhow::{Context, Result}; use mdbook_core::book::{Book, BookItem, Chapter}; use mdbook_core::config::BuildConfig; -use mdbook_core::utils::bracket_escape; +use mdbook_core::utils::escape_html; use mdbook_summary::{Link, Summary, SummaryItem, parse_summary}; use std::fs::{self, File}; use std::io::{Read, Write}; @@ -51,7 +51,8 @@ fn create_missing(src_dir: &Path, summary: &Summary) -> Result<()> { let mut f = File::create(&filename).with_context(|| { format!("Unable to create missing file: {}", filename.display()) })?; - writeln!(f, "# {}", bracket_escape(&link.name))?; + let title = escape_html(&link.name); + writeln!(f, "# {title}")?; } } diff --git a/crates/mdbook-html/Cargo.toml b/crates/mdbook-html/Cargo.toml index 0b5e5fcb1a..fef26bbfa9 100644 --- a/crates/mdbook-html/Cargo.toml +++ b/crates/mdbook-html/Cargo.toml @@ -8,12 +8,14 @@ 
repository.workspace = true rust-version.workspace = true [dependencies] -ammonia = { workspace = true, optional = true } anyhow.workspace = true +ego-tree.workspace = true elasticlunr-rs = { workspace = true, optional = true } font-awesome-as-a-crate.workspace = true handlebars.workspace = true hex.workspace = true +html5ever.workspace = true +indexmap.workspace = true mdbook-core.workspace = true mdbook-markdown.workspace = true mdbook-renderer.workspace = true @@ -25,7 +27,6 @@ sha2.workspace = true tracing.workspace = true [dev-dependencies] -pretty_assertions.workspace = true tempfile.workspace = true toml.workspace = true @@ -33,4 +34,4 @@ toml.workspace = true workspace = true [features] -search = ["dep:ammonia", "dep:elasticlunr-rs"] +search = ["dep:elasticlunr-rs"] diff --git a/crates/mdbook-html/src/html/hide_lines.rs b/crates/mdbook-html/src/html/hide_lines.rs new file mode 100644 index 0000000000..6852b6c4c5 --- /dev/null +++ b/crates/mdbook-html/src/html/hide_lines.rs @@ -0,0 +1,182 @@ +//! Support for hiding code lines. + +use crate::html::{Element, Node}; +use ego_tree::{NodeId, Tree}; +use html5ever::tendril::StrTendril; +use mdbook_core::static_regex; +use std::collections::HashMap; + +/// Wraps hidden lines in a `` for the given code block. 
+pub(crate) fn hide_lines( + tree: &mut Tree, + code_id: NodeId, + hidelines: &HashMap, +) { + let mut node = tree.get_mut(code_id).unwrap(); + let el = node.value().as_element().unwrap(); + + let classes: Vec<_> = el.attr("class").unwrap_or_default().split(' ').collect(); + let language = classes + .iter() + .filter_map(|cls| cls.strip_prefix("language-")) + .next() + .unwrap_or_default() + .to_string(); + let hideline_info = classes + .iter() + .filter_map(|cls| cls.strip_prefix("hidelines=")) + .map(|prefix| prefix.to_string()) + .next(); + + if let Some(mut child) = node.first_child() + && let Node::Text(text) = child.value() + { + if language == "rust" { + let new_nodes = hide_lines_rust(text); + child.detach(); + let root = tree.extend_tree(new_nodes); + let root_id = root.id(); + let mut node = tree.get_mut(code_id).unwrap(); + node.reparent_from_id_append(root_id); + } else { + // Use the prefix from the code block, else the prefix from config. + let hidelines_prefix = hideline_info + .as_deref() + .or_else(|| hidelines.get(&language).map(|p| p.as_str())); + if let Some(prefix) = hidelines_prefix { + let new_nodes = hide_lines_with_prefix(text, prefix); + child.detach(); + let root = tree.extend_tree(new_nodes); + let root_id = root.id(); + let mut node = tree.get_mut(code_id).unwrap(); + node.reparent_from_id_append(root_id); + } + } + } +} + +/// Wraps hidden lines in a `` specifically for Rust code blocks. +fn hide_lines_rust(text: &StrTendril) -> Tree { + static_regex!(BORING_LINES_REGEX, r"^(\s*)#(.?)(.*)$"); + + let mut tree = Tree::new(Node::Fragment); + let mut root = tree.root_mut(); + let mut lines = text.lines().peekable(); + while let Some(line) = lines.next() { + // Don't include newline on the last line. 
+ let newline = if lines.peek().is_none() { "" } else { "\n" }; + if let Some(caps) = BORING_LINES_REGEX.captures(line) { + if &caps[2] == "#" { + root.append(Node::Text( + format!("{}{}{}{newline}", &caps[1], &caps[2], &caps[3]).into(), + )); + continue; + } else if matches!(&caps[2], "" | " ") { + let mut span = Element::new("span"); + span.insert_attr("class", "boring".into()); + let mut span = root.append(Node::Element(span)); + span.append(Node::Text( + format!("{}{}{newline}", &caps[1], &caps[3]).into(), + )); + continue; + } + } + root.append(Node::Text(format!("{line}{newline}").into())); + } + tree +} + +/// Wraps hidden lines in a `` tag for lines starting with the given prefix. +fn hide_lines_with_prefix(content: &str, prefix: &str) -> Tree { + let mut tree = Tree::new(Node::Fragment); + let mut root = tree.root_mut(); + for line in content.lines() { + if line.trim_start().starts_with(prefix) { + let pos = line.find(prefix).unwrap(); + let (ws, rest) = (&line[..pos], &line[pos + prefix.len()..]); + let mut span = Element::new("span"); + span.insert_attr("class", "boring".into()); + let mut span = root.append(Node::Element(span)); + span.append(Node::Text(format!("{ws}{rest}\n").into())); + } else { + root.append(Node::Text(format!("{line}\n").into())); + } + } + tree +} + +/// If this code text is missing an `fn main`, the wrap it with `fn main` in a +/// fashion similar to rustdoc, with the wrapper hidden. +pub(crate) fn wrap_rust_main(text: &str) -> Option { + if !text.contains("fn main") && !text.contains("quick_main!") { + let (attrs, code) = partition_rust_source(text); + let newline = if code.is_empty() || code.ends_with('\n') { + "" + } else { + "\n" + }; + Some(format!( + "# #![allow(unused)]\n{attrs}# fn main() {{\n{code}{newline}# }}" + )) + } else { + None + } +} + +/// Splits Rust inner attributes from the given source string. +/// +/// Returns `(inner_attrs, rest_of_code)`. 
+fn partition_rust_source(s: &str) -> (&str, &str) { + static_regex!( + HEADER_RE, + r"^(?mx) + ( + (?: + ^[ \t]*\#!\[.* (?:\r?\n)? + | + ^\s* (?:\r?\n)? + )* + )" + ); + let split_idx = match HEADER_RE.captures(s) { + Some(caps) => { + let attributes = &caps[1]; + attributes.len() + } + None => 0, + }; + s.split_at(split_idx) +} + +#[test] +fn it_partitions_rust_source() { + assert_eq!(partition_rust_source(""), ("", "")); + assert_eq!(partition_rust_source("let x = 1;"), ("", "let x = 1;")); + assert_eq!( + partition_rust_source("fn main()\n{ let x = 1; }\n"), + ("", "fn main()\n{ let x = 1; }\n") + ); + assert_eq!( + partition_rust_source("#![allow(foo)]"), + ("#![allow(foo)]", "") + ); + assert_eq!( + partition_rust_source("#![allow(foo)]\n"), + ("#![allow(foo)]\n", "") + ); + assert_eq!( + partition_rust_source("#![allow(foo)]\nlet x = 1;"), + ("#![allow(foo)]\n", "let x = 1;") + ); + assert_eq!( + partition_rust_source( + "\n\ + #![allow(foo)]\n\ + \n\ + #![allow(bar)]\n\ + \n\ + let x = 1;" + ), + ("\n#![allow(foo)]\n\n#![allow(bar)]\n\n", "let x = 1;") + ); +} diff --git a/crates/mdbook-html/src/html/mod.rs b/crates/mdbook-html/src/html/mod.rs new file mode 100644 index 0000000000..65f28fa102 --- /dev/null +++ b/crates/mdbook-html/src/html/mod.rs @@ -0,0 +1,105 @@ +//! HTML rendering support. +//! +//! This module's primary entry point is [`render_markdown`] which will take +//! markdown text and render it to HTML. In summary, the general procedure of +//! that function is: +//! +//! 1. Use [`pulldown_cmark`] to parse the markdown and generate events. +//! 2. [`tree`] converts those events to a tree data structure. +//! 1. Parse HTML inside the markdown using [`tokenizer`]. +//! 2. Apply various transformations to the tree data structure, such as adding header links. +//! 3. Serialize the tree to HTML in [`serialize()`]. 
+
+use ego_tree::Tree;
+use mdbook_core::book::{Book, Chapter};
+use mdbook_core::config::{HtmlConfig, RustEdition};
+use mdbook_markdown::{MarkdownOptions, new_cmark_parser};
+use std::path::{Path, PathBuf};
+
+mod hide_lines;
+mod print;
+mod serialize;
+#[cfg(test)]
+mod tests;
+mod tokenizer;
+mod tree;
+
+pub(crate) use hide_lines::{hide_lines, wrap_rust_main};
+pub(crate) use print::render_print_page;
+pub(crate) use serialize::serialize;
+pub(crate) use tree::{Element, Node};
+
+/// Options for converting a single chapter's markdown to HTML.
+pub(crate) struct HtmlRenderOptions<'a> {
+    /// Options for parsing markdown.
+    pub markdown_options: MarkdownOptions,
+    /// The chapter's location, relative to the `SUMMARY.md` file.
+    pub path: &'a Path,
+    /// The default Rust edition, used to set the proper class on the code blocks.
+    pub edition: Option<RustEdition>,
+    /// The [`HtmlConfig`], whose options affect how the HTML is generated.
+    pub config: &'a HtmlConfig,
+}
+
+impl<'a> HtmlRenderOptions<'a> {
+    /// Creates a new [`HtmlRenderOptions`], taking `smart_punctuation` from `config`.
+    pub(crate) fn new(
+        path: &'a Path,
+        config: &'a HtmlConfig,
+        edition: Option<RustEdition>,
+    ) -> HtmlRenderOptions<'a> {
+        let mut markdown_options = MarkdownOptions::default();
+        markdown_options.smart_punctuation = config.smart_punctuation;
+        HtmlRenderOptions {
+            markdown_options,
+            path,
+            edition,
+            config,
+        }
+    }
+}
+
+/// Renders markdown to HTML by building the node tree and serializing it.
+pub(crate) fn render_markdown(text: &str, options: &HtmlRenderOptions<'_>) -> String {
+    let tree = build_tree(text, options);
+    let mut output = String::new();
+    serialize::serialize(&tree, &mut output);
+    output
+}
+
+/// Parses markdown into events and converts them to a [`Tree`] of [`Node`]s.
+fn build_tree(text: &str, options: &HtmlRenderOptions<'_>) -> Tree<Node> {
+    let events = new_cmark_parser(text, &options.markdown_options);
+    tree::MarkdownTreeBuilder::build(options, events)
+}
+
+/// The parsed chapter, and some information about the chapter.
+pub(crate) struct ChapterTree<'book> {
+    pub(crate) chapter: &'book Chapter,
+    /// The path to the chapter relative to the root with the `.html` extension.
+    pub(crate) html_path: PathBuf,
+    /// The chapter tree.
+    pub(crate) tree: Tree<Node>,
+}
+
+/// Creates all of the [`ChapterTree`]s for the book, one per chapter yielded by `book.chapters()`.
+pub(crate) fn build_trees<'book>(
+    book: &'book Book,
+    html_config: &HtmlConfig,
+    edition: Option<RustEdition>,
+) -> Vec<ChapterTree<'book>> {
+    book.chapters()
+        .map(|ch| {
+            let path = ch.path.as_ref().unwrap(); // NOTE(review): panics on a path-less chapter; presumably `chapters()` skips drafts - confirm
+            let html_path = path.with_extension("html");
+            let options = HtmlRenderOptions::new(path, html_config, edition);
+            let tree = build_tree(&ch.content, &options);
+
+            ChapterTree {
+                chapter: ch,
+                html_path,
+                tree,
+            }
+        })
+        .collect()
+}
diff --git a/crates/mdbook-html/src/html/print.rs b/crates/mdbook-html/src/html/print.rs
new file mode 100644
index 0000000000..5996ef2b19
--- /dev/null
+++ b/crates/mdbook-html/src/html/print.rs
@@ -0,0 +1,217 @@
+//! Support for generating the print page.
+//!
+//! The print page takes all the individual chapters (as `Tree<Node>`
+//! elements) and modifies the chapters so that they work on a consolidated
+//! print page, and then serializes it all as one HTML page.
+
+use super::Node;
+use crate::html::{ChapterTree, Element, serialize};
+use crate::utils::{ToUrlPath, id_from_content, normalize_path, unique_id};
+use mdbook_core::static_regex;
+use std::collections::HashMap;
+use std::path::{Component, PathBuf};
+
+/// Takes all the chapter trees, modifies them to be suitable to render for
+/// the print page, and returns a string of all the chapters rendered to a
+/// single HTML page.
+pub(crate) fn render_print_page(mut chapter_trees: Vec<ChapterTree<'_>>) -> String {
+    let (id_remap, mut id_counter) = make_ids_unique(&mut chapter_trees);
+    let path_to_root_id = make_root_id_map(&mut chapter_trees, &mut id_counter);
+    rewrite_links(&mut chapter_trees, &id_remap, &path_to_root_id);
+
+    let mut print_content = String::new();
+    for ChapterTree { tree, .. } in chapter_trees {
+        if !print_content.is_empty() {
+            // Add page break between chapters
+            // See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before
+            // Set both CSS properties for browser compatibility.
+            print_content
+                .push_str(r#"<div style="break-before: page; page-break-before: always;"></div>"#);
+        }
+        serialize(&tree, &mut print_content);
+    }
+    print_content
+}
+
+/// Make all IDs unique, and create a map from old to new IDs.
+///
+/// The first map is a map of the chapter path to the IDs that were rewritten
+/// in that chapter (old ID to new ID).
+///
+/// The second map is the ID counter that `unique_id` updates to generate
+/// unique IDs. NOTE(review): its `usize` value type is inferred - confirm.
+fn make_ids_unique(
+    chapter_trees: &mut [ChapterTree<'_>],
+) -> (
+    HashMap<PathBuf, HashMap<String, String>>,
+    HashMap<String, usize>,
+) {
+    let mut id_remap = HashMap::new();
+    let mut id_counter = HashMap::new();
+    for ChapterTree {
+        html_path, tree, ..
+    } in chapter_trees
+    {
+        for value in tree.values_mut() {
+            if let Node::Element(el) = value
+                && let Some(id) = el.attr("id")
+            {
+                let new_id = unique_id(id, &mut id_counter);
+                if new_id != id {
+                    let id = id.to_string();
+                    el.insert_attr("id", new_id.clone().into());
+
+                    let map: &mut HashMap<_, _> = id_remap.entry(html_path.clone()).or_default();
+                    map.insert(id, new_id);
+                }
+            }
+        }
+    }
+    (id_remap, id_counter)
+}
+
+/// Generates a map of a chapter path to the ID of the top of the chapter.
+///
+/// If a chapter is missing an `h1` tag, then one is synthesized so that the
+/// print output has something to link to.
+fn make_root_id_map(
+    chapter_trees: &mut [ChapterTree<'_>],
+    id_counter: &mut HashMap<String, usize>,
+) -> HashMap<PathBuf, String> {
+    let mut path_to_root_id = HashMap::new();
+    for ChapterTree {
+        chapter,
+        html_path,
+        tree,
+        ..
+    } in chapter_trees
+    {
+        let mut h1_found = false;
+        for value in tree.values_mut() {
+            if let Node::Element(el) = value {
+                if el.name() == "h1" {
+                    if let Some(id) = el.attr("id") {
+                        h1_found = true;
+                        path_to_root_id.insert(html_path.clone(), id.to_string());
+                    }
+                    break;
+                } else if matches!(el.name(), "h2" | "h3" | "h4" | "h5" | "h6") {
+                    // A lower-level heading came first, so there is no leading h1.
+                    break;
+                }
+            }
+        }
+        if !h1_found {
+            // Synthesize a root id to be able to link to the start of the page.
+            // TODO: This might want to be a warning? Chapters generally
+            // should start with an h1.
+            let mut h1 = Element::new("h1");
+            let id = id_from_content(&chapter.name);
+            let id = unique_id(&id, id_counter);
+            h1.insert_attr("id", id.clone().into());
+            let mut root = tree.root_mut();
+            let mut h1 = root.prepend(Node::Element(h1));
+            let mut a = Element::new("a");
+            a.insert_attr("href", format!("#{id}").into());
+            a.insert_attr("class", "header".into());
+            let mut a = h1.append(Node::Element(a));
+            a.append(Node::Text(chapter.name.clone().into()));
+            path_to_root_id.insert(html_path.clone(), id);
+        }
+    }
+
+    path_to_root_id
+}
+
+/// Rewrite links in `a` and `img` elements so that they point to IDs on the print page.
+fn rewrite_links(
+    chapter_trees: &mut [ChapterTree<'_>],
+    id_remap: &HashMap<PathBuf, HashMap<String, String>>,
+    path_to_root_id: &HashMap<PathBuf, String>,
+) {
+    static_regex!(
+        LINK,
+        r"(?x)
+        (?P<scheme>^[a-z][a-z0-9+.-]*:)?
+        (?P<path>[^\#]+)?
+        (?:\#(?P<anchor>.*))?"
+    );
+
+    // Rewrite path links to go to the appropriate place.
+    for ChapterTree {
+        html_path, tree, ..
+    } in chapter_trees
+    {
+        let base = html_path.parent().expect("path can't be empty");
+
+        for value in tree.values_mut() {
+            let Node::Element(el) = value else {
+                continue;
+            };
+            if !matches!(el.name(), "a" | "img") {
+                continue;
+            }
+            for attr in ["href", "src", "xlink:href"] {
+                let Some(dest) = el.attr(attr) else {
+                    continue;
+                };
+                let Some(caps) = LINK.captures(&dest) else {
+                    continue;
+                };
+                if caps.name("scheme").is_some() {
+                    continue;
+                }
+                // The lookup_key is the key to look up in the remap table.
+                let mut lookup_key = html_path.clone();
+                if let Some(href_path) = caps.name("path")
+                    && let href_path = href_path.as_str()
+                    && !href_path.is_empty()
+                {
+                    lookup_key.pop();
+                    lookup_key.push(href_path);
+                    let normalized = normalize_path(&lookup_key);
+                    // Links that leave the book or aren't `.html` are not mapped to print-page IDs.
+                    let is_outside = matches!(
+                        normalized.components().next(),
+                        Some(Component::ParentDir | Component::RootDir)
+                    );
+                    if is_outside || !href_path.ends_with(".html") {
+                        // Make the link relative to the print page location.
+                        let mut rel_path = normalize_path(&base.join(href_path)).to_url_path();
+                        if let Some(anchor) = caps.name("anchor") {
+                            rel_path.push('#');
+                            rel_path.push_str(anchor.as_str());
+                        }
+                        el.insert_attr(attr, rel_path.into());
+                        continue;
+                    }
+                }
+
+                let lookup_key = normalize_path(&lookup_key);
+
+                let anchor = caps.name("anchor");
+                let id = match anchor {
+                    Some(anchor_id) => {
+                        let anchor_id = anchor_id.as_str().to_string();
+                        match id_remap.get(&lookup_key) {
+                            Some(id_map) => match id_map.get(&anchor_id) {
+                                Some(new_id) => new_id.clone(),
+                                None => anchor_id,
+                            },
+                            None => {
+                                // Assume the anchor goes to some non-remapped
+                                // ID that already exists.
+                                anchor_id
+                            }
+                        }
+                    }
+                    None => match path_to_root_id.get(&lookup_key) {
+                        Some(id) => id.to_string(),
+                        None => continue,
+                    },
+                };
+                el.insert_attr(attr, format!("#{id}").into());
+            }
+        }
+    }
+}
diff --git a/crates/mdbook-html/src/html/serialize.rs b/crates/mdbook-html/src/html/serialize.rs
new file mode 100644
index 0000000000..3917585465
--- /dev/null
+++ b/crates/mdbook-html/src/html/serialize.rs
@@ -0,0 +1,112 @@
+//! Serializes the [`Node`] tree to an HTML string.
+
+use super::tree::is_void_element;
+use super::tree::{Element, Node};
+use ego_tree::{Tree, iter::Edge};
+use html5ever::{local_name, ns};
+use mdbook_core::utils::{escape_html, escape_html_attribute};
+use std::ops::Deref;
+
+/// Serializes the given tree of [`Node`] elements to an HTML string.
+pub(crate) fn serialize(tree: &Tree<Node>, output: &mut String) {
+    for edge in tree.root().traverse() {
+        match edge {
+            Edge::Open(node) => match node.value() {
+                Node::Element(el) => serialize_start(el, output),
+                Node::Text(text) => {
+                    output.push_str(&escape_html(text));
+                }
+                Node::Comment(comment) => {
+                    output.push_str("<!--");
+                    output.push_str(comment);
+                    output.push_str("-->");
+                }
+                Node::Fragment => {}
+                Node::RawData(html) => {
+                    output.push_str(html);
+                }
+            },
+            Edge::Close(node) => {
+                if let Node::Element(el) = node.value() {
+                    serialize_end(el, output);
+                }
+            }
+        }
+    }
+}
+
+/// Returns true if this HTML element wants a newline to keep the emitted
+/// output more readable.
+fn wants_pretty_html_newline(name: &str) -> bool {
+    matches!(name, "blockquote" | "dd"
+        | "div"
+        | "dl"
+        | "dt"
+        | "h1"
+        | "h2"
+        | "h3"
+        | "h4"
+        | "h5"
+        | "h6"
+        | "hr"
+        | "li"
+        | "ol"
+        | "p"
+        | "pre"
+        | "table"
+        | "tbody"
+        | "thead"
+        | "tr"
+        | "ul")
+}
+
+/// Emit the start tag of an element, including its attributes (values are escaped).
+fn serialize_start(el: &Element, output: &mut String) {
+    let el_name = el.name();
+    if wants_pretty_html_newline(el_name) {
+        if !output.is_empty() {
+            if !output.ends_with('\n') {
+                output.push('\n');
+            }
+        }
+    }
+    output.push('<');
+    output.push_str(el_name);
+    for (attr_name, value) in &el.attrs {
+        output.push(' ');
+        match attr_name.ns {
+            ns!() => (),
+            ns!(xml) => output.push_str("xml:"),
+            ns!(xmlns) => {
+                if attr_name.local != local_name!("xmlns") { // a bare `xmlns` attribute takes no prefix
+                    output.push_str("xmlns:");
+                }
+            }
+            ns!(xlink) => output.push_str("xlink:"),
+            _ => (), // TODO what should it do here?
+        }
+        output.push_str(attr_name.local.deref());
+        output.push_str("=\"");
+        output.push_str(&escape_html_attribute(&value));
+        output.push('"');
+    }
+    if el.self_closing {
+        output.push_str(" /");
+    }
+    output.push('>');
+}
+
+/// Emit the end tag of an element.
+fn serialize_end(el: &Element, output: &mut String) {
+    // Self-closing and void elements do not have an end tag.
+    if el.self_closing || is_void_element(el.name()) {
+        return;
+    }
+    let name = el.name();
+    output.push_str("</");
+    output.push_str(name);
+    output.push('>');
+    if wants_pretty_html_newline(name) {
+        output.push('\n');
+    }
+}
diff --git a/crates/mdbook-html/src/html/tests.rs b/crates/mdbook-html/src/html/tests.rs
new file mode 100644
index 0000000000..58825c9c81
--- /dev/null
+++ b/crates/mdbook-html/src/html/tests.rs
@@ -0,0 +1,53 @@
+use crate::html::tokenizer::parse_html;
+use html5ever::tokenizer::{Tag, TagKind, Token};
+
+// Basic tokenizer behavior of a script.
+#[test]
+fn parse_html_script() {
+    let script = r#"
+if (3 < 5 > 10)
+{
+    alert("The sky is falling!");
+}
+"#;
+    let t = format!("<script>{script}</script>");
+    let ts = parse_html(&t);
+    eprintln!("{ts:#?}",);
+    let mut output = String::new();
+    let mut in_script = false;
+    for t in ts {
+        match t {
+            Token::ParseError(e) => panic!("{e:?}"),
+            Token::CharacterTokens(s) => {
+                if in_script {
+                    output.push_str(&s)
+                }
+            }
+            Token::TagToken(Tag {
+                kind: TagKind::StartTag,
+                ..
+            }) => in_script = true,
+            Token::TagToken(Tag {
+                kind: TagKind::EndTag,
+                ..
+            }) => in_script = false,
+            _ => {}
+        }
+    }
+    assert_eq!(output, script);
+}
+
+// What happens if a script doesn't end.
+#[test]
+fn parse_html_script_unclosed() {
+    let t = r#"`. The ` - +