From 51df99f3c2bac3ee7158b115ff6d54b687d018e1 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 24 Jan 2023 15:39:59 -0700 Subject: [PATCH] rustdoc: use smarter encoding for playground URL The old way would compress okay with DEFLATE, but this version makes uncompressed docs smaller, which matters for memory usage and stuff like `cargo doc`. Try it out: In local testing, this change shrinks sample pages by anywhere between 4.0% and 0.32% $ du -b after.dir/std/vec/struct.Vec.html before.dir/std/vec/struct.Vec.html 759235 after.dir/std/vec/struct.Vec.html 781842 before.dir/std/vec/struct.Vec.html 100*((759235-781842)/781842)=-2.8 $ du -b after.dir/std/num/struct.Wrapping.html before.dir/std/num/struct.Wrapping.html 3194173 after.dir/std/num/struct.Wrapping.html 3204351 before.dir/std/num/struct.Wrapping.html 100*((3194173-3204351)/3204351)=-0.32 $ du -b after.dir/std/keyword.match.html before.dir/std/keyword.match.html 8151 after.dir/std/keyword.match.html 8495 before.dir/std/keyword.match.html 100*((8151-8495)/8495)=-4.0 Gzipped tarball sizes seem shrunk, but not by much. du -s before.tar.gz after.tar.gz 69600 before.tar.gz 69480 after.tar.gz 100*((69480-69600)/69600)=-0.17 --- src/librustdoc/html/markdown.rs | 19 ++++++++++++++++++- tests/rustdoc/playground-arg.rs | 2 +- tests/rustdoc/playground.rs | 6 +++--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 4ff67fe1551dd..b1efbf4bdcaf7 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -296,7 +296,9 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> { let channel = if test.contains("#![feature(") { "&version=nightly" } else { "" }; // These characters don't need to be escaped in a URI. - // FIXME: use a library function for percent encoding. 
+ // See https://url.spec.whatwg.org/#query-percent-encode-set + // and https://url.spec.whatwg.org/#urlencoded-parsing + // and https://url.spec.whatwg.org/#url-code-points fn dont_escape(c: u8) -> bool { (b'a' <= c && c <= b'z') || (b'A' <= c && c <= b'Z') @@ -304,17 +306,32 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> { || c == b'-' || c == b'_' || c == b'.' + || c == b',' || c == b'~' || c == b'!' || c == b'\'' || c == b'(' || c == b')' || c == b'*' + || c == b'/' + || c == b';' + || c == b':' + || c == b'?' + // As described in urlencoded-parsing, the + // first `=` is the one that separates key from + // value. Following `=`s are part of the value. + || c == b'=' } let mut test_escaped = String::new(); for b in test.bytes() { if dont_escape(b) { test_escaped.push(char::from(b)); + } else if b == b' ' { + // URL queries are decoded with + replaced with SP + test_escaped.push('+'); + } else if b == b'%' { + // A literal `%` must itself be percent-encoded; `%%` is not an escape and decodes wrongly. + test_escaped.push_str("%25"); } else { write!(test_escaped, "%{:02X}", b).unwrap(); } diff --git a/tests/rustdoc/playground-arg.rs b/tests/rustdoc/playground-arg.rs index 69c8962653931..f3811fe0b0ad1 100644 --- a/tests/rustdoc/playground-arg.rs +++ b/tests/rustdoc/playground-arg.rs @@ -10,4 +10,4 @@ pub fn dummy() {} // ensure that `extern crate foo;` was inserted into code snips automatically: -// @matches foo/index.html '//a[@class="test-arrow"][@href="https://example.com/?code=%23!%5Ballow(unused)%5D%0Aextern%20crate%20r%23foo%3B%0Afn%20main()%20%7B%0Ause%20foo%3A%3Adummy%3B%0Adummy()%3B%0A%7D&edition=2015"]' "Run" +// @matches foo/index.html '//a[@class="test-arrow"][@href="https://example.com/?code=%23!%5Ballow(unused)%5D%0Aextern+crate+r%23foo;%0Afn+main()+%7B%0Ause+foo::dummy;%0Adummy();%0A%7D&edition=2015"]' "Run" diff --git a/tests/rustdoc/playground.rs b/tests/rustdoc/playground.rs index 877ea1cfba15a..5c7fa33efc5e5 100644 --- a/tests/rustdoc/playground.rs +++ b/tests/rustdoc/playground.rs @@ -22,6 +22,6 @@ //! 
} //! ``` -// @matches foo/index.html '//a[@class="test-arrow"][@href="https://www.example.com/?code=%23!%5Ballow(unused)%5D%0Afn%20main()%20%7B%0A%20%20%20%20println!(%22Hello%2C%20world!%22)%3B%0A%7D&edition=2015"]' "Run" -// @matches foo/index.html '//a[@class="test-arrow"][@href="https://www.example.com/?code=%23!%5Ballow(unused)%5D%0Afn%20main()%20%7B%0Aprintln!(%22Hello%2C%20world!%22)%3B%0A%7D&edition=2015"]' "Run" -// @matches foo/index.html '//a[@class="test-arrow"][@href="https://www.example.com/?code=%23!%5Ballow(unused)%5D%0A%23!%5Bfeature(something)%5D%0A%0Afn%20main()%20%7B%0A%20%20%20%20println!(%22Hello%2C%20world!%22)%3B%0A%7D&version=nightly&edition=2015"]' "Run" +// @matches foo/index.html '//a[@class="test-arrow"][@href="https://www.example.com/?code=%23!%5Ballow(unused)%5D%0Afn+main()+%7B%0Aprintln!(%22Hello,+world!%22);%0A%7D&edition=2015"]' "Run" +// @matches foo/index.html '//a[@class="test-arrow"][@href="https://www.example.com/?code=%23!%5Ballow(unused)%5D%0Afn+main()+%7B%0A++++println!(%22Hello,+world!%22);%0A%7D&edition=2015"]' "Run" +// @matches foo/index.html '//a[@class="test-arrow"][@href="https://www.example.com/?code=%23!%5Ballow(unused)%5D%0A%23!%5Bfeature(something)%5D%0A%0Afn+main()+%7B%0A++++println!(%22Hello,+world!%22);%0A%7D&version=nightly&edition=2015"]' "Run"