From a265450a599b72fae8276db7430989f057572f2f Mon Sep 17 00:00:00 2001 From: Florian Dieminger Date: Tue, 10 Dec 2024 21:01:14 +0100 Subject: [PATCH] feat(md): custom html escape We don't want percent encoded links internally. --- crates/rari-md/src/html.rs | 42 ++++++++++++++++++-------------------- crates/rari-md/src/lib.rs | 17 +++++++++++++++ 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/crates/rari-md/src/html.rs b/crates/rari-md/src/html.rs index 5ce23ff..23147a7 100644 --- a/crates/rari-md/src/html.rs +++ b/crates/rari-md/src/html.rs @@ -274,40 +274,38 @@ pub fn escape(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> { /// the string "a b", rather than "?q=a%2520b", a search for the literal /// string "a%20b". pub fn escape_href(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> { - const HREF_SAFE: [bool; 256] = character_set!( - b"-_.+!*(),%#@?=;:/,+$~", - b"abcdefghijklmnopqrstuvwxyz", - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" - ); - let size = buffer.len(); let mut i = 0; + let mut escaped = ""; while i < size { let org = i; - while i < size && HREF_SAFE[buffer[i] as usize] { - i += 1; + while i < size { + escaped = match buffer[i] { + b'&' => "&", + b'<' => "<", + b'>' => ">", + b'"' => """, + b'\'' => "'", + _ => { + i += 1; + "" + } + }; + if !escaped.is_empty() { + break; + } } if i > org { output.write_all(&buffer[org..i])?; } - if i >= size { - break; - } - - match buffer[i] as char { - '&' => { - output.write_all(b"&")?; - } - '\'' => { - output.write_all(b"'")?; - } - _ => write!(output, "%{:02X}", buffer[i])?, + if !escaped.is_empty() { + output.write_all(escaped.as_bytes())?; + escaped = ""; + i += 1; } - - i += 1; } Ok(()) diff --git a/crates/rari-md/src/lib.rs b/crates/rari-md/src/lib.rs index a1d9fc2..7e54f78 100644 --- a/crates/rari-md/src/lib.rs +++ b/crates/rari-md/src/lib.rs @@ -83,6 +83,8 @@ pub fn m2h_internal( #[cfg(test)] mod test { + use html::escape_href; + use super::*; #[test] @@ -169,4 +171,19 @@ mod test { ); Ok(()) } + + #[test] + fn escape_hrefs() -> Result<(), anyhow::Error> { + fn eh(s: &str) -> Result { + let mut out = Vec::with_capacity(s.len()); + escape_href(&mut out, s.as_bytes())?; + Ok(String::from_utf8(out)?) + } + + assert_eq!(eh("/en-US/foo/bar")?, "/en-US/foo/bar"); + assert_eq!(eh("/en-US/foo/\"")?, "/en-US/foo/""); + assert_eq!(eh("/en-US/foo