Skip to content

Commit

Permalink
feat(md): custom html escape
Browse files Browse the repository at this point in the history
We don't want percent-encoded links internally.
  • Loading branch information
fiji-flo committed Dec 10, 2024
1 parent f3b85f4 commit a265450
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 22 deletions.
42 changes: 20 additions & 22 deletions crates/rari-md/src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,40 +274,38 @@ pub fn escape(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> {
/// the string "a b", rather than "?q=a%2520b", a search for the literal
/// string "a%20b".
pub fn escape_href(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> {
const HREF_SAFE: [bool; 256] = character_set!(
b"-_.+!*(),%#@?=;:/,+$~",
b"abcdefghijklmnopqrstuvwxyz",
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
);

let size = buffer.len();
let mut i = 0;
let mut escaped = "";

while i < size {
let org = i;
while i < size && HREF_SAFE[buffer[i] as usize] {
i += 1;
while i < size {
escaped = match buffer[i] {
b'&' => "&amp;",
b'<' => "&lt;",
b'>' => "&gt;",
b'"' => "&quot;",
b'\'' => "&#x27;",
_ => {
i += 1;
""
}
};
if !escaped.is_empty() {
break;
}
}

if i > org {
output.write_all(&buffer[org..i])?;
}

if i >= size {
break;
}

match buffer[i] as char {
'&' => {
output.write_all(b"&amp;")?;
}
'\'' => {
output.write_all(b"&#x27;")?;
}
_ => write!(output, "%{:02X}", buffer[i])?,
if !escaped.is_empty() {
output.write_all(escaped.as_bytes())?;
escaped = "";
i += 1;
}

i += 1;
}

Ok(())
Expand Down
17 changes: 17 additions & 0 deletions crates/rari-md/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ pub fn m2h_internal(

#[cfg(test)]
mod test {
use html::escape_href;

use super::*;

#[test]
Expand Down Expand Up @@ -169,4 +171,19 @@ mod test {
);
Ok(())
}

/// Verifies that `escape_href` replaces HTML-significant characters with
/// entities and leaves every other byte untouched (no percent-encoding).
#[test]
fn escape_hrefs() -> Result<(), anyhow::Error> {
    /// Runs `escape_href` over `s` and returns the written bytes as a `String`.
    fn eh(s: &str) -> Result<String, anyhow::Error> {
        let mut out = Vec::with_capacity(s.len());
        escape_href(&mut out, s.as_bytes())?;
        Ok(String::from_utf8(out)?)
    }

    // Input without any escapable character is copied through verbatim.
    assert_eq!(eh("")?, "");
    assert_eq!(eh("/en-US/foo/bar")?, "/en-US/foo/bar");

    // Each of the five HTML-significant characters maps to its entity.
    assert_eq!(eh("/en-US/foo/\"")?, "/en-US/foo/&quot;");
    assert_eq!(eh("/en-US/foo<script")?, "/en-US/foo&lt;script");
    assert_eq!(eh("/en-US/foo>bar")?, "/en-US/foo&gt;bar");
    assert_eq!(eh("/en-US/foo&bar")?, "/en-US/foo&amp;bar");
    assert_eq!(eh("/en-US/foo'bar")?, "/en-US/foo&#x27;bar");

    // Adjacent escapable characters are each escaped, at the start and the
    // end of the input as well as in the middle.
    assert_eq!(eh("<&>")?, "&lt;&amp;&gt;");
    assert_eq!(eh("\"a\"")?, "&quot;a&quot;");

    // Links must not be percent-encoded: spaces, existing percent escapes
    // and non-ASCII bytes all pass through unchanged.
    assert_eq!(eh("/en-US/foo bar")?, "/en-US/foo bar");
    assert_eq!(eh("/en-US/foo%20bar")?, "/en-US/foo%20bar");
    assert_eq!(eh("/en-US/föö")?, "/en-US/föö");
    Ok(())
}
}

0 comments on commit a265450

Please sign in to comment.