diff --git a/Cargo.lock b/Cargo.lock index 4c13ac137..992465371 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3607,7 +3607,6 @@ dependencies = [ "paste", "quoted_printable", "serde", - "textwrap", "thiserror 1.0.69", "uuid", ] diff --git a/crates/mailparsing/Cargo.toml b/crates/mailparsing/Cargo.toml index f3ad4c2ab..d9f7c214c 100644 --- a/crates/mailparsing/Cargo.toml +++ b/crates/mailparsing/Cargo.toml @@ -15,7 +15,6 @@ nom_locate = {workspace=true} paste = {workspace=true} quoted_printable = {workspace=true} serde = {workspace=true} -textwrap = {workspace=true} thiserror = {workspace=true} uuid = {workspace=true, features=["v4", "fast-rng"]} diff --git a/crates/mailparsing/src/header.rs b/crates/mailparsing/src/header.rs index ffdc63668..1dc6214b4 100644 --- a/crates/mailparsing/src/header.rs +++ b/crates/mailparsing/src/header.rs @@ -112,14 +112,7 @@ impl<'a> Header<'a> { let value = value.into(); let value = if value.chars().all(|c| c.is_ascii()) { - textwrap::fill( - &value, - textwrap::Options::new(75) - .initial_indent("") - .line_ending(textwrap::LineEnding::CRLF) - .word_separator(textwrap::WordSeparator::AsciiSpace) - .subsequent_indent("\t"), - ) + crate::textwrap::wrap(&value) } else { crate::rfc5322_parser::qp_encode(&value) } @@ -691,6 +684,22 @@ Subject: =?UTF-8?q?hello_there_Andr=C3=A9,_this_is_a_longer_header_than_the_stan k9::assert_equal!(header.as_unstructured().unwrap(), input_text); } + #[test] + fn test_wrapping_in_from_header() { + let header = Header::new_unstructured( + "From", + "=?UTF-8?q?=D8=B1=D9=87=D9=86=D9=85=D8=A7_=DA=A9=D8=A7=D9=84=D8=AC?= \ + ", + ); + + eprintln!("made: {}", header.to_header_string()); + + // In the original problem report, the header got wrapped onto a second + // line at `from-` which broke parsing the header as a mailbox. + // This line asserts that it does parse. + let _ = header.as_mailbox_list().unwrap(); + } + #[test] fn test_date() { let header = Header::with_name_value("Date", "Tue, 1 Jul 2003 10:52:37 +0200"); diff --git a/crates/mailparsing/src/lib.rs b/crates/mailparsing/src/lib.rs index cbdac207c..d1d5046e8 100644 --- a/crates/mailparsing/src/lib.rs +++ b/crates/mailparsing/src/lib.rs @@ -8,6 +8,7 @@ mod nom_utils; mod normalize; mod rfc5322_parser; mod strings; +mod textwrap; pub use error::MailParsingError; pub type Result = std::result::Result; diff --git a/crates/mailparsing/src/textwrap.rs b/crates/mailparsing/src/textwrap.rs new file mode 100644 index 000000000..53f498b5e --- /dev/null +++ b/crates/mailparsing/src/textwrap.rs @@ -0,0 +1,94 @@ +pub fn wrap(value: &str) -> String { + const SOFT_WIDTH: usize = 75; + const HARD_WIDTH: usize = 1000; + wrap_impl(value, SOFT_WIDTH, HARD_WIDTH) +} + +/// We can't use textwrap::fill here because it will prefer to break +/// a line rather than finding stuff that fits. We use a simple +/// algorithm that tries to fill up to the desired width, allowing +/// for overflow if there is a word that is too long to fit in +/// the header, but breaking after a hard limit threshold. +fn wrap_impl(value: &str, soft_width: usize, hard_width: usize) -> String { + let mut result = String::new(); + let mut line = String::new(); + + for word in value.split_ascii_whitespace() { + if line.len() + word.len() + 1 <= soft_width { + if !line.is_empty() { + line.push(' '); + } + line.push_str(word); + continue; + } + + // Need to wrap. + + // Accumulate line so far, if any + if !line.is_empty() { + if !result.is_empty() { + // There's an existing line, start a new one, indented + result.push('\t'); + } + result.push_str(&line); + result.push_str("\r\n"); + line.clear(); + } + + // build out a line from the characters of this one + if word.len() <= hard_width { + line.push_str(word); + } else { + for c in word.chars() { + line.push(c); + if line.len() >= hard_width { + if !result.is_empty() { + result.push('\t'); + } + result.push_str(&line); + result.push_str("\r\n"); + line.clear(); + continue; + } + } + } + } + + if !line.is_empty() { + if !result.is_empty() { + result.push('\t'); + } + result.push_str(&line); + } + + result +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn wrapping() { + for (input, expect) in [ + ("foo", "foo"), + ("hi there", "hi there"), + ("hello world", "hello\r\n\tworld"), + ( + "hello world foo bar baz woot woot", + "hello\r\n\tworld foo\r\n\tbar baz\r\n\twoot woot", + ), + ( + "hi there breakmepleaseIamtoolong", + "hi there\r\n\tbreakmepleaseIa\r\n\tmtoolong", + ), + ] { + let wrapped = wrap_impl(input, 10, 15); + k9::assert_equal!( + wrapped, + expect, + "input: '{input}' should produce '{expect}'" + ); + } + } +} diff --git a/docs/changelog/main.md b/docs/changelog/main.md index 21ff59378..874e3dc47 100644 --- a/docs/changelog/main.md +++ b/docs/changelog/main.md @@ -90,3 +90,7 @@ 1-new-connection-per-10-minute period if that ready queue had no new messages being added to it and if a connection limit had prevented new connections being opened. + +* When using the HTTP injection API to construct a mailbox using UTF-8 characters, + the resulting From header could wrap in an undesirable location and produce + an invalid From header that fails to parse.