Skip to content

Commit

Permalink
mailparsing: implement own text wrapping function
Browse files Browse the repository at this point in the history
A user reported that constructing certain UTF-8 From headers
in the HTTP injection API could produce a From header that
could not be parsed by the DKIM helper when subsequently
attempting to sign the message.

The issue was that the textwrap crate will try to fill out
the wrap, preferring to break an existing word rather than
generating a new line to accommodate one when it would
produce a line that was too long.

This commit adds our own text wrapping algorithm that is
more forgiving.
  • Loading branch information
wez committed Dec 31, 2024
1 parent ff8c0cc commit 4909fe8
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 10 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion crates/mailparsing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ nom_locate = {workspace=true}
paste = {workspace=true}
quoted_printable = {workspace=true}
serde = {workspace=true}
textwrap = {workspace=true}
thiserror = {workspace=true}
uuid = {workspace=true, features=["v4", "fast-rng"]}

Expand Down
25 changes: 17 additions & 8 deletions crates/mailparsing/src/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,7 @@ impl<'a> Header<'a> {
let value = value.into();

let value = if value.chars().all(|c| c.is_ascii()) {
textwrap::fill(
&value,
textwrap::Options::new(75)
.initial_indent("")
.line_ending(textwrap::LineEnding::CRLF)
.word_separator(textwrap::WordSeparator::AsciiSpace)
.subsequent_indent("\t"),
)
crate::textwrap::wrap(&value)
} else {
crate::rfc5322_parser::qp_encode(&value)
}
Expand Down Expand Up @@ -691,6 +684,22 @@ Subject: =?UTF-8?q?hello_there_Andr=C3=A9,_this_is_a_longer_header_than_the_stan
k9::assert_equal!(header.as_unstructured().unwrap(), input_text);
}

#[test]
fn test_wrapping_in_from_header() {
let header = Header::new_unstructured(
"From",
"=?UTF-8?q?=D8=B1=D9=87=D9=86=D9=85=D8=A7_=DA=A9=D8=A7=D9=84=D8=AC?= \
<from-dash-wrap-me@example.com>",
);

eprintln!("made: {}", header.to_header_string());

// In the original problem report, the header got wrapped onto a second
// line at `from-` which broke parsing the header as a mailbox.
// This line asserts that it does parse.
let _ = header.as_mailbox_list().unwrap();
}

#[test]
fn test_date() {
let header = Header::with_name_value("Date", "Tue, 1 Jul 2003 10:52:37 +0200");
Expand Down
1 change: 1 addition & 0 deletions crates/mailparsing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ mod nom_utils;
mod normalize;
mod rfc5322_parser;
mod strings;
mod textwrap;

pub use error::MailParsingError;
pub type Result<T> = std::result::Result<T, MailParsingError>;
Expand Down
94 changes: 94 additions & 0 deletions crates/mailparsing/src/textwrap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
pub fn wrap(value: &str) -> String {
const SOFT_WIDTH: usize = 75;
const HARD_WIDTH: usize = 1000;
wrap_impl(value, SOFT_WIDTH, HARD_WIDTH)
}

/// We can't use textwrap::fill here because it will prefer to break
/// a line rather than finding stuff that fits. We use a simple
/// algorithm that tries to fill up to the desired width, allowing
/// for overflow if there is a word that is too long to fit in
/// the header, but breaking after a hard limit threshold.
fn wrap_impl(value: &str, soft_width: usize, hard_width: usize) -> String {
let mut result = String::new();
let mut line = String::new();

for word in value.split_ascii_whitespace() {
if line.len() + word.len() + 1 <= soft_width {
if !line.is_empty() {
line.push(' ');
}
line.push_str(word);
continue;
}

// Need to wrap.

// Accumulate line so far, if any
if !line.is_empty() {
if !result.is_empty() {
// There's an existing line, start a new one, indented
result.push('\t');
}
result.push_str(&line);
result.push_str("\r\n");
line.clear();
}

// build out a line from the characters of this one
if word.len() <= hard_width {
line.push_str(word);
} else {
for c in word.chars() {
line.push(c);
if line.len() >= hard_width {
if !result.is_empty() {
result.push('\t');
}
result.push_str(&line);
result.push_str("\r\n");
line.clear();
continue;
}
}
}
}

if !line.is_empty() {
if !result.is_empty() {
result.push('\t');
}
result.push_str(&line);
}

result
}

#[cfg(test)]
mod test {
use super::*;

#[test]
fn wrapping() {
for (input, expect) in [
("foo", "foo"),
("hi there", "hi there"),
("hello world", "hello\r\n\tworld"),
(
"hello world foo bar baz woot woot",
"hello\r\n\tworld foo\r\n\tbar baz\r\n\twoot woot",
),
(
"hi there breakmepleaseIamtoolong",
"hi there\r\n\tbreakmepleaseIa\r\n\tmtoolong",
),
] {
let wrapped = wrap_impl(input, 10, 15);
k9::assert_equal!(
wrapped,
expect,
"input: '{input}' should produce '{expect}'"
);
}
}
}
4 changes: 4 additions & 0 deletions docs/changelog/main.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,7 @@
1-new-connection-per-10-minute period if that ready queue had no new messages
being added to it and if a connection limit had prevented new connections
being opened.

* When using the HTTP injection API to construct a mailbox using UTF-8 characters,
the resulting From header could wrap in an undesirable location and produce
an invalid From header that fails to parse.

0 comments on commit 4909fe8

Please sign in to comment.