From bb340e0c8e2de6bb7cb8258277783a37064047b5 Mon Sep 17 00:00:00 2001 From: Christopher Illarionova Date: Wed, 19 Nov 2025 01:09:16 +0000 Subject: [PATCH 1/4] Adding combining character support for fold --- src/uu/fold/src/fold.rs | 12 +++++++++++- tests/by-util/test_fold.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs index f14ed3cf071..4de49482213 100644 --- a/src/uu/fold/src/fold.rs +++ b/src/uu/fold/src/fold.rs @@ -434,7 +434,17 @@ fn process_utf8_line(line: &str, ctx: &mut FoldContext<'_, W>) -> URes let mut iter = line.char_indices().peekable(); while let Some((byte_idx, ch)) = iter.next() { - let next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len()); + let mut next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len()); + + // Include combining characters with the base character + while let Some(&(_, next_ch)) = iter.peek() { + if unicode_width::UnicodeWidthChar::width(next_ch).unwrap_or(1) == 0 { + iter.next(); + next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len()); + } else { + break; + } + } if ch == '\n' { *ctx.last_space = None; diff --git a/tests/by-util/test_fold.rs b/tests/by-util/test_fold.rs index 04072ab157f..ffc3da9d7f2 100644 --- a/tests/by-util/test_fold.rs +++ b/tests/by-util/test_fold.rs @@ -597,3 +597,36 @@ fn test_all_tab_advances_at_non_utf8_character() { .succeeds() .stdout_is_fixture_bytes("non_utf8_tab_stops_w16.expected"); } + +#[test] +fn test_combining_characters_nfc() { + // e acute NFC form (single character) + let e_acute_nfc = "\u{00E9}"; // é as single character + new_ucmd!() + .arg("-w2") + .pipe_in(format!("{}{}{}", e_acute_nfc, e_acute_nfc, e_acute_nfc)) + .succeeds() + .stdout_is(format!("{}{}\n{}", e_acute_nfc, e_acute_nfc, e_acute_nfc)); +} + +#[test] +fn test_combining_characters_nfd() { + // e acute NFD form (base + combining acute) + let e_acute_nfd = "e\u{0301}"; // e + combining acute accent + new_ucmd!() + .arg("-w2") + .pipe_in(format!("{}{}{}", e_acute_nfd, e_acute_nfd, e_acute_nfd)) + .succeeds() + .stdout_is(format!("{}{}\n{}", e_acute_nfd, e_acute_nfd, e_acute_nfd)); +} + +#[test] +fn test_fullwidth_characters() { + // e fullwidth (takes 2 columns) + let e_fullwidth = "\u{FF45}"; // e + new_ucmd!() + .arg("-w2") + .pipe_in(format!("{}{}", e_fullwidth, e_fullwidth)) + .succeeds() + .stdout_is(format!("{}\n{}", e_fullwidth, e_fullwidth)); +} From 29d90eab12bd6abdf61672e3364ac7d3991a0522 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 19 Nov 2025 08:32:45 +0100 Subject: [PATCH 2/4] add fullwidth to the spell ignore list --- tests/by-util/test_fold.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/by-util/test_fold.rs b/tests/by-util/test_fold.rs index ffc3da9d7f2..8986d05791d 100644 --- a/tests/by-util/test_fold.rs +++ b/tests/by-util/test_fold.rs @@ -2,6 +2,8 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +// spell-checker:ignore fullwidth + use uutests::new_ucmd; #[test] From 1d34653820a8fbad057a667bc8704f8832a55a60 Mon Sep 17 00:00:00 2001 From: Christopher Illarionova Date: Wed, 19 Nov 2025 17:24:21 +0000 Subject: [PATCH 3/4] addressing comments and cargo fmt fixes --- src/uu/fold/src/fold.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/uu/fold/src/fold.rs b/src/uu/fold/src/fold.rs index 4de49482213..a2ddbed6a68 100644 --- a/src/uu/fold/src/fold.rs +++ b/src/uu/fold/src/fold.rs @@ -434,18 +434,17 @@ fn process_utf8_line(line: &str, ctx: &mut FoldContext<'_, W>) -> URes let mut iter = line.char_indices().peekable(); while let Some((byte_idx, ch)) = iter.next() { - let mut next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len()); - // Include combining characters with the base character while let Some(&(_, next_ch)) = iter.peek() { if unicode_width::UnicodeWidthChar::width(next_ch).unwrap_or(1) == 0 { iter.next(); - next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len()); } else { break; } } + let next_idx = iter.peek().map(|(idx, _)| *idx).unwrap_or(line_bytes.len()); + if ch == '\n' { *ctx.last_space = None; emit_output(ctx)?; From 01c25b6029639c6f3db0ff72d03c4a3a4f755ed8 Mon Sep 17 00:00:00 2001 From: Christopher Illarionova Date: Wed, 19 Nov 2025 17:50:54 +0000 Subject: [PATCH 4/4] clippy fixes for test files --- tests/by-util/test_fold.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/by-util/test_fold.rs b/tests/by-util/test_fold.rs index 8986d05791d..9497044c910 100644 --- a/tests/by-util/test_fold.rs +++ b/tests/by-util/test_fold.rs @@ -606,9 +606,9 @@ fn test_combining_characters_nfc() { let e_acute_nfc = "\u{00E9}"; // é as single character new_ucmd!() .arg("-w2") - .pipe_in(format!("{}{}{}", e_acute_nfc, e_acute_nfc, e_acute_nfc)) + .pipe_in(format!("{e_acute_nfc}{e_acute_nfc}{e_acute_nfc}")) .succeeds() - .stdout_is(format!("{}{}\n{}", e_acute_nfc, e_acute_nfc, e_acute_nfc)); + .stdout_is(format!("{e_acute_nfc}{e_acute_nfc}\n{e_acute_nfc}")); } #[test] @@ -617,9 +617,9 @@ fn test_combining_characters_nfd() { let e_acute_nfd = "e\u{0301}"; // e + combining acute accent new_ucmd!() .arg("-w2") - .pipe_in(format!("{}{}{}", e_acute_nfd, e_acute_nfd, e_acute_nfd)) + .pipe_in(format!("{e_acute_nfd}{e_acute_nfd}{e_acute_nfd}")) .succeeds() - .stdout_is(format!("{}{}\n{}", e_acute_nfd, e_acute_nfd, e_acute_nfd)); + .stdout_is(format!("{e_acute_nfd}{e_acute_nfd}\n{e_acute_nfd}")); } #[test] @@ -628,7 +628,7 @@ fn test_fullwidth_characters() { let e_fullwidth = "\u{FF45}"; // e new_ucmd!() .arg("-w2") - .pipe_in(format!("{}{}", e_fullwidth, e_fullwidth)) + .pipe_in(format!("{e_fullwidth}{e_fullwidth}")) .succeeds() - .stdout_is(format!("{}\n{}", e_fullwidth, e_fullwidth)); + .stdout_is(format!("{e_fullwidth}\n{e_fullwidth}")); }