Treat control characters as width 1, fixes #16

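This is consistent with how unicode-width distinguishes string width from char width: a control character has no char width (`None`), but counts as width 1 when it appears inside a string. See also unicode-rs/unicode-width#45.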
Aetf · Jun 24, 2024 · 8731fef · 8731fef
1 parent 9e49ef4
commit 8731fef
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 6 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -44,3 +44,5 @@ harness = false
 codegen-units = 1
 lto = true
 
+[profile.test]
+debug-assertions = true
diff --git a/src/lib.rs b/src/lib.rs
@@ -152,7 +152,9 @@ impl UnicodeTruncateStr for str {
         let (byte_index, new_width) = self
             .char_indices()
             // map to byte index and the width of char start at the index
-            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
+            // control characters are treated as having width 1
+            // https://github.com/unicode-rs/unicode-width/pull/45
+            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
             // chain a final element representing the position past the last char
             .chain(core::iter::once((self.len(), 0)))
             // fold to byte index and the width up to the index
@@ -164,6 +166,7 @@ impl UnicodeTruncateStr for str {
                 *sum = sum.checked_add(char_width)?;
                 Some((byte_index, current_width))
             })
+            .inspect(|&(bidx, cw)| println!("bidx={bidx}, cw={cw}"))
             // take the longest but still shorter than requested
             .take_while(|&(_, current_width)| current_width <= max_width)
             .last()
@@ -182,7 +185,9 @@ impl UnicodeTruncateStr for str {
             // instead of start checking from the start do so from the end
             .rev()
             // map to byte index and the width of char start at the index
-            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
+            // control characters are treated as having width 1
+            // https://github.com/unicode-rs/unicode-width/pull/45
+            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
             // skip any position with zero width, the cut won't happen at these points
             // this also helps with not including zero width char at the beginning
             .filter(|&(_, char_width)| char_width > 0)
@@ -223,7 +228,9 @@ impl UnicodeTruncateStr for str {
 
         let from_start = self
             .char_indices()
-            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
+            // control characters are treated as having width 1
+            // https://github.com/unicode-rs/unicode-width/pull/45
+            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
             // skip any position with zero width, the cut won't happen at these points
             // this also helps with removing zero width char at the beginning
             .filter(|&(_, char_width)| char_width > 0)
@@ -242,7 +249,9 @@ impl UnicodeTruncateStr for str {
 
         let from_end = self
             .char_indices()
-            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
+            // control characters are treated as having width 1
+            // https://github.com/unicode-rs/unicode-width/pull/45
+            .map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
             // skip any position with zero width, the cut won't happen at these points
             // this also helps with keeping zero width char at the end
             .filter(|&(_, char_width)| char_width > 0)
@@ -511,6 +520,13 @@ mod tests {
                 ("b\u{0306}y\u{0306}", 2)
             );
         }
+
+        #[test]
+        fn control_char() { // regression test: control characters count as width 1 when truncating
+            assert_eq!("\u{0019}".width(), 1); // str-level width reports 1 for the control char U+0019
+            assert_eq!('\u{0019}'.width(), None); // char-level width is None for the same code point
+            assert_eq!("\u{0019}".unicode_truncate(2), ("\u{0019}", 1)); // fits within 2: kept whole, width 1
+        }
     }
 
     #[test]