Skip to content

Commit

Permalink
Treat control characters as width 1, fixes #16
Browse files Browse the repository at this point in the history
This is consistent with how unicode-width handles string width vs char
width.

See also unicode-rs/unicode-width#45
  • Loading branch information
Aetf committed Jun 24, 2024
1 parent 9e49ef4 commit 8731fef
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 6 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,5 @@ harness = false
codegen-units = 1
lto = true

[profile.test]
debug-assertions = true
24 changes: 20 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,9 @@ impl UnicodeTruncateStr for str {
let (byte_index, new_width) = self
.char_indices()
// map each char to its byte index and the display width of the char starting at that index
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
// treat control characters (for which `char::width` returns `None`) as having width 1
// https://github.com/unicode-rs/unicode-width/pull/45
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
// chain a final element representing the position past the last char
.chain(core::iter::once((self.len(), 0)))
// fold to byte index and the width up to the index
Expand All @@ -164,6 +166,7 @@ impl UnicodeTruncateStr for str {
*sum = sum.checked_add(char_width)?;
Some((byte_index, current_width))
})
.inspect(|&(bidx, cw)| println!("bidx={bidx}, cw={cw}"))
// keep the longest prefix whose width does not exceed the requested width
.take_while(|&(_, current_width)| current_width <= max_width)
.last()
Expand All @@ -182,7 +185,9 @@ impl UnicodeTruncateStr for str {
// instead of start checking from the start do so from the end
.rev()
// map each char to its byte index and the display width of the char starting at that index
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
// treat control characters (for which `char::width` returns `None`) as having width 1
// https://github.com/unicode-rs/unicode-width/pull/45
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
// skip any position with zero width, the cut won't happen at these points
// this also helps with not including zero width char at the beginning
.filter(|&(_, char_width)| char_width > 0)
Expand Down Expand Up @@ -223,7 +228,9 @@ impl UnicodeTruncateStr for str {

let from_start = self
.char_indices()
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
// treat control characters (for which `char::width` returns `None`) as having width 1
// https://github.com/unicode-rs/unicode-width/pull/45
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
// skip any position with zero width, the cut won't happen at these points
// this also helps with removing zero width char at the beginning
.filter(|&(_, char_width)| char_width > 0)
Expand All @@ -242,7 +249,9 @@ impl UnicodeTruncateStr for str {

let from_end = self
.char_indices()
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(0)))
// treat control characters (for which `char::width` returns `None`) as having width 1
// https://github.com/unicode-rs/unicode-width/pull/45
.map(|(byte_index, char)| (byte_index, char.width().unwrap_or(1)))
// skip any position with zero width, the cut won't happen at these points
// this also helps with keeping zero width char at the end
.filter(|&(_, char_width)| char_width > 0)
Expand Down Expand Up @@ -511,6 +520,13 @@ mod tests {
("b\u{0306}y\u{0306}", 2)
);
}

/// Regression test: a control character counts as one column when truncating,
/// matching the width reported for a string that contains it.
#[test]
fn control_char() {
    // U+0019 is a control character, used here both as a string and as a lone char.
    let ctrl_str = "\u{0019}";
    let ctrl_char = '\u{0019}';

    // As part of a string the control character occupies one column ...
    assert_eq!(ctrl_str.width(), 1);
    // ... while the char on its own reports no width at all.
    assert_eq!(ctrl_char.width(), None);

    // Truncation agrees with the string width: the whole string fits within a
    // budget of 2 and its reported width is 1.
    let truncated = ctrl_str.unicode_truncate(2);
    assert_eq!(truncated, ("\u{0019}", 1));
}
}

#[test]
Expand Down

0 comments on commit 8731fef

Please sign in to comment.