Skip to content

Commit

Permalink
add unicode-width to str stats (nushell#14014)
Browse files Browse the repository at this point in the history
# Description

This PR adds another type of length to `str stats`: `unicode-width`, the number of terminal columns the string occupies when displayed.
```nushell
❯ "\u{ff03}" | str stats
╭───────────────┬───╮
│ lines         │ 1 │
│ words         │ 0 │
│ bytes         │ 3 │
│ chars         │ 1 │
│ graphemes     │ 1 │
│ unicode-width │ 2 │
╰───────────────┴───╯
❯ "Amélie Amelie" | str stats
╭───────────────┬────╮
│ lines         │ 1  │
│ words         │ 2  │
│ bytes         │ 15 │
│ chars         │ 14 │
│ graphemes     │ 13 │
│ unicode-width │ 13 │
╰───────────────┴────╯
❯ '今天天气真好' | str stats
╭───────────────┬────╮
│ lines         │ 1  │
│ words         │ 6  │
│ bytes         │ 18 │
│ chars         │ 6  │
│ graphemes     │ 6  │
│ unicode-width │ 12 │
╰───────────────┴────╯
❯ "Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα." | str stats
╭───────────────┬────╮
│ lines         │ 1  │
│ words         │ 9  │
│ bytes         │ 96 │
│ chars         │ 50 │
│ graphemes     │ 50 │
│ unicode-width │ 50 │
╰───────────────┴────╯
❯ "\n" | str stats
╭───────────────┬───╮
│ lines         │ 1 │
│ words         │ 0 │
│ bytes         │ 1 │
│ chars         │ 1 │
│ graphemes     │ 1 │
│ unicode-width │ 0 │
╰───────────────┴───╯
```
The idea for this PR came from wondering whether we could replace `#` with
`\u{ff03}` (the fullwidth number sign, `＃`) in tables.

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->
The record returned by `str stats` now contains an additional `unicode-width` field.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
  • Loading branch information
fdncred authored Oct 6, 2024
1 parent d6f4e4c commit 6dc71f5
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions crates/nu-command/src/strings/str_/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,19 @@ impl Command for SubCommand {
"bytes" => Value::test_int(38),
"chars" => Value::test_int(38),
"graphemes" => Value::test_int(38),
"unicode-width" => Value::test_int(38),
})),
},
Example {
description: "Counts unicode characters",
example: r#"'今天天气真好' | str stats "#,
example: r#"'今天天气真好' | str stats"#,
result: Some(Value::test_record(record! {
"lines" => Value::test_int(1),
"words" => Value::test_int(6),
"bytes" => Value::test_int(18),
"chars" => Value::test_int(6),
"graphemes" => Value::test_int(6),
"unicode-width" => Value::test_int(12),
})),
},
Example {
Expand All @@ -86,6 +88,7 @@ impl Command for SubCommand {
"bytes" => Value::test_int(15),
"chars" => Value::test_int(14),
"graphemes" => Value::test_int(13),
"unicode-width" => Value::test_int(13),
})),
},
]
Expand Down Expand Up @@ -139,6 +142,7 @@ fn counter(contents: &str, span: Span) -> Value {
"bytes" => get_count(&counts, Counter::Bytes, span),
"chars" => get_count(&counts, Counter::CodePoints, span),
"graphemes" => get_count(&counts, Counter::GraphemeClusters, span),
"unicode-width" => get_count(&counts, Counter::UnicodeWidth, span),
};

Value::record(record, span)
Expand Down Expand Up @@ -208,6 +212,7 @@ impl Count for Counter {
}
Counter::Words => s.unicode_words().count(),
Counter::CodePoints => s.chars().count(),
Counter::UnicodeWidth => unicode_width::UnicodeWidthStr::width(s),
}
}
}
Expand All @@ -229,15 +234,19 @@ pub enum Counter {

/// Counts unicode code points
CodePoints,

/// Counts the width of the string
UnicodeWidth,
}

/// A convenience array of all counter types.
pub const ALL_COUNTERS: [Counter; 5] = [
pub const ALL_COUNTERS: [Counter; 6] = [
Counter::GraphemeClusters,
Counter::Bytes,
Counter::Lines,
Counter::Words,
Counter::CodePoints,
Counter::UnicodeWidth,
];

impl fmt::Display for Counter {
Expand All @@ -248,6 +257,7 @@ impl fmt::Display for Counter {
Counter::Lines => "lines",
Counter::Words => "words",
Counter::CodePoints => "codepoints",
Counter::UnicodeWidth => "unicode-width",
};

write!(f, "{s}")
Expand Down Expand Up @@ -297,6 +307,7 @@ fn test_one_newline() {
correct_counts.insert(Counter::GraphemeClusters, 1);
correct_counts.insert(Counter::Bytes, 1);
correct_counts.insert(Counter::CodePoints, 1);
correct_counts.insert(Counter::UnicodeWidth, 0);

assert_eq!(correct_counts, counts);
}
Expand Down Expand Up @@ -336,6 +347,7 @@ fn test_count_counts_lines() {

// one more than grapheme clusters because of \r\n
correct_counts.insert(Counter::CodePoints, 24);
correct_counts.insert(Counter::UnicodeWidth, 17);

assert_eq!(correct_counts, counts);
}
Expand All @@ -353,6 +365,7 @@ fn test_count_counts_words() {
correct_counts.insert(Counter::Bytes, i_can_eat_glass.len());
correct_counts.insert(Counter::Words, 9);
correct_counts.insert(Counter::CodePoints, 50);
correct_counts.insert(Counter::UnicodeWidth, 50);

assert_eq!(correct_counts, counts);
}
Expand Down

0 comments on commit 6dc71f5

Please sign in to comment.