Skip to content

Commit

Permalink
Fixes incorrect `wordcount` result when the string includes Unicode mark characters
Browse files Browse the repository at this point in the history
  • Loading branch information
Yassir Barchi committed Dec 2, 2024
1 parent 4249fb3 commit 882d64f
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 3 deletions.
2 changes: 1 addition & 1 deletion minijinja-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ minijinja = { version = "=2.5.0", path = "../minijinja", features = [
"custom_syntax",
"loop_controls"
] }
minijinja-contrib = { version = "=2.5.0", optional = true, path = "../minijinja-contrib", features = ["pycompat", "datetime", "timezone", "rand", "unicode_wordwrap"] }
minijinja-contrib = { version = "=2.5.0", optional = true, path = "../minijinja-contrib", features = ["pycompat", "datetime", "timezone", "rand", "unicode_wordwrap", "wordcount"] }
rustyline = { version = "14.0.0", optional = true }
serde = { version = "1.0.183", features = ["derive", "rc"] }
serde_json = "1.0.105"
Expand Down
2 changes: 2 additions & 0 deletions minijinja-contrib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pycompat = ["minijinja/builtins"]
datetime = ["time"]
timezone = ["time-tz"]
rand = ["dep:rand"]
wordcount = ["unicode_categories"]
wordwrap = ["textwrap"]
unicode_wordwrap = ["wordwrap", "textwrap/unicode-linebreak", "textwrap/unicode-width"]

Expand All @@ -31,6 +32,7 @@ serde = "1.0.164"
textwrap = { version = "0.16.1", optional = true, default-features = false, features = ["smawk"] }
time = { version = "0.3.35", optional = true, features = ["serde", "formatting", "parsing"] }
time-tz = { version = "1.0.3", features = ["db"], optional = true }
unicode_categories = { version = "0.1.1", optional = true}

[dev-dependencies]
insta = { version = "1.38.0", features = ["glob", "serde"] }
Expand Down
6 changes: 5 additions & 1 deletion minijinja-contrib/src/filters/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,18 @@ pub fn truncate(state: &State, value: Value, kwargs: Kwargs) -> Result<String, E
/// ```jinja
/// {{ "Hello world!"|wordcount }}
/// ```
#[cfg(feature = "wordcount")]
#[cfg_attr(docsrs, doc(cfg(feature = "wordcount")))]
pub fn wordcount(value: Value) -> Result<Value, Error> {
use unicode_categories::UnicodeCategories;

let s = value.as_str().unwrap_or_default();
let mut count: u32 = 0;
let mut in_word = false;

// Iterate through characters, counting transitions from non-word to word chars
for c in s.chars() {
let is_word_char = c.is_alphanumeric() || c == '_';
let is_word_char = c.is_letter() || c.is_numeric() || c == '_';
if is_word_char && !in_word {
count += 1;
in_word = true;
Expand Down
5 changes: 4 additions & 1 deletion minijinja-contrib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ pub fn add_to_environment(env: &mut Environment) {
env.add_filter("pluralize", filters::pluralize);
env.add_filter("filesizeformat", filters::filesizeformat);
env.add_filter("truncate", filters::truncate);
env.add_filter("wordcount", filters::wordcount);
#[cfg(feature = "wordcount")]
{
env.add_filter("wordcount", filters::wordcount);
}
#[cfg(feature = "wordwrap")]
{
env.add_filter("wordwrap", filters::wordwrap);
Expand Down
12 changes: 12 additions & 0 deletions minijinja-contrib/tests/filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,18 @@ fn test_wordcount() {
.unwrap(),
"3"
);

// Test unicode marks
assert_eq!(
env.render_str(
"{{ text|wordcount }}",
context! {
text => "helloाworld"
}
)
.unwrap(),
"2"
);
}

#[test]
Expand Down

0 comments on commit 882d64f

Please sign in to comment.