diff --git a/minijinja-cli/Cargo.toml b/minijinja-cli/Cargo.toml index d6521e45..89b60f1c 100644 --- a/minijinja-cli/Cargo.toml +++ b/minijinja-cli/Cargo.toml @@ -46,7 +46,7 @@ minijinja = { version = "=2.5.0", path = "../minijinja", features = [ "custom_syntax", "loop_controls" ] } -minijinja-contrib = { version = "=2.5.0", optional = true, path = "../minijinja-contrib", features = ["pycompat", "datetime", "timezone", "rand", "unicode_wordwrap"] } +minijinja-contrib = { version = "=2.5.0", optional = true, path = "../minijinja-contrib", features = ["pycompat", "datetime", "timezone", "rand", "unicode_wordwrap", "wordcount"] } rustyline = { version = "14.0.0", optional = true } serde = { version = "1.0.183", features = ["derive", "rc"] } serde_json = "1.0.105" diff --git a/minijinja-contrib/Cargo.toml b/minijinja-contrib/Cargo.toml index 556614fc..2acb9f7b 100644 --- a/minijinja-contrib/Cargo.toml +++ b/minijinja-contrib/Cargo.toml @@ -21,6 +21,7 @@ pycompat = ["minijinja/builtins"] datetime = ["time"] timezone = ["time-tz"] rand = ["dep:rand"] +wordcount = ["unicode_categories"] wordwrap = ["textwrap"] unicode_wordwrap = ["wordwrap", "textwrap/unicode-linebreak", "textwrap/unicode-width"] @@ -31,6 +32,7 @@ serde = "1.0.164" textwrap = { version = "0.16.1", optional = true, default-features = false, features = ["smawk"] } time = { version = "0.3.35", optional = true, features = ["serde", "formatting", "parsing"] } time-tz = { version = "1.0.3", features = ["db"], optional = true } +unicode_categories = { version = "0.1.1", optional = true} [dev-dependencies] insta = { version = "1.38.0", features = ["glob", "serde"] } diff --git a/minijinja-contrib/src/filters/mod.rs b/minijinja-contrib/src/filters/mod.rs index 9cdf001c..257c0888 100644 --- a/minijinja-contrib/src/filters/mod.rs +++ b/minijinja-contrib/src/filters/mod.rs @@ -220,14 +220,18 @@ pub fn truncate(state: &State, value: Value, kwargs: Kwargs) -> Result Result { + use unicode_categories::UnicodeCategories; + let s = value.as_str().unwrap_or_default(); let mut count: u32 = 0; let mut in_word = false; // Iterate through characters, counting transitions from non-word to word chars for c in s.chars() { - let is_word_char = c.is_alphanumeric() || c == '_'; + let is_word_char = c.is_letter() || c.is_numeric() || c == '_'; if is_word_char && !in_word { count += 1; in_word = true; diff --git a/minijinja-contrib/src/lib.rs b/minijinja-contrib/src/lib.rs index e56ddbcc..d1a9e11f 100644 --- a/minijinja-contrib/src/lib.rs +++ b/minijinja-contrib/src/lib.rs @@ -35,7 +35,10 @@ pub fn add_to_environment(env: &mut Environment) { env.add_filter("pluralize", filters::pluralize); env.add_filter("filesizeformat", filters::filesizeformat); env.add_filter("truncate", filters::truncate); - env.add_filter("wordcount", filters::wordcount); + #[cfg(feature = "wordcount")] + { + env.add_filter("wordcount", filters::wordcount); + } #[cfg(feature = "wordwrap")] { env.add_filter("wordwrap", filters::wordwrap); diff --git a/minijinja-contrib/tests/filters.rs b/minijinja-contrib/tests/filters.rs index 3649b8d8..42275ff6 100644 --- a/minijinja-contrib/tests/filters.rs +++ b/minijinja-contrib/tests/filters.rs @@ -266,6 +266,18 @@ fn test_wordcount() { .unwrap(), "3" ); + + // Test unicode marks + assert_eq!( + env.render_str( + "{{ text|wordcount }}", + context! { + text => "helloाworld" + } + ) + .unwrap(), + "2" + ); } #[test]