From 24edf8a1f667410f921571a941a249b9ff9ccde3 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 27 Oct 2024 00:03:19 +0200 Subject: [PATCH 1/2] Added better docs on bytes and unified implicit string conversions --- minijinja/src/value/argtypes.rs | 1 + minijinja/src/value/mod.rs | 35 ++++++++++++++++++++++++++++++--- minijinja/src/value/ops.rs | 6 ++++++ minijinja/tests/test_value.rs | 9 +++++++++ 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/minijinja/src/value/argtypes.rs b/minijinja/src/value/argtypes.rs index bbc94c63..9eade61f 100644 --- a/minijinja/src/value/argtypes.rs +++ b/minijinja/src/value/argtypes.rs @@ -493,6 +493,7 @@ impl TryFrom for Arc { match value.0 { ValueRepr::String(x, _) => Ok(x), ValueRepr::SmallStr(x) => Ok(Arc::from(x.as_str())), + ValueRepr::Bytes(ref x) => Ok(Arc::from(String::from_utf8_lossy(x))), _ => Err(Error::new( ErrorKind::InvalidOperation, "value is not a string", diff --git a/minijinja/src/value/mod.rs b/minijinja/src/value/mod.rs index de289c68..adbf3200 100644 --- a/minijinja/src/value/mod.rs +++ b/minijinja/src/value/mod.rs @@ -178,10 +178,28 @@ let vec = Vec::::deserialize(value).unwrap(); //! //! It's generally recommende to ignore the existence of invalid objects and let them //! fail naturally as they are encountered. +//! +//! # Notes on Bytes and Strings +//! +//! Usually one would pass strings to templates as Jinja is entirely based on string +//! rendering. However there are situations where it can be useful to pass bytes instead. +//! As such MiniJinja allows a value type to carry bytes even though there is no syntax +//! within the template language to create a byte literal. +//! +//! When rendering bytes as strings, MiniJinja will attempt to interpret them as +//! lossy utf-8. This is a bit different to Jinja2 which in Python 3 stopped +//! rendering byte strings as strings. This is an intentional change that was +//! 
deemed acceptable given how infrequently bytes are used but how relatively +//! commonly bytes hold "almost utf-8" in templates. Most +//! conversions to strings also will do almost the same. The debug rendering of +//! bytes however is different and bytes are not iterable. Like strings however +//! they can be sliced and indexed, but they will be sliced by bytes and not by +//! characters. // this module is based on the content module in insta which in turn is based // on the content module in serde::private::ser. +use core::str; use std::cell::{Cell, RefCell}; use std::cmp::Ordering; use std::collections::BTreeMap; @@ -1078,19 +1096,25 @@ impl Value { } /// If the value is a string, return it. + /// + /// This will also perform a lossy string conversion of bytes from utf-8. pub fn to_str(&self) -> Option> { match &self.0 { ValueRepr::String(ref s, _) => Some(s.clone()), ValueRepr::SmallStr(ref s) => Some(Arc::from(s.as_str())), + ValueRepr::Bytes(ref b) => Some(Arc::from(String::from_utf8_lossy(b))), _ => None, } } /// If the value is a string, return it. + /// + /// This will also return well-formed utf-8 bytes as a string. 
pub fn as_str(&self) -> Option<&str> { match &self.0 { ValueRepr::String(ref s, _) => Some(s as &str), ValueRepr::SmallStr(ref s) => Some(s.as_str()), + ValueRepr::Bytes(ref b) => str::from_utf8(b).ok(), _ => None, } } @@ -1140,6 +1164,7 @@ impl Value { match self.0 { ValueRepr::String(ref s, _) => Some(s.chars().count()), ValueRepr::SmallStr(ref s) => Some(s.as_str().chars().count()), + ValueRepr::Bytes(ref b) => Some(b.len()), ValueRepr::Object(ref dy) => dy.enumerator_len(), _ => None, } @@ -1285,9 +1310,9 @@ impl Value { // TODO: add small str optimization here Some(Value::from(s.as_str().chars().rev().collect::())) } - ValueRepr::Bytes(ref b) => { - Some(Value::from(b.iter().rev().copied().collect::>())) - } + ValueRepr::Bytes(ref b) => Some(Value::from_bytes( + b.iter().rev().copied().collect::>(), + )), ValueRepr::Object(ref o) => match o.enumerate() { Enumerator::NonEnumerable => None, Enumerator::Empty => Some(Value::make_iterable(|| None::.into_iter())), @@ -1422,6 +1447,10 @@ impl Value { let idx = some!(index(key, || Some(s.as_str().chars().count()))); s.as_str().chars().nth(idx).map(Value::from) } + ValueRepr::Bytes(ref b) => { + let idx = some!(index(key, || Some(b.len()))); + b.get(idx).copied().map(Value::from) + } _ => None, } } diff --git a/minijinja/src/value/ops.rs b/minijinja/src/value/ops.rs index ae0d4a45..082a4d32 100644 --- a/minijinja/src/value/ops.rs +++ b/minijinja/src/value/ops.rs @@ -128,6 +128,12 @@ pub fn slice(value: Value, start: Value, stop: Value, step: Value) -> Result(), )) } + ValueRepr::Bytes(ref b) => { + let (start, len) = get_offset_and_len(start, stop, || b.len()); + Ok(Value::from_bytes( + b.get(start..start + len).unwrap_or_default().to_owned(), + )) + } ValueRepr::Undefined | ValueRepr::None => Ok(Value::from(Vec::::new())), ValueRepr::Object(obj) if matches!(obj.repr(), ObjectRepr::Seq | ObjectRepr::Iterable) => { Ok(Value::make_object_iterable(obj, move |obj| { diff --git a/minijinja/tests/test_value.rs 
b/minijinja/tests/test_value.rs index 051f6a42..588926a9 100644 --- a/minijinja/tests/test_value.rs +++ b/minijinja/tests/test_value.rs @@ -1195,6 +1195,15 @@ fn test_bytes() { assert_eq!(byte_value.kind(), ValueKind::Bytes); assert!(byte_value.try_iter().is_err()); assert_eq!(format!("{:?}", byte_value), "b'\\x01\\x02\\x03\\x04'"); + assert_eq!(byte_value.get_item_by_index(0).ok(), Some(Value::from(1))); + assert_eq!( + byte_value.reverse().ok(), + Some(Value::from_bytes(vec![4, 3, 2, 1])) + ); + assert_eq!( + render!("{{ x[1:-1] }}", x => Value::from_bytes(vec![1, 76, 65, 4])), + "LA" + ); let bytes = vec![1u8, 2, 3, 4]; let not_byte_value = Value::from(bytes); From 788666e88ca8d7f8890047223d58210c5f23f3af Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 27 Oct 2024 00:04:29 +0200 Subject: [PATCH 2/2] Added changelog entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ede0b12a..7e1e385e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ All notable changes to MiniJinja are documented here. `Value::from_bytes` without having to go via serde, and they are now producing a nicer looking debug output. #616 - Added the missing `string` filter from Jinja2. #617 +- Reversing bytes and converting them implicitly to strings will now work + more consistently. #619 ## 2.4.0