From e432a2c2c488cbde6009bc3326a1d2a9725e6e3c Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 19 Mar 2024 10:16:50 +0000 Subject: [PATCH] support for json cow --- Cargo.lock | 4 +- Cargo.toml | 2 +- src/errors/line_error.rs | 2 +- src/errors/location.rs | 7 +++ src/input/input_abstract.rs | 2 +- src/input/input_json.rs | 100 ++++++++++++++++++------------------ src/input/input_python.rs | 2 +- src/input/input_string.rs | 2 +- src/input/return_enums.rs | 38 ++++++++++---- src/lookup_key.rs | 18 +++---- src/validators/generator.rs | 4 +- 11 files changed, 102 insertions(+), 79 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 667d15b1b..88d00f3c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,9 +148,9 @@ checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" [[package]] name = "jiter" -version = "0.0.7" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a1b6e316923afd3087ec73829f646a67c18f3a5bd61624247b05e652e4a99d" +checksum = "a9cbc4bba9fee7e90f7ab23d53caa097007c6b870b2ca0a33334e774eb34c1ff" dependencies = [ "ahash", "hashbrown", diff --git a/Cargo.toml b/Cargo.toml index ff97d1dbf..a3762466e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ base64 = "0.21.7" num-bigint = "0.4.4" python3-dll-a = "0.2.7" uuid = "1.7.0" -jiter = {version = "0.0.7", features = ["python"]} +jiter = { version = "0.1.0", features = ["python"] } [lib] name = "_pydantic_core" diff --git a/src/errors/line_error.rs b/src/errors/line_error.rs index 9a9d6ef6f..c3d2b66dc 100644 --- a/src/errors/line_error.rs +++ b/src/errors/line_error.rs @@ -151,7 +151,7 @@ impl ValLineError { #[derive(Clone)] pub enum InputValue { Python(PyObject), - Json(JsonValue), + Json(JsonValue<'static>), } impl ToPyObject for InputValue { diff --git a/src/errors/location.rs b/src/errors/location.rs index 138d327ce..07e1623d7 100644 --- a/src/errors/location.rs +++ b/src/errors/location.rs @@ -1,5 +1,6 @@ use pyo3::exceptions::PyTypeError; use pyo3::sync::GILOnceCell; +use std::borrow::Cow; use std::fmt; use pyo3::prelude::*; @@ -52,6 +53,12 @@ impl From<&str> for LocItem { } } +impl From> for LocItem { + fn from(s: Cow<'_, str>) -> Self { + Self::S(s.into_owned()) + } +} + impl From for LocItem { fn from(i: i64) -> Self { Self::I(i) diff --git a/src/input/input_abstract.rs b/src/input/input_abstract.rs index a5ae2a6be..7e6649603 100644 --- a/src/input/input_abstract.rs +++ b/src/input/input_abstract.rs @@ -180,7 +180,7 @@ pub trait Input<'py>: fmt::Debug + ToPyObject { fn validate_frozenset(&self, strict: bool) -> ValMatch>; - fn validate_iter(&self) -> ValResult; + fn validate_iter(&self) -> ValResult>; fn validate_date(&self, strict: bool) -> ValMatch>; diff --git a/src/input/input_json.rs b/src/input/input_json.rs index 1a79f1ea6..f2bf74998 100644 --- a/src/input/input_json.rs +++ b/src/input/input_json.rs @@ -24,26 +24,26 @@ use super::{ }; /// This is required but since JSON object keys are always strings, I don't think it can be called -impl From<&JsonValue> for LocItem { +impl From<&JsonValue<'_>> for LocItem { fn from(json_value: &JsonValue) -> Self { match json_value { JsonValue::Int(i) => (*i).into(), - JsonValue::Str(s) => s.as_str().into(), + JsonValue::Str(s) => s.clone().into(), v => format!("{v:?}").into(), } } } -impl From for LocItem { +impl From> for LocItem { fn from(json_value: JsonValue) -> Self { (&json_value).into() } } -impl<'py> Input<'py> for JsonValue { +impl<'py, 'data> Input<'py> for JsonValue<'data> { fn as_error_value(&self) -> InputValue { // cloning JsonValue is cheap due to use of Arc - InputValue::Json(self.clone()) + InputValue::Json(self.to_static()) } fn is_none(&self) -> bool { @@ -63,11 +63,11 @@ impl<'py> Input<'py> for JsonValue { } } - type Arguments<'a> = JsonArgs<'a> + type Arguments<'a> = JsonArgs<'a, 'data> where Self: 'a,; - fn validate_args(&self) -> ValResult> { + fn validate_args(&self) -> ValResult> { match self { JsonValue::Object(object) => Ok(JsonArgs::new(None, Some(object))), JsonValue::Array(array) => Ok(JsonArgs::new(Some(array), None)), @@ -75,7 +75,7 @@ impl<'py> Input<'py> for JsonValue { } } - fn validate_dataclass_args<'a>(&'a self, class_name: &str) -> ValResult> { + fn validate_dataclass_args<'a>(&'a self, class_name: &str) -> ValResult> { match self { JsonValue::Object(object) => Ok(JsonArgs::new(None, Some(object))), _ => { @@ -98,7 +98,7 @@ impl<'py> Input<'py> for JsonValue { // TODO: in V3 we may want to make JSON str always win if in union, for consistency, // see https://github.com/pydantic/pydantic-core/pull/867#discussion_r1386582501 match self { - JsonValue::Str(s) => Ok(ValidationMatch::strict(s.as_str().into())), + JsonValue::Str(s) => Ok(ValidationMatch::strict(s.as_ref().into())), JsonValue::Int(i) if !strict && coerce_numbers_to_str => Ok(ValidationMatch::lax(i.to_string().into())), JsonValue::BigInt(b) if !strict && coerce_numbers_to_str => Ok(ValidationMatch::lax(b.to_string().into())), JsonValue::Float(f) if !strict && coerce_numbers_to_str => Ok(ValidationMatch::lax(f.to_string().into())), @@ -142,7 +142,7 @@ impl<'py> Input<'py> for JsonValue { fn exact_str(&self) -> ValResult> { match self { - JsonValue::Str(s) => Ok(s.as_str().into()), + JsonValue::Str(s) => Ok(s.as_ref().into()), _ => Err(ValError::new(ErrorTypeDefaults::StringType, self)), } } @@ -168,7 +168,7 @@ impl<'py> Input<'py> for JsonValue { } } - type Dict<'a> = &'a JsonObject; + type Dict<'a> = &'a JsonObject<'data> where Self: 'a; fn validate_dict(&self, _strict: bool) -> ValResult> { match self { @@ -181,18 +181,18 @@ impl<'py> Input<'py> for JsonValue { self.validate_dict(false) } - type List<'a> = &'a JsonArray; + type List<'a> = &'a JsonArray<'data> where Self: 'a; - fn validate_list(&self, _strict: bool) -> ValMatch<&JsonArray> { + fn validate_list(&self, _strict: bool) -> ValMatch<&JsonArray<'data>> { match self { JsonValue::Array(a) => Ok(ValidationMatch::exact(a)), _ => Err(ValError::new(ErrorTypeDefaults::ListType, self)), } } - type Tuple<'a> = &'a JsonArray; + type Tuple<'a> = &'a JsonArray<'data> where Self: 'a; - fn validate_tuple(&self, _strict: bool) -> ValMatch<&JsonArray> { + fn validate_tuple(&self, _strict: bool) -> ValMatch<&JsonArray<'data>> { // just as in set's case, List has to be allowed match self { JsonValue::Array(a) => Ok(ValidationMatch::strict(a)), @@ -200,9 +200,9 @@ impl<'py> Input<'py> for JsonValue { } } - type Set<'a> = &'a JsonArray; + type Set<'a> = &'a JsonArray<'data> where Self: 'a; - fn validate_set(&self, _strict: bool) -> ValMatch<&JsonArray> { + fn validate_set(&self, _strict: bool) -> ValMatch<&JsonArray<'data>> { // we allow a list here since otherwise it would be impossible to create a set from JSON match self { JsonValue::Array(a) => Ok(ValidationMatch::strict(a)), @@ -210,7 +210,7 @@ impl<'py> Input<'py> for JsonValue { } } - fn validate_frozenset(&self, _strict: bool) -> ValMatch<&JsonArray> { + fn validate_frozenset(&self, _strict: bool) -> ValMatch<&JsonArray<'data>> { // we allow a list here since otherwise it would be impossible to create a frozenset from JSON match self { JsonValue::Array(a) => Ok(ValidationMatch::strict(a)), @@ -218,14 +218,14 @@ impl<'py> Input<'py> for JsonValue { } } - fn validate_iter(&self) -> ValResult { + fn validate_iter(&self) -> ValResult> { match self { - JsonValue::Array(a) => Ok(a.clone().into()), + JsonValue::Array(a) => Ok(GenericIterator::from(a.clone()).into_static()), JsonValue::Str(s) => Ok(string_to_vec(s).into()), JsonValue::Object(object) => { // return keys iterator to match python's behavior let keys: JsonArray = JsonArray::new(object.keys().map(|k| JsonValue::Str(k.clone())).collect()); - Ok(keys.into()) + Ok(GenericIterator::from(keys).into_static()) } _ => Err(ValError::new(ErrorTypeDefaults::IterableType, self)), } @@ -303,7 +303,7 @@ impl<'py> Input<'py> for str { fn as_error_value(&self) -> InputValue { // Justification for the clone: this is on the error pathway and we are generally ok // with errors having a performance penalty - InputValue::Json(JsonValue::Str(self.to_owned())) + InputValue::Json(JsonValue::Str(self.to_owned().into())) } fn as_kwargs(&self, _py: Python<'py>) -> Option> { @@ -394,7 +394,7 @@ impl<'py> Input<'py> for str { Err(ValError::new(ErrorTypeDefaults::SetType, self)) } - fn validate_iter(&self) -> ValResult { + fn validate_iter(&self) -> ValResult> { Ok(string_to_vec(self).into()) } @@ -441,21 +441,21 @@ impl BorrowInput<'_> for String { } } -impl BorrowInput<'_> for JsonValue { - type Input = JsonValue; +impl<'data> BorrowInput<'_> for JsonValue<'data> { + type Input = JsonValue<'data>; fn borrow_input(&self) -> &Self::Input { self } } -fn string_to_vec(s: &str) -> JsonArray { - JsonArray::new(s.chars().map(|c| JsonValue::Str(c.to_string())).collect()) +fn string_to_vec(s: &str) -> JsonArray<'static> { + JsonArray::new(s.chars().map(|c| JsonValue::Str(c.to_string().into())).collect()) } -impl<'py> ValidatedDict<'py> for &'_ JsonObject { +impl<'py, 'data> ValidatedDict<'py> for &'_ JsonObject<'data> { type Key<'a> = &'a str where Self: 'a; - type Item<'a> = &'a JsonValue where Self: 'a; + type Item<'a> = &'a JsonValue<'data> where Self: 'a; fn get_item<'k>(&self, key: &'k LookupKey) -> ValResult)>> { key.json_get(self) @@ -469,12 +469,12 @@ impl<'py> ValidatedDict<'py> for &'_ JsonObject { &'a self, consumer: impl ConsumeIterator, Self::Item<'a>)>, Output = R>, ) -> ValResult { - Ok(consumer.consume_iterator(LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_str(), v))))) + Ok(consumer.consume_iterator(LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_ref(), v))))) } } -impl<'a, 'py> ValidatedList<'py> for &'a JsonArray { - type Item = &'a JsonValue; +impl<'a, 'py, 'data> ValidatedList<'py> for &'a JsonArray<'data> { + type Item = &'a JsonValue<'data>; fn len(&self) -> Option { Some(SmallVec::len(self)) @@ -487,8 +487,8 @@ impl<'a, 'py> ValidatedList<'py> for &'a JsonArray { } } -impl<'a, 'py> ValidatedTuple<'py> for &'a JsonArray { - type Item = &'a JsonValue; +impl<'a, 'data> ValidatedTuple<'_> for &'a JsonArray<'data> { + type Item = &'a JsonValue<'data>; fn len(&self) -> Option { Some(SmallVec::len(self)) @@ -498,8 +498,8 @@ impl<'a, 'py> ValidatedTuple<'py> for &'a JsonArray { } } -impl<'a, 'py> ValidatedSet<'py> for &'a JsonArray { - type Item = &'a JsonValue; +impl<'a, 'data> ValidatedSet<'_> for &'a JsonArray<'data> { + type Item = &'a JsonValue<'data>; fn iterate(self, consumer: impl ConsumeIterator, Output = R>) -> ValResult { Ok(consumer.consume_iterator(self.iter().map(Ok))) @@ -507,20 +507,20 @@ impl<'a, 'py> ValidatedSet<'py> for &'a JsonArray { } #[cfg_attr(debug_assertions, derive(Debug))] -pub struct JsonArgs<'a> { - args: Option<&'a [JsonValue]>, - kwargs: Option<&'a JsonObject>, +pub struct JsonArgs<'a, 'data> { + args: Option<&'a [JsonValue<'data>]>, + kwargs: Option<&'a JsonObject<'data>>, } -impl<'a> JsonArgs<'a> { - fn new(args: Option<&'a [JsonValue]>, kwargs: Option<&'a JsonObject>) -> Self { +impl<'a, 'data> JsonArgs<'a, 'data> { + fn new(args: Option<&'a [JsonValue<'data>]>, kwargs: Option<&'a JsonObject<'data>>) -> Self { Self { args, kwargs } } } -impl<'a> Arguments<'_> for JsonArgs<'a> { - type Args = [JsonValue]; - type Kwargs = JsonObject; +impl<'a, 'data> Arguments<'_> for JsonArgs<'a, 'data> { + type Args = [JsonValue<'data>]; + type Kwargs = JsonObject<'data>; fn args(&self) -> Option<&Self::Args> { self.args @@ -531,8 +531,8 @@ impl<'a> Arguments<'_> for JsonArgs<'a> { } } -impl PositionalArgs<'_> for [JsonValue] { - type Item<'a> = &'a JsonValue; +impl<'data> PositionalArgs<'_> for [JsonValue<'data>] { + type Item<'a> = &'a JsonValue<'data> where Self: 'a; fn len(&self) -> usize { <[JsonValue]>::len(self) @@ -545,9 +545,9 @@ impl PositionalArgs<'_> for [JsonValue] { } } -impl KeywordArgs<'_> for JsonObject { - type Key<'a> = &'a str; - type Item<'a> = &'a JsonValue; +impl<'data> KeywordArgs<'_> for JsonObject<'data> { + type Key<'a> = &'a str where Self: 'a; + type Item<'a> = &'a JsonValue<'data> where Self: 'a; fn len(&self) -> usize { LazyIndexMap::len(self) @@ -556,6 +556,6 @@ impl KeywordArgs<'_> for JsonObject { key.json_get(self) } fn iter(&self) -> impl Iterator, Self::Item<'_>)>> { - LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_str(), v))) + LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_ref(), v))) } } diff --git a/src/input/input_python.rs b/src/input/input_python.rs index fe00ee1f9..f59dea1f8 100644 --- a/src/input/input_python.rs +++ b/src/input/input_python.rs @@ -475,7 +475,7 @@ impl<'py> Input<'py> for Bound<'py, PyAny> { Err(ValError::new(ErrorTypeDefaults::FrozenSetType, self)) } - fn validate_iter(&self) -> ValResult { + fn validate_iter(&self) -> ValResult> { if self.iter().is_ok() { Ok(self.into()) } else { diff --git a/src/input/input_string.rs b/src/input/input_string.rs index 520bbba61..3c61cdebc 100644 --- a/src/input/input_string.rs +++ b/src/input/input_string.rs @@ -171,7 +171,7 @@ impl<'py> Input<'py> for StringMapping<'py> { Err(ValError::new(ErrorTypeDefaults::FrozenSetType, self)) } - fn validate_iter(&self) -> ValResult { + fn validate_iter(&self) -> ValResult> { Err(ValError::new(ErrorTypeDefaults::IterableType, self)) } diff --git a/src/input/return_enums.rs b/src/input/return_enums.rs index b4a699735..753ace478 100644 --- a/src/input/return_enums.rs +++ b/src/input/return_enums.rs @@ -321,20 +321,29 @@ pub(crate) fn iterate_attributes<'a, 'py>( }) } -#[derive(Debug, Clone)] -pub enum GenericIterator { +#[derive(Debug)] +pub enum GenericIterator<'data> { PyIterator(GenericPyIterator), - JsonArray(GenericJsonIterator), + JsonArray(GenericJsonIterator<'data>), } -impl From for GenericIterator { - fn from(array: JsonArray) -> Self { +impl GenericIterator<'_> { + pub(crate) fn into_static(self) -> GenericIterator<'static> { + match self { + GenericIterator::PyIterator(iter) => GenericIterator::PyIterator(iter), + GenericIterator::JsonArray(iter) => GenericIterator::JsonArray(iter.into_static()), + } + } +} + +impl<'data> From> for GenericIterator<'data> { + fn from(array: JsonArray<'data>) -> Self { let json_iter = GenericJsonIterator { array, index: 0 }; Self::JsonArray(json_iter) } } -impl From<&Bound<'_, PyAny>> for GenericIterator { +impl From<&Bound<'_, PyAny>> for GenericIterator<'_> { fn from(obj: &Bound<'_, PyAny>) -> Self { let py_iter = GenericPyIterator { obj: obj.clone().into(), @@ -377,13 +386,13 @@ impl GenericPyIterator { } #[derive(Debug, Clone)] -pub struct GenericJsonIterator { - array: JsonArray, +pub struct GenericJsonIterator<'data> { + array: JsonArray<'data>, index: usize, } -impl GenericJsonIterator { - pub fn next(&mut self, _py: Python) -> PyResult> { +impl<'data> GenericJsonIterator<'data> { + pub fn next(&mut self, _py: Python) -> PyResult, usize)>> { if self.index < self.array.len() { // panic here is impossible due to bounds check above; compiler should be // able to optimize it away even @@ -397,12 +406,19 @@ impl GenericJsonIterator { } pub fn input_as_error_value(&self, _py: Python<'_>) -> InputValue { - InputValue::Json(JsonValue::Array(self.array.clone())) + InputValue::Json(JsonValue::Array(self.array.clone()).into_static()) } pub fn index(&self) -> usize { self.index } + + pub fn into_static(self) -> GenericJsonIterator<'static> { + GenericJsonIterator { + array: JsonArray::new(self.array.iter().map(JsonValue::to_static).collect()), + index: self.index, + } + } } #[cfg_attr(debug_assertions, derive(Debug))] diff --git a/src/lookup_key.rs b/src/lookup_key.rs index f1ccfa142..bacdb5fd1 100644 --- a/src/lookup_key.rs +++ b/src/lookup_key.rs @@ -260,12 +260,12 @@ impl LookupKey { } } - pub fn json_get<'data, 's>( + pub fn json_get<'a, 'data, 's>( &'s self, - dict: &'data JsonObject, - ) -> ValResult> { + dict: &'a JsonObject<'data>, + ) -> ValResult)>> { match self { - Self::Simple { key, path, .. } => match dict.get(key) { + Self::Simple { key, path, .. } => match dict.get(key.as_str()) { Some(value) => Ok(Some((path, value))), None => Ok(None), }, @@ -275,9 +275,9 @@ impl LookupKey { key2, path2, .. - } => match dict.get(key1) { + } => match dict.get(key1.as_str()) { Some(value) => Ok(Some((path1, value))), - None => match dict.get(key2) { + None => match dict.get(key2.as_str()) { Some(value) => Ok(Some((path2, value))), None => Ok(None), }, @@ -475,7 +475,7 @@ impl PathItem { } } - pub fn json_get<'a>(&self, any_json: &'a JsonValue) -> Option<&'a JsonValue> { + pub fn json_get<'a, 'data>(&self, any_json: &'a JsonValue<'data>) -> Option<&'a JsonValue<'data>> { match any_json { JsonValue::Object(v_obj) => self.json_obj_get(v_obj), JsonValue::Array(v_array) => match self { @@ -493,9 +493,9 @@ impl PathItem { } } - pub fn json_obj_get<'a>(&self, json_obj: &'a JsonObject) -> Option<&'a JsonValue> { + pub fn json_obj_get<'a, 'data>(&self, json_obj: &'a JsonObject<'data>) -> Option<&'a JsonValue<'data>> { match self { - Self::S(key, _) => json_obj.get(key), + Self::S(key, _) => json_obj.get(key.as_str()), _ => None, } } diff --git a/src/validators/generator.rs b/src/validators/generator.rs index f73532268..f7c932397 100644 --- a/src/validators/generator.rs +++ b/src/validators/generator.rs @@ -65,7 +65,7 @@ impl Validator for GeneratorValidator { input: &(impl Input<'py> + ?Sized), state: &mut ValidationState<'_, 'py>, ) -> ValResult { - let iterator = input.validate_iter()?; + let iterator = input.validate_iter()?.into_static(); let validator = self.item_validator.as_ref().map(|v| { InternalValidator::new( py, @@ -96,7 +96,7 @@ impl Validator for GeneratorValidator { #[pyclass(module = "pydantic_core._pydantic_core")] #[derive(Debug)] struct ValidatorIterator { - iterator: GenericIterator, + iterator: GenericIterator<'static>, validator: Option, min_length: Option, max_length: Option,