From ab07079d091db6139817db6a6c79c74ee294ae43 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 20 Dec 2022 12:30:42 +0100 Subject: [PATCH 01/18] init: framework crate --- packages/libs/deer/Cargo.toml | 2 +- packages/libs/deer/desert/Cargo.toml | 10 ++++++++++ packages/libs/deer/desert/README.md | 6 ++++++ packages/libs/deer/desert/src/lib.rs | 14 ++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 packages/libs/deer/desert/Cargo.toml create mode 100644 packages/libs/deer/desert/README.md create mode 100644 packages/libs/deer/desert/src/lib.rs diff --git a/packages/libs/deer/Cargo.toml b/packages/libs/deer/Cargo.toml index 5034bb922b9..828796915d8 100644 --- a/packages/libs/deer/Cargo.toml +++ b/packages/libs/deer/Cargo.toml @@ -25,4 +25,4 @@ std = ['serde/std', 'error-stack/std'] arbitrary-precision = [] [workspace] -members = ['.', 'macros', 'json'] +members = ['.', 'macros', 'json', 'desert'] diff --git a/packages/libs/deer/desert/Cargo.toml b/packages/libs/deer/desert/Cargo.toml new file mode 100644 index 00000000000..58bf5d7be83 --- /dev/null +++ b/packages/libs/deer/desert/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "deer-desert" +version = "0.0.0" +edition = "2021" +# NOTE: THIS PACKAGE IS NEVER INTENDED TO BE PUBLISHED +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/packages/libs/deer/desert/README.md b/packages/libs/deer/desert/README.md new file mode 100644 index 00000000000..6e229ad5b71 --- /dev/null +++ b/packages/libs/deer/desert/README.md @@ -0,0 +1,6 @@ +# deer-desert + +desert is the internal-only deserialization testing framework used throughout the integration tests and should never +be published. 
+ +`desert` = `deser` (`deserialization`) + `t` (`test`) diff --git a/packages/libs/deer/desert/src/lib.rs b/packages/libs/deer/desert/src/lib.rs new file mode 100644 index 00000000000..7d12d9af819 --- /dev/null +++ b/packages/libs/deer/desert/src/lib.rs @@ -0,0 +1,14 @@ +pub fn add(left: usize, right: usize) -> usize { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} From 225619f4aac2b597866c577a95cc79936464fad0 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 20 Dec 2022 19:56:44 +0100 Subject: [PATCH 02/18] feat: token + deserialize (initial) --- packages/libs/deer/desert/Cargo.toml | 2 + packages/libs/deer/desert/src/de.rs | 102 +++++++++++++++++++++++++ packages/libs/deer/desert/src/lib.rs | 7 ++ packages/libs/deer/desert/src/token.rs | 27 +++++++ packages/libs/deer/src/context.rs | 1 + 5 files changed, 139 insertions(+) create mode 100644 packages/libs/deer/desert/src/de.rs create mode 100644 packages/libs/deer/desert/src/token.rs diff --git a/packages/libs/deer/desert/Cargo.toml b/packages/libs/deer/desert/Cargo.toml index 58bf5d7be83..30a2cca8cf3 100644 --- a/packages/libs/deer/desert/Cargo.toml +++ b/packages/libs/deer/desert/Cargo.toml @@ -8,3 +8,5 @@ publish = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +deer = { path = ".." } +error-stack = { version = "0.2.4", default_features = false } diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs new file mode 100644 index 00000000000..89ef6a53819 --- /dev/null +++ b/packages/libs/deer/desert/src/de.rs @@ -0,0 +1,102 @@ +use alloc::borrow::ToOwned; + +use deer::{error::DeserializerError, Context, Visitor}; +use error_stack::Result; + +use crate::token::Token; + +macro_rules! 
forward { + ($($method:ident),*) => { + $( + fn $method(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_any(visitor) + } + )* + }; +} + +#[derive(Debug)] +pub struct Deserializer<'a, 'de> { + context: &'a Context, + tokens: &'de [Token], +} + +impl<'a, 'de> deer::Deserializer<'de> for Deserializer<'a, 'de> { + forward!( + deserialize_null, + deserialize_bool, + deserialize_number, + deserialize_char, + deserialize_string, + deserialize_str, + deserialize_bytes, + deserialize_bytes_buffer, + deserialize_array, + deserialize_object + ); + + fn context(&self) -> &Context { + self.context + } + + fn deserialize_any(mut self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let token = self.next(); + + match token { + Token::Bool(value) => visitor.visit_bool(value), + Token::Number(value) => visitor.visit_number(value.clone()), + Token::Char(value) => visitor.visit_char(value), + Token::Str(value) => visitor.visit_str(value), + Token::BorrowedStr(value) => visitor.visit_borrowed_str(value), + Token::String(value) => visitor.visit_string(value.to_owned()), + Token::Bytes(value) => visitor.visit_bytes(value), + Token::BorrowedBytes(value) => visitor.visit_borrowed_bytes(value), + Token::BytesBuf(value) => visitor.visit_bytes_buffer(value.to_vec()), + Token::Array { .. } => {} + Token::Object { .. 
} => {} + _ => { + panic!("Deserializer did not expect {token}"); + } + } + .change_context(DeserializerError) + } +} + +impl<'a, 'de> Deserializer<'a, 'de> { + pub fn new(tokens: &'de [Token], context: &'a Context) -> Self { + Self { tokens, context } + } + + fn peek_maybe(&self) -> Option { + self.tokens.first().copied() + } + + fn peek(&self) -> Token { + self.peek_maybe().expect("should have token to deserialize") + } + + fn next_maybe(&mut self) -> Option { + let (next, tokens) = self.tokens.split_first()?; + self.tokens = tokens; + + Some(*next) + } + + fn next(&mut self) -> Token { + self.next_maybe().expect("should have token to deserialize") + } + + pub fn remaining(&self) -> usize { + self.tokens.len() + } + + pub fn is_empty(&self) -> bool { + self.tokens.is_empty() + } +} diff --git a/packages/libs/deer/desert/src/lib.rs b/packages/libs/deer/desert/src/lib.rs index 7d12d9af819..427e024c689 100644 --- a/packages/libs/deer/desert/src/lib.rs +++ b/packages/libs/deer/desert/src/lib.rs @@ -1,3 +1,10 @@ +#![no_std] + +extern crate alloc; + +mod de; +mod token; + pub fn add(left: usize, right: usize) -> usize { left + right } diff --git a/packages/libs/deer/desert/src/token.rs b/packages/libs/deer/desert/src/token.rs new file mode 100644 index 00000000000..87e289e9de2 --- /dev/null +++ b/packages/libs/deer/desert/src/token.rs @@ -0,0 +1,27 @@ +use core::fmt::{Debug, Display, Formatter}; + +use deer::Number; + +// TODO: test +#[derive(Debug, Copy, Clone)] +pub enum Token { + Bool(bool), + Number(&'static Number), + Char(char), + Str(&'static str), + BorrowedStr(&'static str), + String(&'static str), + Bytes(&'static [u8]), + BorrowedBytes(&'static [u8]), + BytesBuf(&'static [u8]), + Array { length: Option }, + ArrayEnd, + Object { length: Option }, + ObjectEnd, +} + +impl Display for Token { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + Debug::fmt(self, f) + } +} diff --git a/packages/libs/deer/src/context.rs 
b/packages/libs/deer/src/context.rs index fd111cf68f9..09063e95c75 100644 --- a/packages/libs/deer/src/context.rs +++ b/packages/libs/deer/src/context.rs @@ -1,6 +1,7 @@ use alloc::{boxed::Box, collections::BTreeMap}; use core::any::{Any, TypeId}; +#[derive(Debug)] pub struct Context { inner: BTreeMap>, } From 03eb3bf5c19e579b14317fc9cfdea97257a57183 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 20 Dec 2022 21:22:37 +0100 Subject: [PATCH 03/18] feat: implement `ArrayAccess` --- packages/libs/deer/desert/src/de.rs | 148 +++++++++++++++++++++++++++- 1 file changed, 144 insertions(+), 4 deletions(-) diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index 89ef6a53819..f191d9d51ee 100644 --- a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -1,7 +1,10 @@ use alloc::borrow::ToOwned; -use deer::{error::DeserializerError, Context, Visitor}; -use error_stack::Result; +use deer::{ + error::{ArrayAccessError, DeserializerError}, + Context, Deserialize, Visitor, +}; +use error_stack::{Report, Result, ResultExt}; use crate::token::Token; @@ -24,7 +27,7 @@ pub struct Deserializer<'a, 'de> { tokens: &'de [Token], } -impl<'a, 'de> deer::Deserializer<'de> for Deserializer<'a, 'de> { +impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { forward!( deserialize_null, deserialize_bool, @@ -58,7 +61,7 @@ impl<'a, 'de> deer::Deserializer<'de> for Deserializer<'a, 'de> { Token::Bytes(value) => visitor.visit_bytes(value), Token::BorrowedBytes(value) => visitor.visit_borrowed_bytes(value), Token::BytesBuf(value) => visitor.visit_bytes_buffer(value.to_vec()), - Token::Array { .. } => {} + Token::Array { length } => visitor.visit_array(ArrayAccess::new(self, length)), Token::Object { .. 
} => {} _ => { panic!("Deserializer did not expect {token}"); @@ -100,3 +103,140 @@ impl<'a, 'de> Deserializer<'a, 'de> { self.tokens.is_empty() } } + +#[derive(Debug)] +struct DeserializerNone<'a> { + context: &'a Context, +} + +impl<'de> deer::Deserializer<'de> for DeserializerNone<'_> { + forward!( + deserialize_null, + deserialize_bool, + deserialize_number, + deserialize_char, + deserialize_string, + deserialize_str, + deserialize_bytes, + deserialize_bytes_buffer, + deserialize_array, + deserialize_object + ); + + fn context(&self) -> &Context { + self.context + } + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_none().change_context(DeserializerError) + } +} + +struct ArrayAccess<'a, 'b, 'de: 'a> { + deserializer: &'a mut Deserializer<'b, 'de>, + + dirty: bool, + length: Option, + remaining: Option, +} + +impl<'a, 'b, 'de> ArrayAccess<'a, 'b, 'de> { + pub fn new(deserializer: &'a mut Deserializer<'b, 'de>, length: Option) -> Self { + Self { + deserializer, + dirty: false, + length, + remaining: None, + } + } +} + +impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { + fn set_bounded(&mut self, length: usize) -> Result<(), ArrayAccessError> { + if self.dirty { + return Err( + Report::new(SetBoundedError::Dirty.into_error()).change_context(ArrayAccessError) + ); + } + + if self.remaining.is_some() { + return Err( + Report::new(SetBoundedError::CalledMultipleTimes.into_error()) + .change_context(ArrayAccessError), + ); + } + + self.remaining = Some(length); + + Ok(()) + } + + fn next(&mut self) -> Option> + where + T: Deserialize<'de>, + { + self.dirty = true; + + if matches!(self.deserializer.peek(), Token::ArrayEnd) { + // we have reached the ending, if `self.remaining` is set we use the `DeserializerNone` + // to deserialize any values that require `None` + if let Some(remaining) = &mut self.remaining { + if *remaining == 0 { + return None; + } + + *remaining = remaining.saturating_sub(1); + + let 
value = T::deserialize(DeserializerNone { + context: self.deserializer.context, + }); + + Some(value.change_context(ArrayAccessError)) + } else { + None + } + } else { + let value = T::deserialize(self.deserializer); + Some(value.change_context(ArrayAccessError)) + } + } + + fn size_hint(&self) -> Option { + self.length + } + + fn end(self) -> Result<(), ArrayAccessError> { + let mut result = Ok(()); + + // ensure that we consume the last token, if it is the wrong token error out + if !matches!(self.deserializer.peek(), Token::ArrayEnd) { + // TODO: error + result = Err(Report::new(ArrayAccessError)); + } + + self.deserializer.next(); + + if self.remaining.map_or(false, |remaining| remaining > 0) { + let error = Report::new(ArrayAccessError); + // TODO: error + match &mut result { + Err(result) => result.extend_one(error), + result => *result = Err(error), + } + } + + result + } +} + +struct ObjectAccess<'a, 'b, 'de: 'a> { + deserializer: &'a mut Deserializer<'b, 'de>, + + length: Option, + remaining: Option, +} + +impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> {} From cfa0506f39d5c6887738cb82b72686cc04bcab49 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 20 Dec 2022 21:25:22 +0100 Subject: [PATCH 04/18] feat: `ObjectAccess` skeleton --- packages/libs/deer/desert/src/de.rs | 45 +++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index f191d9d51ee..55bda4d1af9 100644 --- a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -1,7 +1,7 @@ use alloc::borrow::ToOwned; use deer::{ - error::{ArrayAccessError, DeserializerError}, + error::{ArrayAccessError, DeserializerError, ObjectAccessError}, Context, Deserialize, Visitor, }; use error_stack::{Report, Result, ResultExt}; @@ -62,7 +62,7 @@ impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { Token::BorrowedBytes(value) => 
visitor.visit_borrowed_bytes(value), Token::BytesBuf(value) => visitor.visit_bytes_buffer(value.to_vec()), Token::Array { length } => visitor.visit_array(ArrayAccess::new(self, length)), - Token::Object { .. } => {} + Token::Object { length } => visitor.visit_object(ObjectAccess::new(self, length)), _ => { panic!("Deserializer did not expect {token}"); } @@ -235,8 +235,47 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { struct ObjectAccess<'a, 'b, 'de: 'a> { deserializer: &'a mut Deserializer<'b, 'de>, + dirty: bool, length: Option, remaining: Option, } -impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> {} +impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { + pub fn new(deserializer: &'a mut Deserializer<'b, 'de>, length: Option) -> Self { + Self { + deserializer, + dirty: false, + length, + remaining: None, + } + } +} + +impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { + fn set_bounded(&mut self, length: usize) -> Result<(), ObjectAccessError> { + todo!() + } + + fn value(&mut self, key: &str) -> Result + where + T: Deserialize<'de>, + { + todo!() + } + + fn next(&mut self) -> Option> + where + K: Deserialize<'de>, + V: Deserialize<'de>, + { + todo!() + } + + fn size_hint(&self) -> Option { + todo!() + } + + fn end(self) -> Result<(), ObjectAccessError> { + todo!() + } +} From b09de56587b6ea678804076e89946a66137b0a61 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Tue, 20 Dec 2022 21:49:27 +0100 Subject: [PATCH 05/18] feat: `peek` ignore trivia --- packages/libs/deer/desert/src/de.rs | 132 ++++++++++++++++++++++++- packages/libs/deer/desert/src/token.rs | 1 + 2 files changed, 129 insertions(+), 4 deletions(-) diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index 55bda4d1af9..6470e2b4449 100644 --- a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -1,4 +1,5 @@ use alloc::borrow::ToOwned; +use core::ops::Range; use deer::{ error::{ArrayAccessError, 
DeserializerError, ObjectAccessError}, @@ -27,6 +28,16 @@ pub struct Deserializer<'a, 'de> { tokens: &'de [Token], } +impl<'a, 'de> Deserializer<'a, 'de> { + pub(crate) fn erase(&mut self, range: Range) { + for index in range { + if let Some(token) = self.tokens.get_mut(index) { + *token = Token::Trivia + } + } + } +} + impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { forward!( deserialize_null, @@ -76,8 +87,21 @@ impl<'a, 'de> Deserializer<'a, 'de> { Self { tokens, context } } + fn peek_n(&self, n: usize) -> Option { + self.tokens.get(n).copied() + } + fn peek_maybe(&self) -> Option { - self.tokens.first().copied() + let mut n = 0; + let mut token = self.peek_n(n); + + while matches!(token, Some(Token::Trivia)) { + // skip all trivia + n += 1; + token = self.peek_n(n); + } + + token } fn peek(&self) -> Token { @@ -88,6 +112,11 @@ impl<'a, 'de> Deserializer<'a, 'de> { let (next, tokens) = self.tokens.split_first()?; self.tokens = tokens; + // avoid and skip all trivia + if matches!(next, Token::Trivia) { + return self.next_maybe(); + } + Some(*next) } @@ -249,18 +278,113 @@ impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { remaining: None, } } + + fn scan(&self, key: &str) -> Option { + let mut objects: usize = 0; + let mut arrays: usize = 0; + let mut n = 0; + + #[derive(Copy, Clone, Eq, PartialEq)] + enum State { + Key, + Value, + } + + impl State { + fn flip(&mut self) { + match *self { + State::Key => *self = State::Value, + State::Value => *self = State::Key, + } + } + } + + let mut state = State::Key; + + loop { + let next = self.deserializer.peek_n(n)?; + + match next { + Token::Array { .. } => arrays += 1, + Token::ArrayEnd => arrays -= 1, + Token::Object { .. 
} => objects += 1, + Token::ObjectEnd if objects == 0 => { + // this is for the outer layer (that's us), therefore we can abort our linear + // search + return None; + } + Token::ObjectEnd => objects -= 1, + Token::Str(value) | Token::BorrowedStr(value) | Token::String(value) + if objects == 0 && arrays == 0 && value == key && state == State::Key => + { + // we found an element that matches the element value that is next in line + return Some(n); + } + _ => {} + } + + if arrays == 0 && objects == 0 { + // we're dependent on the fact if something is a key or value, if we're not nested + // then we can switch the state. + state.flip(); + } + + n += 1; + } + } } +// TODO: for value we need a scan for some sorts, and then need to replace/remove the elements from +// the stream impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { - fn set_bounded(&mut self, length: usize) -> Result<(), ObjectAccessError> { - todo!() + fn set_bounded(&mut self, length: usize) -> Result<(), ArrayAccessError> { + if self.dirty { + return Err( + Report::new(SetBoundedError::Dirty.into_error()).change_context(ArrayAccessError) + ); + } + + if self.remaining.is_some() { + return Err( + Report::new(SetBoundedError::CalledMultipleTimes.into_error()) + .change_context(ArrayAccessError), + ); + } + + self.remaining = Some(length); + + Ok(()) } fn value(&mut self, key: &str) -> Result where T: Deserialize<'de>, { - todo!() + // TODO: we need to look bounded stuffs + match self.scan(key) { + Some(offset) => { + // now we need to figure out which values are used, we can do this through offset + // calculations + let remaining = self.deserializer.remaining() - offset; + + let mut deserializer = Deserializer { + tokens: &self.deserializer.tokens[offset + 1..], + context: self.deserializer.context, + }; + + let value = T::deserialize(&mut deserializer); + + let erase = remaining - deserializer.remaining(); + + self.deserializer.erase(offset..offset + erase); + + value + } + None => 
T::deserialize(DeserializerNone { + context: self.deserializer.context, + }), + } + .change_context(ObjectAccessError) } fn next(&mut self) -> Option> diff --git a/packages/libs/deer/desert/src/token.rs b/packages/libs/deer/desert/src/token.rs index 87e289e9de2..2d9d8092fd0 100644 --- a/packages/libs/deer/desert/src/token.rs +++ b/packages/libs/deer/desert/src/token.rs @@ -18,6 +18,7 @@ pub enum Token { ArrayEnd, Object { length: Option }, ObjectEnd, + Trivia, } impl Display for Token { From 74d977650858d4407072e8b94f72a78ccb6007de Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 13:50:44 +0100 Subject: [PATCH 06/18] drive-by: move `SetBoundedError` into `deer` --- packages/libs/deer/json/src/error.rs | 30 -------------------- packages/libs/deer/src/error/internal.rs | 36 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 30 deletions(-) create mode 100644 packages/libs/deer/src/error/internal.rs diff --git a/packages/libs/deer/json/src/error.rs b/packages/libs/deer/json/src/error.rs index 261db2642d4..04112b96a7b 100644 --- a/packages/libs/deer/json/src/error.rs +++ b/packages/libs/deer/json/src/error.rs @@ -61,33 +61,3 @@ impl Variant for OverflowError { Ok(()) } } - -#[derive(Debug)] -pub(crate) enum SetBoundedError { - Dirty, - CalledMultipleTimes, -} - -impl Display for SetBoundedError { - fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { - match self { - Self::Dirty => f.write_str("unable to set bounds after calling `.next()`"), - Self::CalledMultipleTimes => f.write_str("cannot call set_bounds() multiple times"), - } - } -} - -impl Variant for SetBoundedError { - type Properties = (Location,); - - const ID: Id = id!["internal", "access", "set_bounds"]; - const NAMESPACE: Namespace = NAMESPACE; - - fn message<'a>( - &self, - fmt: &mut Formatter, - _: &::Value<'a>, - ) -> core::fmt::Result { - Display::fmt(&self, fmt) - } -} diff --git a/packages/libs/deer/src/error/internal.rs b/packages/libs/deer/src/error/internal.rs 
new file mode 100644 index 00000000000..3d4dd455e25 --- /dev/null +++ b/packages/libs/deer/src/error/internal.rs @@ -0,0 +1,36 @@ +use core::fmt::{Display, Formatter}; + +use crate::{ + error::{ErrorProperties, Id, Location, Namespace, Variant, NAMESPACE}, + id, +}; + +#[derive(Debug)] +pub enum SetBoundedError { + Dirty, + CalledMultipleTimes, +} + +impl Display for SetBoundedError { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + match self { + Self::Dirty => f.write_str("unable to set bounds after calling `.next()`"), + Self::CalledMultipleTimes => f.write_str("cannot call set_bounds() multiple times"), + } + } +} + +impl Variant for SetBoundedError { + type Properties = (Location,); + + const ID: Id = id!["internal", "access", "set_bounds"]; + const NAMESPACE: Namespace = NAMESPACE; + + fn message<'a>( + &self, + fmt: &mut Formatter, + _: &::Value<'a>, + ) -> core::fmt::Result { + Display::fmt(&self, fmt) + } +} From 5ac60a0b3344dc0d5bdaa73e8437bd9064c380a1 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 14:11:34 +0100 Subject: [PATCH 07/18] feat: start error recovery --- packages/libs/deer/desert/src/de.rs | 133 +++++++++++++++++++++++++--- packages/libs/deer/json/src/lib.rs | 5 +- packages/libs/deer/src/error/mod.rs | 2 + 3 files changed, 127 insertions(+), 13 deletions(-) diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index 6470e2b4449..cbfaf05f945 100644 --- a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -2,7 +2,7 @@ use alloc::borrow::ToOwned; use core::ops::Range; use deer::{ - error::{ArrayAccessError, DeserializerError, ObjectAccessError}, + error::{ArrayAccessError, DeserializerError, ObjectAccessError, SetBoundedError, Variant}, Context, Deserialize, Visitor, }; use error_stack::{Report, Result, ResultExt}; @@ -56,7 +56,7 @@ impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { self.context } - fn deserialize_any(mut self, 
visitor: V) -> Result + fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { @@ -124,6 +124,11 @@ impl<'a, 'de> Deserializer<'a, 'de> { self.next_maybe().expect("should have token to deserialize") } + fn bump_n(&mut self, n: usize) { + let (_, tokens) = self.tokens.split_at(n); + self.tokens = tokens; + } + pub fn remaining(&self) -> usize { self.tokens.len() } @@ -181,6 +186,31 @@ impl<'a, 'b, 'de> ArrayAccess<'a, 'b, 'de> { remaining: None, } } + + fn scan_end(&self) -> Option { + let mut objects: usize = 0; + let mut arrays: usize = 0; + + let mut n = 0; + + loop { + let token = self.deserializer.peek_n(n)?; + + match token { + Token::Array { .. } => arrays += 1, + Token::ArrayEnd if arrays == 0 && objects == 0 => { + // we're at the outer layer, meaning we can know where we end + return Some(n); + } + Token::ArrayEnd => arrays = arrays.saturating_sub(1), + Token::Object { .. } => objects += 1, + Token::ObjectEnd => objects = objects.saturating_sub(1), + _ => {} + } + + n += 1; + } + } } impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { @@ -228,7 +258,7 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { None } } else { - let value = T::deserialize(self.deserializer); + let value = T::deserialize(&mut *self.deserializer); Some(value.change_context(ArrayAccessError)) } } @@ -279,6 +309,8 @@ impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { } } + // This assumes that Str and such are atomic, meaning `Str Str` as a deserialize value is + // considered invalid, as that should use `ArrayAccess` instead. fn scan(&self, key: &str) -> Option { let mut objects: usize = 0; let mut arrays: usize = 0; @@ -306,14 +338,14 @@ impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { match next { Token::Array { .. } => arrays += 1, - Token::ArrayEnd => arrays -= 1, + Token::ArrayEnd => arrays = arrays.saturating_sub(1), Token::Object { .. 
} => objects += 1, - Token::ObjectEnd if objects == 0 => { + Token::ObjectEnd if objects == 0 && arrays == 0 => { // this is for the outer layer (that's us), therefore we can abort our linear // search return None; } - Token::ObjectEnd => objects -= 1, + Token::ObjectEnd => objects = objects.saturating_sub(1), Token::Str(value) | Token::BorrowedStr(value) | Token::String(value) if objects == 0 && arrays == 0 && value == key && state == State::Key => { @@ -332,22 +364,47 @@ impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { n += 1; } } + + fn scan_end(&self) -> Option { + let mut objects: usize = 0; + let mut arrays: usize = 0; + + let mut n = 0; + + loop { + let token = self.deserializer.peek_n(n)?; + + match token { + Token::Array { .. } => arrays += 1, + Token::ArrayEnd => arrays = arrays.saturating_sub(1), + Token::Object { .. } => objects += 1, + Token::ObjectEnd if arrays == 0 && objects == 0 => { + // we're at the outer layer, meaning we can know where we end + return Some(n); + } + Token::ObjectEnd => objects = objects.saturating_sub(1), + _ => {} + } + + n += 1; + } + } } // TODO: for value we need a scan for some sorts, and then need to replace/remove the elements from // the stream impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { - fn set_bounded(&mut self, length: usize) -> Result<(), ArrayAccessError> { + fn set_bounded(&mut self, length: usize) -> Result<(), ObjectAccessError> { if self.dirty { return Err( - Report::new(SetBoundedError::Dirty.into_error()).change_context(ArrayAccessError) + Report::new(SetBoundedError::Dirty.into_error()).change_context(ObjectAccessError) ); } if self.remaining.is_some() { return Err( Report::new(SetBoundedError::CalledMultipleTimes.into_error()) - .change_context(ArrayAccessError), + .change_context(ObjectAccessError), ); } @@ -360,6 +417,17 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { where T: Deserialize<'de>, { + if self.remaining == Some(0) { + return 
T::deserialize(DeserializerNone { + context: self.deserializer.context, + }) + .change_context(ObjectAccessError); + } + + if let Some(remaining) = &mut self.remaining { + *remaining = remaining.saturating_sub(1); + } + // TODO: we need to look bounded stuffs match self.scan(key) { Some(offset) => { @@ -392,11 +460,54 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { K: Deserialize<'de>, V: Deserialize<'de>, { - todo!() + if self.remaining == Some(0) { + return None; + } + + if let Some(remaining) = &mut self.remaining { + *remaining = remaining.saturating_sub(1); + } + + let (key, value) = if matches!(self.deserializer.peek(), Token::ObjectEnd) { + // we're not in bounded mode, which means we need to signal that we're done + if self.remaining.is_none() { + return None; + } + + if self.remaining.is_some() { + let key = K::deserialize(DeserializerNone { + context: self.deserializer.context, + }); + let value = V::deserialize(DeserializerNone { + context: self.deserializer.context, + }); + + (key, value) + } else { + return None; + } + } else { + let key = K::deserialize(&mut *self.deserializer); + let value = V::deserialize(&mut *self.deserializer); + + (key, value) + }; + + let result = match (key, value) { + (Err(mut key), Err(value)) => { + key.extend_one(value); + + Err(key.change_context(ObjectAccessError)) + } + (Err(error), _) | (_, Err(error)) => Err(error.change_context(ObjectAccessError)), + (Ok(key), Ok(value)) => Ok((key, value)), + }; + + Some(result) } fn size_hint(&self) -> Option { - todo!() + self.length } fn end(self) -> Result<(), ObjectAccessError> { diff --git a/packages/libs/deer/json/src/lib.rs b/packages/libs/deer/json/src/lib.rs index 7d7e2ae753d..c0b8a48cc74 100644 --- a/packages/libs/deer/json/src/lib.rs +++ b/packages/libs/deer/json/src/lib.rs @@ -34,14 +34,15 @@ use deer::{ error::{ ArrayAccessError, ArrayLengthError, DeserializeError, DeserializerError, ExpectedLength, ExpectedType, MissingError, ObjectAccessError, 
ObjectItemsExtraError, ReceivedKey, - ReceivedLength, ReceivedType, ReceivedValue, TypeError, ValueError, Variant, + ReceivedLength, ReceivedType, ReceivedValue, SetBoundedError, TypeError, ValueError, + Variant, }, Context, Deserialize, DeserializeOwned, Document, Reflection, Schema, Visitor, }; use error_stack::{IntoReport, Report, Result, ResultExt}; use serde_json::{Map, Value}; -use crate::error::{BytesUnsupportedError, OverflowError, SetBoundedError}; +use crate::error::{BytesUnsupportedError, OverflowError}; #[cfg(not(feature = "arbitrary-precision"))] fn serde_to_deer_number(number: &serde_json::Number) -> Option { diff --git a/packages/libs/deer/src/error/mod.rs b/packages/libs/deer/src/error/mod.rs index a0ab28e490b..a159bbbdcc4 100644 --- a/packages/libs/deer/src/error/mod.rs +++ b/packages/libs/deer/src/error/mod.rs @@ -67,6 +67,7 @@ use error_stack::{Context, Frame, IntoReport, Report, Result}; pub use extra::{ ArrayLengthError, ExpectedLength, ObjectItemsExtraError, ReceivedKey, ReceivedLength, }; +pub use internal::SetBoundedError; pub use location::Location; use serde::ser::SerializeMap; pub use r#type::{ExpectedType, ReceivedType, TypeError}; @@ -79,6 +80,7 @@ pub use value::{MissingError, ReceivedValue, ValueError}; use crate::error::serialize::{impl_serialize, Export}; mod extra; +mod internal; mod location; mod macros; mod serialize; From d9e4806d5736121a92222d2047755c746a12ed42 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 14:13:09 +0100 Subject: [PATCH 08/18] todo: oversight --- packages/libs/deer/desert/src/de.rs | 1 + packages/libs/deer/json/src/lib.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index cbfaf05f945..802e7b5894f 100644 --- a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -268,6 +268,7 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { } fn end(self) -> Result<(), 
ArrayAccessError> { + // TODO: error if self.remaining isn't Some(0) or None let mut result = Ok(()); // ensure that we consume the last token, if it is the wrong token error out diff --git a/packages/libs/deer/json/src/lib.rs b/packages/libs/deer/json/src/lib.rs index c0b8a48cc74..01d3e49df21 100644 --- a/packages/libs/deer/json/src/lib.rs +++ b/packages/libs/deer/json/src/lib.rs @@ -454,6 +454,7 @@ impl<'a, 'de> deer::ArrayAccess<'de> for ArrayAccess<'a> { } fn end(self) -> Result<(), ArrayAccessError> { + // TODO: error if self.remaining isn't Some(0) or None let count = self.inner.count(); if count == 0 { Ok(()) From 01f0fc604b52535df686163f042376357c8674f6 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 16:19:21 +0100 Subject: [PATCH 09/18] feat: introduction of trivia tape --- packages/libs/deer/desert/Cargo.toml | 1 + packages/libs/deer/desert/src/de.rs | 235 ++++++++++++++++++++----- packages/libs/deer/desert/src/token.rs | 1 - 3 files changed, 192 insertions(+), 45 deletions(-) diff --git a/packages/libs/deer/desert/Cargo.toml b/packages/libs/deer/desert/Cargo.toml index 30a2cca8cf3..b46acc70811 100644 --- a/packages/libs/deer/desert/Cargo.toml +++ b/packages/libs/deer/desert/Cargo.toml @@ -10,3 +10,4 @@ publish = false [dependencies] deer = { path = ".." 
} error-stack = { version = "0.2.4", default_features = false } +bitvec = { version = "1", default_features = false, features = ['alloc', 'atomic'] } diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index 802e7b5894f..9d27aaabb23 100644 --- a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -1,6 +1,15 @@ use alloc::borrow::ToOwned; -use core::ops::Range; +use core::{ + ops::{Deref, DerefMut, Range}, + slice::SliceIndex, +}; +use bitvec::{ + boxed::BitBox, + order::Lsb0, + slice::{BitSlice, BitSliceIndex}, + vec::BitVec, +}; use deer::{ error::{ArrayAccessError, DeserializerError, ObjectAccessError, SetBoundedError, Variant}, Context, Deserialize, Visitor, @@ -23,19 +32,177 @@ macro_rules! forward { } #[derive(Debug)] -pub struct Deserializer<'a, 'de> { - context: &'a Context, +enum Trivia<'de> { + Owned(BitBox), + Slice(&'de mut BitSlice), +} + +impl Deref for Trivia<'_> { + type Target = BitSlice; + + fn deref(&self) -> &Self::Target { + match self { + Trivia::Owned(value) => value.as_bitslice(), + Trivia::Slice(value) => value.as_ref(), + } + } +} + +impl DerefMut for Trivia<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + match self { + Trivia::Owned(value) => value.as_mut_bitslice(), + Trivia::Slice(value) => *value, + } + } +} + +impl From for Trivia<'_> { + fn from(value: BitBox) -> Self { + Self::Owned(value) + } +} + +impl<'de> From<&'de mut BitSlice> for Trivia<'de> { + fn from(value: &'de mut BitSlice) -> Self { + Self::Slice(value) + } +} + +#[derive(Debug)] +struct Tape<'de> { tokens: &'de [Token], + trivia: Trivia<'de>, } -impl<'a, 'de> Deserializer<'a, 'de> { - pub(crate) fn erase(&mut self, range: Range) { - for index in range { - if let Some(token) = self.tokens.get_mut(index) { - *token = Token::Trivia +impl Tape<'static> { + fn empty() -> Self { + Self { + tokens: &[], + trivia: Trivia::Owned(BitVec::new().into_boxed_bitslice()), + } + } +} + +impl<'de> Tape<'de> { + // 
also includes trivia + fn peek_all_n(&self, n: usize) -> Option { + self.tokens.get(n).copied() + } + + fn is_trivia_n(&self, n: usize) -> Option { + self.trivia.get(n).as_deref().copied() + } + + fn set_trivia(&mut self, mut range: Range) { + // automatically adjust so that we're able to always index to the end, even if the the end + // is out of bounds + if range.end >= self.tokens.len() && range.start < self.tokens.len() { + range.end = self.tokens.len(); + } + + if let Some(slice) = self.trivia.get_mut(range) { + slice.fill(true); + } + } + + fn peek_n(&self, n: usize) -> Option { + let mut offset = 0; + let mut m = 0; + + while m != n { + if !self.is_trivia_n(offset)? { + m += 1; + } + + offset += 1; + } + + self.peek_all_n(m) + } + + fn peek(&self) -> Option { + let mut n = 0; + + while self.is_trivia_n(n)? { + n += 1; + } + + self.peek_all_n(n) + } + + fn bump(&mut self) -> Option<(Token, bool)> { + // naive version of bump, which just takes the token and returns it with the status + let (token, tokens) = self.tokens.split_first()?; + let is_trivia = *self.trivia.get(0)?; + // use trivia like a feed tape, this avoid reallocation + self.trivia.shift_left(1); + self.tokens = tokens; + + Some((*token, is_trivia)) + } + + fn bump_n(&mut self, i: usize) { + for _ in 0..i { + self.bump(); + } + } + + fn next(&mut self) -> Option { + loop { + let (token, is_trivia) = self.bump()?; + + if !is_trivia { + return Some(token); } } } + + fn remaining(&self) -> usize { + self.tokens.len() + } + + fn is_empty(&self) -> bool { + self.tokens.is_empty() + } + + fn view<'a, B>(&'a mut self, n: B) -> Option> + where + B: BitSliceIndex<'a, usize, Lsb0, Mut = &'a mut BitSlice> + + SliceIndex<[Token], Output = [Token]> + + Clone, + { + let tokens = self.tokens.get(n.clone())?; + let trivia = self.trivia.get_mut(n)?; + + Some(Tape { + tokens, + trivia: trivia.into(), + }) + } +} + +impl<'de> From<&'de [Token]> for Tape<'de> { + fn from(value: &'de [Token]) -> Self { + Self { + 
tokens: value, + trivia: BitVec::repeat(false, value.len()) + .into_boxed_bitslice() + .into(), + } + } +} + +#[derive(Debug)] +pub struct Deserializer<'a, 'de> { + context: &'a Context, + tokens: Tape<'de>, +} + +impl<'a, 'de> Deserializer<'a, 'de> { + fn erase(&mut self, range: Range) { + self.tokens.set_trivia(range); + } } impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { @@ -84,53 +251,30 @@ impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { impl<'a, 'de> Deserializer<'a, 'de> { pub fn new(tokens: &'de [Token], context: &'a Context) -> Self { - Self { tokens, context } - } - - fn peek_n(&self, n: usize) -> Option { - self.tokens.get(n).copied() - } - - fn peek_maybe(&self) -> Option { - let mut n = 0; - let mut token = self.peek_n(n); - - while matches!(token, Some(Token::Trivia)) { - // skip all trivia - n += 1; - token = self.peek_n(n); + Self { + tokens: tokens.into(), + context, } - - token } fn peek(&self) -> Token { - self.peek_maybe().expect("should have token to deserialize") + self.tokens + .peek() + .expect("should have token to deserialize") } - fn next_maybe(&mut self) -> Option { - let (next, tokens) = self.tokens.split_first()?; - self.tokens = tokens; - - // avoid and skip all trivia - if matches!(next, Token::Trivia) { - return self.next_maybe(); - } - - Some(*next) + fn peek_n(&self, n: usize) -> Option { + self.tokens.peek_n(n) } fn next(&mut self) -> Token { - self.next_maybe().expect("should have token to deserialize") - } - - fn bump_n(&mut self, n: usize) { - let (_, tokens) = self.tokens.split_at(n); - self.tokens = tokens; + self.tokens + .next() + .expect("should have token to deserialize") } pub fn remaining(&self) -> usize { - self.tokens.len() + self.tokens.remaining() } pub fn is_empty(&self) -> bool { @@ -436,14 +580,17 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { // calculations let remaining = self.deserializer.remaining() - offset; + let tape = 
self.deserializer.tokens.view(offset + 1..); + let mut deserializer = Deserializer { - tokens: &self.deserializer.tokens[offset + 1..], + tokens: tape.unwrap_or_else(Tape::<'static>::empty), context: self.deserializer.context, }; let value = T::deserialize(&mut deserializer); let erase = remaining - deserializer.remaining(); + drop(deserializer); self.deserializer.erase(offset..offset + erase); diff --git a/packages/libs/deer/desert/src/token.rs b/packages/libs/deer/desert/src/token.rs index 2d9d8092fd0..87e289e9de2 100644 --- a/packages/libs/deer/desert/src/token.rs +++ b/packages/libs/deer/desert/src/token.rs @@ -18,7 +18,6 @@ pub enum Token { ArrayEnd, Object { length: Option }, ObjectEnd, - Trivia, } impl Display for Token { From e976b35b90228ba8d51b2b7696ff5c23aa7e4fb4 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 16:31:22 +0100 Subject: [PATCH 10/18] feat: `Cow` equivalent for `Trivia` to avoid allocation --- packages/libs/deer/desert/src/de.rs | 67 +++++++++++++---------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index 9d27aaabb23..1023d23f445 100644 --- a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -1,6 +1,6 @@ -use alloc::borrow::ToOwned; +use alloc::borrow::{Cow, ToOwned}; use core::{ - ops::{Deref, DerefMut, Range}, + ops::{Deref, Range}, slice::SliceIndex, }; @@ -32,59 +32,52 @@ macro_rules! 
forward { } #[derive(Debug)] -enum Trivia<'de> { +enum Trivia<'a> { Owned(BitBox), - Slice(&'de mut BitSlice), + Slice(&'a BitSlice), } -impl Deref for Trivia<'_> { +impl<'a> Deref for Trivia<'a> { type Target = BitSlice; fn deref(&self) -> &Self::Target { match self { Trivia::Owned(value) => value.as_bitslice(), - Trivia::Slice(value) => value.as_ref(), + Trivia::Slice(value) => *value, } } } -impl DerefMut for Trivia<'_> { - fn deref_mut(&mut self) -> &mut Self::Target { +impl<'a> Trivia<'a> { + fn to_mut(&mut self) -> &mut BitSlice { match self { Trivia::Owned(value) => value.as_mut_bitslice(), - Trivia::Slice(value) => *value, - } - } -} + Trivia::Slice(value) => { + let owned = BitBox::from_bitslice(*value); + *self = Self::Owned(owned); -impl From for Trivia<'_> { - fn from(value: BitBox) -> Self { - Self::Owned(value) - } -} - -impl<'de> From<&'de mut BitSlice> for Trivia<'de> { - fn from(value: &'de mut BitSlice) -> Self { - Self::Slice(value) + self.to_mut() + } + } } } #[derive(Debug)] -struct Tape<'de> { +struct Tape<'a, 'de> { tokens: &'de [Token], - trivia: Trivia<'de>, + trivia: Trivia<'a>, } -impl Tape<'static> { +impl Tape<'_, '_> { fn empty() -> Self { Self { tokens: &[], - trivia: Trivia::Owned(BitVec::new().into_boxed_bitslice()), + trivia: Trivia::Slice(BitSlice::empty()), } } } -impl<'de> Tape<'de> { +impl<'a, 'de> Tape<'a, 'de> { // also includes trivia fn peek_all_n(&self, n: usize) -> Option { self.tokens.get(n).copied() @@ -101,7 +94,7 @@ impl<'de> Tape<'de> { range.end = self.tokens.len(); } - if let Some(slice) = self.trivia.get_mut(range) { + if let Some(slice) = self.trivia.to_mut().get_mut(range) { slice.fill(true); } } @@ -136,7 +129,7 @@ impl<'de> Tape<'de> { let (token, tokens) = self.tokens.split_first()?; let is_trivia = *self.trivia.get(0)?; // use trivia like a feed tape, this avoid reallocation - self.trivia.shift_left(1); + self.trivia.to_mut().shift_left(1); self.tokens = tokens; Some((*token, is_trivia)) @@ -166,29 +159,27 
@@ impl<'de> Tape<'de> { self.tokens.is_empty() } - fn view<'a, B>(&'a mut self, n: B) -> Option> + fn view<'b, B>(&'b self, n: B) -> Option> where - B: BitSliceIndex<'a, usize, Lsb0, Mut = &'a mut BitSlice> + B: BitSliceIndex<'b, usize, Lsb0, Immut = &'b BitSlice> + SliceIndex<[Token], Output = [Token]> + Clone, { let tokens = self.tokens.get(n.clone())?; - let trivia = self.trivia.get_mut(n)?; + let trivia = self.trivia.get(n)?; Some(Tape { tokens, - trivia: trivia.into(), + trivia: Trivia::Slice(trivia), }) } } -impl<'de> From<&'de [Token]> for Tape<'de> { +impl<'de> From<&'de [Token]> for Tape<'_, 'de> { fn from(value: &'de [Token]) -> Self { Self { tokens: value, - trivia: BitVec::repeat(false, value.len()) - .into_boxed_bitslice() - .into(), + trivia: Trivia::Owned(BitVec::repeat(false, value.len()).into_boxed_bitslice()), } } } @@ -196,7 +187,7 @@ impl<'de> From<&'de [Token]> for Tape<'de> { #[derive(Debug)] pub struct Deserializer<'a, 'de> { context: &'a Context, - tokens: Tape<'de>, + tokens: Tape<'a, 'de>, } impl<'a, 'de> Deserializer<'a, 'de> { @@ -583,7 +574,7 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { let tape = self.deserializer.tokens.view(offset + 1..); let mut deserializer = Deserializer { - tokens: tape.unwrap_or_else(Tape::<'static>::empty), + tokens: tape.unwrap_or_else(Tape::empty), context: self.deserializer.context, }; From acd1909eeb293f006d75f619e62c45e25261ecaa Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 17:58:17 +0100 Subject: [PATCH 11/18] feat: rework `.end` for array --- packages/libs/deer/desert/src/de.rs | 52 ++++++++++++++++++----------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index 1023d23f445..1eeb87acbe6 100644 --- a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -11,7 +11,10 @@ use bitvec::{ vec::BitVec, }; use deer::{ - error::{ArrayAccessError, 
DeserializerError, ObjectAccessError, SetBoundedError, Variant}, + error::{ + ArrayAccessError, ArrayLengthError, DeserializerError, ExpectedLength, ObjectAccessError, + ReceivedLength, SetBoundedError, Variant, + }, Context, Deserialize, Visitor, }; use error_stack::{Report, Result, ResultExt}; @@ -307,16 +310,16 @@ impl<'de> deer::Deserializer<'de> for DeserializerNone<'_> { struct ArrayAccess<'a, 'b, 'de: 'a> { deserializer: &'a mut Deserializer<'b, 'de>, - dirty: bool, length: Option, remaining: Option, + consumed: usize, } impl<'a, 'b, 'de> ArrayAccess<'a, 'b, 'de> { pub fn new(deserializer: &'a mut Deserializer<'b, 'de>, length: Option) -> Self { Self { deserializer, - dirty: false, + consumed: 0, length, remaining: None, } @@ -350,7 +353,7 @@ impl<'a, 'b, 'de> ArrayAccess<'a, 'b, 'de> { impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { fn set_bounded(&mut self, length: usize) -> Result<(), ArrayAccessError> { - if self.dirty { + if self.consumed > 0 { return Err( Report::new(SetBoundedError::Dirty.into_error()).change_context(ArrayAccessError) ); @@ -372,7 +375,7 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { where T: Deserialize<'de>, { - self.dirty = true; + self.consumed += 1; if matches!(self.deserializer.peek(), Token::ArrayEnd) { // we have reached the ending, if `self.remaining` is set we use the `DeserializerNone` @@ -403,27 +406,40 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { } fn end(self) -> Result<(), ArrayAccessError> { - // TODO: error if self.remaining isn't Some(0) or None let mut result = Ok(()); // ensure that we consume the last token, if it is the wrong token error out if !matches!(self.deserializer.peek(), Token::ArrayEnd) { - // TODO: error - result = Err(Report::new(ArrayAccessError)); + let mut error = Report::new(ArrayLengthError.into_error()) + .attach(ExpectedLength::new(self.consumed)); + + if let Some(length) = self.size_hint() { + error = 
error.attach(ReceivedLength::new(length)); + } + + result = Err(error); } - self.deserializer.next(); + // bump until the very end, which ensures that deserialize calls after this might succeed! + let bump = self + .scan_end() + .unwrap_or_else(|| self.deserializer.tokens.remaining()); + self.deserializer.tokens.bump_n(bump); + + if let Some(remaining) = self.remaining { + if remaining > 0 { + // TODO: This should be an internal error, as the consumer did not ensure that this + // was called n times ~> ContractViolation error + let error = Report::new(()); - if self.remaining.map_or(false, |remaining| remaining > 0) { - let error = Report::new(ArrayAccessError); - // TODO: error - match &mut result { - Err(result) => result.extend_one(error), - result => *result = Err(error), + match &mut result { + Err(result) => result.extend_one(error), + result => *result = Err(error), + } } } - result + result.change_context(ArrayAccessError) } } @@ -609,9 +625,7 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { let (key, value) = if matches!(self.deserializer.peek(), Token::ObjectEnd) { // we're not in bounded mode, which means we need to signal that we're done - if self.remaining.is_none() { - return None; - } + self.remaining?; if self.remaining.is_some() { let key = K::deserialize(DeserializerNone { From 0c2e0f05073123f36beb70103575bbcf0f6c2e26 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 19:08:25 +0100 Subject: [PATCH 12/18] feat: `ObjectLengthError` --- packages/libs/deer/desert/src/de.rs | 76 +++++++++++----- packages/libs/deer/json/src/lib.rs | 30 ++++--- packages/libs/deer/src/error/extra.rs | 108 ++++++++++++++++++++++- packages/libs/deer/src/error/internal.rs | 23 +++-- packages/libs/deer/src/error/mod.rs | 5 +- 5 files changed, 196 insertions(+), 46 deletions(-) diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs index 1eeb87acbe6..c70f48416ce 100644 --- 
a/packages/libs/deer/desert/src/de.rs +++ b/packages/libs/deer/desert/src/de.rs @@ -12,8 +12,8 @@ use bitvec::{ }; use deer::{ error::{ - ArrayAccessError, ArrayLengthError, DeserializerError, ExpectedLength, ObjectAccessError, - ReceivedLength, SetBoundedError, Variant, + ArrayAccessError, ArrayLengthError, BoundedContractViolationError, DeserializerError, + ExpectedLength, ObjectAccessError, ObjectItemsExtraError, ReceivedLength, Variant, }, Context, Deserialize, Visitor, }; @@ -355,15 +355,16 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { fn set_bounded(&mut self, length: usize) -> Result<(), ArrayAccessError> { if self.consumed > 0 { return Err( - Report::new(SetBoundedError::Dirty.into_error()).change_context(ArrayAccessError) + Report::new(BoundedContractViolationError::SetDirty.into_error()) + .change_context(ArrayAccessError), ); } if self.remaining.is_some() { - return Err( - Report::new(SetBoundedError::CalledMultipleTimes.into_error()) - .change_context(ArrayAccessError), - ); + return Err(Report::new( + BoundedContractViolationError::SetCalledMultipleTimes.into_error(), + ) + .change_context(ArrayAccessError)); } self.remaining = Some(length); @@ -428,9 +429,8 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { if let Some(remaining) = self.remaining { if remaining > 0 { - // TODO: This should be an internal error, as the consumer did not ensure that this - // was called n times ~> ContractViolation error - let error = Report::new(()); + let error = + Report::new(BoundedContractViolationError::EndRemainingItems.into_error()); match &mut result { Err(result) => result.extend_one(error), @@ -446,18 +446,18 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { struct ObjectAccess<'a, 'b, 'de: 'a> { deserializer: &'a mut Deserializer<'b, 'de>, - dirty: bool, length: Option, remaining: Option, + consumed: usize, } impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { pub fn new(deserializer: &'a mut 
Deserializer<'b, 'de>, length: Option) -> Self { Self { deserializer, - dirty: false, length, remaining: None, + consumed: 0, } } @@ -547,17 +547,18 @@ impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { // the stream impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { fn set_bounded(&mut self, length: usize) -> Result<(), ObjectAccessError> { - if self.dirty { + if self.consumed > 0 { return Err( - Report::new(SetBoundedError::Dirty.into_error()).change_context(ObjectAccessError) + Report::new(BoundedContractViolationError::SetDirty.into_error()) + .change_context(ObjectAccessError), ); } if self.remaining.is_some() { - return Err( - Report::new(SetBoundedError::CalledMultipleTimes.into_error()) - .change_context(ObjectAccessError), - ); + return Err(Report::new( + BoundedContractViolationError::SetCalledMultipleTimes.into_error(), + ) + .change_context(ObjectAccessError)); } self.remaining = Some(length); @@ -576,11 +577,12 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { .change_context(ObjectAccessError); } + self.consumed += 1; + if let Some(remaining) = &mut self.remaining { *remaining = remaining.saturating_sub(1); } - // TODO: we need to look bounded stuffs match self.scan(key) { Some(offset) => { // now we need to figure out which values are used, we can do this through offset @@ -619,6 +621,8 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { return None; } + self.consumed += 1; + if let Some(remaining) = &mut self.remaining { *remaining = remaining.saturating_sub(1); } @@ -664,6 +668,38 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { } fn end(self) -> Result<(), ObjectAccessError> { - todo!() + let mut result = Ok(()); + + // ensure that we consume the last token, if it is the wrong token error out + if !matches!(self.deserializer.peek(), Token::ObjectEnd) { + let mut error = Report::new(ObjectItemsExtraError.into_error()) + .attach(ExpectedLength::new(self.consumed)); + + if let 
Some(length) = self.size_hint() { + error = error.attach(ReceivedLength::new(length)); + } + + result = Err(error); + } + + // bump until the very end, which ensures that deserialize calls after this might succeed! + let bump = self + .scan_end() + .unwrap_or_else(|| self.deserializer.tokens.remaining()); + self.deserializer.tokens.bump_n(bump); + + if let Some(remaining) = self.remaining { + if remaining > 0 { + let error = + Report::new(BoundedContractViolationError::EndRemainingItems.into_error()); + + match &mut result { + Err(result) => result.extend_one(error), + result => *result = Err(error), + } + } + } + + result.change_context(ObjectAccessError) } } diff --git a/packages/libs/deer/json/src/lib.rs b/packages/libs/deer/json/src/lib.rs index 01d3e49df21..04d8e573cf3 100644 --- a/packages/libs/deer/json/src/lib.rs +++ b/packages/libs/deer/json/src/lib.rs @@ -32,10 +32,10 @@ use std::any::Demand; use deer::{ error::{ - ArrayAccessError, ArrayLengthError, DeserializeError, DeserializerError, ExpectedLength, - ExpectedType, MissingError, ObjectAccessError, ObjectItemsExtraError, ReceivedKey, - ReceivedLength, ReceivedType, ReceivedValue, SetBoundedError, TypeError, ValueError, - Variant, + ArrayAccessError, ArrayLengthError, BoundedContractViolationError, DeserializeError, + DeserializerError, ExpectedLength, ExpectedType, MissingError, ObjectAccessError, + ObjectItemsExtraError, ReceivedKey, ReceivedLength, ReceivedType, ReceivedValue, TypeError, + ValueError, Variant, }, Context, Deserialize, DeserializeOwned, Document, Reflection, Schema, Visitor, }; @@ -400,15 +400,16 @@ impl<'a, 'de> deer::ArrayAccess<'de> for ArrayAccess<'a> { fn set_bounded(&mut self, length: usize) -> Result<(), ArrayAccessError> { if self.dirty { return Err( - Report::new(SetBoundedError::Dirty.into_error()).change_context(ArrayAccessError) + Report::new(BoundedContractViolationError::SetDirty.into_error()) + .change_context(ArrayAccessError), ); } if self.remaining.is_some() { - 
return Err( - Report::new(SetBoundedError::CalledMultipleTimes.into_error()) - .change_context(ArrayAccessError), - ); + return Err(Report::new( + BoundedContractViolationError::SetCalledMultipleTimes.into_error(), + ) + .change_context(ArrayAccessError)); } self.remaining = Some(length); @@ -494,15 +495,16 @@ impl<'a, 'de> deer::ObjectAccess<'de> for ObjectAccess<'a> { fn set_bounded(&mut self, length: usize) -> Result<(), ObjectAccessError> { if self.dirty { return Err( - Report::new(SetBoundedError::Dirty.into_error()).change_context(ObjectAccessError) + Report::new(BoundedContractViolationError::SetDirty.into_error()) + .change_context(ObjectAccessError), ); } if self.remaining.is_some() { - return Err( - Report::new(SetBoundedError::CalledMultipleTimes.into_error()) - .change_context(ObjectAccessError), - ); + return Err(Report::new( + BoundedContractViolationError::SetCalledMultipleTimes.into_error(), + ) + .change_context(ObjectAccessError)); } self.remaining = Some(length); diff --git a/packages/libs/deer/src/error/extra.rs b/packages/libs/deer/src/error/extra.rs index 0ff1bdcf14f..a0609f0315e 100644 --- a/packages/libs/deer/src/error/extra.rs +++ b/packages/libs/deer/src/error/extra.rs @@ -78,6 +78,52 @@ impl Display for ObjectItemsExtraError { } } +#[derive(Debug)] +pub struct ObjectLengthError; + +impl Variant for ObjectLengthError { + type Properties = (Location, ExpectedLength, ReceivedLength); + + const ID: Id = id!["object", "length"]; + const NAMESPACE: Namespace = NAMESPACE; + + fn message<'a>( + &self, + fmt: &mut Formatter, + properties: &::Value<'a>, + ) -> fmt::Result { + // expected object of length {expected}, but received object of length {received} + let (_, expected, received) = properties; + + let has_expected = expected.is_some(); + let has_received = received.is_some(); + + if let Some(ExpectedLength(length)) = expected { + fmt.write_fmt(format_args!("expected object of length {length}"))?; + } + + if has_expected && has_received { + 
fmt.write_str(", but ")?; + } + + if let Some(ReceivedLength(length)) = received { + fmt.write_fmt(format_args!("received object of length {length}"))?; + } + + if !has_expected && !has_received { + Display::fmt(self, fmt)?; + } + + Ok(()) + } +} + +impl Display for ObjectLengthError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str("received more items than expected") + } +} + #[derive(serde::Serialize)] pub struct ExpectedLength(usize); @@ -240,7 +286,65 @@ mod tests { } #[test] - fn object() { + fn object_length() { + // we simulate that the error happens in: + // [..., {field1: {_: _, _: _, _: _} <- here}] + let error = Report::new(Error::new(ObjectLengthError)) + .attach(Location::Field("field1")) + .attach(Location::Array(1)) + .attach(ExpectedLength::new(2)) + .attach(ReceivedLength::new(3)); + + let value = to_json::(&error); + + assert_eq!( + value, + json!({ + "location": [ + {"type": "array", "value": 1}, + {"type": "field", "value": "field1"} + ], + "expected": 2, + "received": 3 + }) + ); + } + + #[test] + fn object_length_message() { + assert_eq!( + to_message::(&Report::new(ObjectLengthError.into_error())), + "received more items than expected" + ); + + assert_eq!( + to_message::( + &Report::new(ObjectLengthError.into_error()) // + .attach(ReceivedLength::new(3)) + ), + "received object of length 3" + ); + + assert_eq!( + to_message::( + &Report::new(ObjectLengthError.into_error()) // + .attach(ExpectedLength::new(2)) + ), + "expected object of length 2" + ); + + assert_eq!( + to_message::( + &Report::new(ObjectLengthError.into_error()) + .attach(ExpectedLength::new(2)) + .attach(ReceivedLength::new(3)) + ), + "expected object of length 2, but received object of length 3" + ); + } + + #[test] + fn object_extra() { // we simulate that the error happens in: // [..., {field1: [...], field2: [...]} <- here] let error = Report::new(ObjectItemsExtraError.into_error()) @@ -261,7 +365,7 @@ mod tests { } #[test] - fn object_message() { + 
fn object_extra_message() { assert_eq!( to_message::(&Report::new(ObjectItemsExtraError.into_error())), "received unexpected keys" diff --git a/packages/libs/deer/src/error/internal.rs b/packages/libs/deer/src/error/internal.rs index 3d4dd455e25..700c899a98e 100644 --- a/packages/libs/deer/src/error/internal.rs +++ b/packages/libs/deer/src/error/internal.rs @@ -5,25 +5,32 @@ use crate::{ id, }; +// TODO: name set_size? #[derive(Debug)] -pub enum SetBoundedError { - Dirty, - CalledMultipleTimes, +pub enum BoundedContractViolationError { + SetDirty, + SetCalledMultipleTimes, + EndRemainingItems, } -impl Display for SetBoundedError { +impl Display for BoundedContractViolationError { fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { match self { - Self::Dirty => f.write_str("unable to set bounds after calling `.next()`"), - Self::CalledMultipleTimes => f.write_str("cannot call set_bounds() multiple times"), + Self::SetDirty => f.write_str("unable to set bounds after calling `.next()`"), + Self::SetCalledMultipleTimes => { + f.write_str("cannot call `set_bounded()` multiple times") + } + Self::EndRemainingItems => { + f.write_str("`.next()` was not called exactly `n` times before calling `.end()`") + } } } } -impl Variant for SetBoundedError { +impl Variant for BoundedContractViolationError { type Properties = (Location,); - const ID: Id = id!["internal", "access", "set_bounds"]; + const ID: Id = id!["internal", "access", "bounded"]; const NAMESPACE: Namespace = NAMESPACE; fn message<'a>( diff --git a/packages/libs/deer/src/error/mod.rs b/packages/libs/deer/src/error/mod.rs index a159bbbdcc4..ea1d29b98f7 100644 --- a/packages/libs/deer/src/error/mod.rs +++ b/packages/libs/deer/src/error/mod.rs @@ -65,9 +65,10 @@ use core::{ use error_stack::{Context, Frame, IntoReport, Report, Result}; pub use extra::{ - ArrayLengthError, ExpectedLength, ObjectItemsExtraError, ReceivedKey, ReceivedLength, + ArrayLengthError, ExpectedLength, ObjectItemsExtraError, 
ObjectLengthError, ReceivedKey, + ReceivedLength, }; -pub use internal::SetBoundedError; +pub use internal::BoundedContractViolationError; pub use location::Location; use serde::ser::SerializeMap; pub use r#type::{ExpectedType, ReceivedType, TypeError}; From 6f0c870b3abc2997216fed2ad59e3e13ec6dcba7 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 19:10:23 +0100 Subject: [PATCH 13/18] feat: reorganize code --- packages/libs/deer/desert/src/array.rs | 149 ++++ packages/libs/deer/desert/src/de.rs | 705 ------------------ packages/libs/deer/desert/src/deserializer.rs | 307 ++++++++ packages/libs/deer/desert/src/lib.rs | 5 +- packages/libs/deer/desert/src/object.rs | 274 +++++++ packages/libs/deer/desert/src/tape.rs | 0 6 files changed, 734 insertions(+), 706 deletions(-) create mode 100644 packages/libs/deer/desert/src/array.rs delete mode 100644 packages/libs/deer/desert/src/de.rs create mode 100644 packages/libs/deer/desert/src/deserializer.rs create mode 100644 packages/libs/deer/desert/src/object.rs create mode 100644 packages/libs/deer/desert/src/tape.rs diff --git a/packages/libs/deer/desert/src/array.rs b/packages/libs/deer/desert/src/array.rs new file mode 100644 index 00000000000..595c9457464 --- /dev/null +++ b/packages/libs/deer/desert/src/array.rs @@ -0,0 +1,149 @@ +use deer::{ + error::{ + ArrayAccessError, ArrayLengthError, BoundedContractViolationError, ExpectedLength, + ReceivedLength, Variant, + }, + Deserialize, +}; +use error_stack::{Report, ResultExt}; + +use crate::{ + deserializer::{Deserializer, DeserializerNone}, + token::Token, +}; + +pub struct ArrayAccess<'a, 'b, 'de: 'a> { + deserializer: &'a mut Deserializer<'b, 'de>, + + length: Option, + remaining: Option, + consumed: usize, +} + +impl<'a, 'b, 'de> ArrayAccess<'a, 'b, 'de> { + pub fn new(deserializer: &'a mut Deserializer<'b, 'de>, length: Option) -> Self { + Self { + deserializer, + consumed: 0, + length, + remaining: None, + } + } + + fn scan_end(&self) -> Option { + 
let mut objects: usize = 0; + let mut arrays: usize = 0; + + let mut n = 0; + + loop { + let token = self.deserializer.peek_n(n)?; + + match token { + Token::Array { .. } => arrays += 1, + Token::ArrayEnd if arrays == 0 && objects == 0 => { + // we're at the outer layer, meaning we can know where we end + return Some(n); + } + Token::ArrayEnd => arrays = arrays.saturating_sub(1), + Token::Object { .. } => objects += 1, + Token::ObjectEnd => objects = objects.saturating_sub(1), + _ => {} + } + + n += 1; + } + } +} + +impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { + fn set_bounded(&mut self, length: usize) -> error_stack::Result<(), ArrayAccessError> { + if self.consumed > 0 { + return Err( + Report::new(BoundedContractViolationError::SetDirty.into_error()) + .change_context(ArrayAccessError), + ); + } + + if self.remaining.is_some() { + return Err(Report::new( + BoundedContractViolationError::SetCalledMultipleTimes.into_error(), + ) + .change_context(ArrayAccessError)); + } + + self.remaining = Some(length); + + Ok(()) + } + + fn next(&mut self) -> Option> + where + T: Deserialize<'de>, + { + self.consumed += 1; + + if matches!(self.deserializer.peek(), Token::ArrayEnd) { + // we have reached the ending, if `self.remaining` is set we use the `DeserializerNone` + // to deserialize any values that require `None` + if let Some(remaining) = &mut self.remaining { + if *remaining == 0 { + return None; + } + + *remaining = remaining.saturating_sub(1); + + let value = T::deserialize(DeserializerNone { + context: self.deserializer.context, + }); + + Some(value.change_context(ArrayAccessError)) + } else { + None + } + } else { + let value = T::deserialize(&mut *self.deserializer); + Some(value.change_context(ArrayAccessError)) + } + } + + fn size_hint(&self) -> Option { + self.length + } + + fn end(self) -> error_stack::Result<(), ArrayAccessError> { + let mut result = Ok(()); + + // ensure that we consume the last token, if it is the wrong token error out + 
if !matches!(self.deserializer.peek(), Token::ArrayEnd) { + let mut error = Report::new(ArrayLengthError.into_error()) + .attach(ExpectedLength::new(self.consumed)); + + if let Some(length) = self.size_hint() { + error = error.attach(ReceivedLength::new(length)); + } + + result = Err(error); + } + + // bump until the very end, which ensures that deserialize calls after this might succeed! + let bump = self + .scan_end() + .unwrap_or_else(|| self.deserializer.tokens.remaining()); + self.deserializer.tokens.bump_n(bump); + + if let Some(remaining) = self.remaining { + if remaining > 0 { + let error = + Report::new(BoundedContractViolationError::EndRemainingItems.into_error()); + + match &mut result { + Err(result) => result.extend_one(error), + result => *result = Err(error), + } + } + } + + result.change_context(ArrayAccessError) + } +} diff --git a/packages/libs/deer/desert/src/de.rs b/packages/libs/deer/desert/src/de.rs deleted file mode 100644 index c70f48416ce..00000000000 --- a/packages/libs/deer/desert/src/de.rs +++ /dev/null @@ -1,705 +0,0 @@ -use alloc::borrow::{Cow, ToOwned}; -use core::{ - ops::{Deref, Range}, - slice::SliceIndex, -}; - -use bitvec::{ - boxed::BitBox, - order::Lsb0, - slice::{BitSlice, BitSliceIndex}, - vec::BitVec, -}; -use deer::{ - error::{ - ArrayAccessError, ArrayLengthError, BoundedContractViolationError, DeserializerError, - ExpectedLength, ObjectAccessError, ObjectItemsExtraError, ReceivedLength, Variant, - }, - Context, Deserialize, Visitor, -}; -use error_stack::{Report, Result, ResultExt}; - -use crate::token::Token; - -macro_rules! 
forward { - ($($method:ident),*) => { - $( - fn $method(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_any(visitor) - } - )* - }; -} - -#[derive(Debug)] -enum Trivia<'a> { - Owned(BitBox), - Slice(&'a BitSlice), -} - -impl<'a> Deref for Trivia<'a> { - type Target = BitSlice; - - fn deref(&self) -> &Self::Target { - match self { - Trivia::Owned(value) => value.as_bitslice(), - Trivia::Slice(value) => *value, - } - } -} - -impl<'a> Trivia<'a> { - fn to_mut(&mut self) -> &mut BitSlice { - match self { - Trivia::Owned(value) => value.as_mut_bitslice(), - Trivia::Slice(value) => { - let owned = BitBox::from_bitslice(*value); - *self = Self::Owned(owned); - - self.to_mut() - } - } - } -} - -#[derive(Debug)] -struct Tape<'a, 'de> { - tokens: &'de [Token], - trivia: Trivia<'a>, -} - -impl Tape<'_, '_> { - fn empty() -> Self { - Self { - tokens: &[], - trivia: Trivia::Slice(BitSlice::empty()), - } - } -} - -impl<'a, 'de> Tape<'a, 'de> { - // also includes trivia - fn peek_all_n(&self, n: usize) -> Option { - self.tokens.get(n).copied() - } - - fn is_trivia_n(&self, n: usize) -> Option { - self.trivia.get(n).as_deref().copied() - } - - fn set_trivia(&mut self, mut range: Range) { - // automatically adjust so that we're able to always index to the end, even if the the end - // is out of bounds - if range.end >= self.tokens.len() && range.start < self.tokens.len() { - range.end = self.tokens.len(); - } - - if let Some(slice) = self.trivia.to_mut().get_mut(range) { - slice.fill(true); - } - } - - fn peek_n(&self, n: usize) -> Option { - let mut offset = 0; - let mut m = 0; - - while m != n { - if !self.is_trivia_n(offset)? { - m += 1; - } - - offset += 1; - } - - self.peek_all_n(m) - } - - fn peek(&self) -> Option { - let mut n = 0; - - while self.is_trivia_n(n)? 
{ - n += 1; - } - - self.peek_all_n(n) - } - - fn bump(&mut self) -> Option<(Token, bool)> { - // naive version of bump, which just takes the token and returns it with the status - let (token, tokens) = self.tokens.split_first()?; - let is_trivia = *self.trivia.get(0)?; - // use trivia like a feed tape, this avoid reallocation - self.trivia.to_mut().shift_left(1); - self.tokens = tokens; - - Some((*token, is_trivia)) - } - - fn bump_n(&mut self, i: usize) { - for _ in 0..i { - self.bump(); - } - } - - fn next(&mut self) -> Option { - loop { - let (token, is_trivia) = self.bump()?; - - if !is_trivia { - return Some(token); - } - } - } - - fn remaining(&self) -> usize { - self.tokens.len() - } - - fn is_empty(&self) -> bool { - self.tokens.is_empty() - } - - fn view<'b, B>(&'b self, n: B) -> Option> - where - B: BitSliceIndex<'b, usize, Lsb0, Immut = &'b BitSlice> - + SliceIndex<[Token], Output = [Token]> - + Clone, - { - let tokens = self.tokens.get(n.clone())?; - let trivia = self.trivia.get(n)?; - - Some(Tape { - tokens, - trivia: Trivia::Slice(trivia), - }) - } -} - -impl<'de> From<&'de [Token]> for Tape<'_, 'de> { - fn from(value: &'de [Token]) -> Self { - Self { - tokens: value, - trivia: Trivia::Owned(BitVec::repeat(false, value.len()).into_boxed_bitslice()), - } - } -} - -#[derive(Debug)] -pub struct Deserializer<'a, 'de> { - context: &'a Context, - tokens: Tape<'a, 'de>, -} - -impl<'a, 'de> Deserializer<'a, 'de> { - fn erase(&mut self, range: Range) { - self.tokens.set_trivia(range); - } -} - -impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { - forward!( - deserialize_null, - deserialize_bool, - deserialize_number, - deserialize_char, - deserialize_string, - deserialize_str, - deserialize_bytes, - deserialize_bytes_buffer, - deserialize_array, - deserialize_object - ); - - fn context(&self) -> &Context { - self.context - } - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let token = self.next(); - - 
match token { - Token::Bool(value) => visitor.visit_bool(value), - Token::Number(value) => visitor.visit_number(value.clone()), - Token::Char(value) => visitor.visit_char(value), - Token::Str(value) => visitor.visit_str(value), - Token::BorrowedStr(value) => visitor.visit_borrowed_str(value), - Token::String(value) => visitor.visit_string(value.to_owned()), - Token::Bytes(value) => visitor.visit_bytes(value), - Token::BorrowedBytes(value) => visitor.visit_borrowed_bytes(value), - Token::BytesBuf(value) => visitor.visit_bytes_buffer(value.to_vec()), - Token::Array { length } => visitor.visit_array(ArrayAccess::new(self, length)), - Token::Object { length } => visitor.visit_object(ObjectAccess::new(self, length)), - _ => { - panic!("Deserializer did not expect {token}"); - } - } - .change_context(DeserializerError) - } -} - -impl<'a, 'de> Deserializer<'a, 'de> { - pub fn new(tokens: &'de [Token], context: &'a Context) -> Self { - Self { - tokens: tokens.into(), - context, - } - } - - fn peek(&self) -> Token { - self.tokens - .peek() - .expect("should have token to deserialize") - } - - fn peek_n(&self, n: usize) -> Option { - self.tokens.peek_n(n) - } - - fn next(&mut self) -> Token { - self.tokens - .next() - .expect("should have token to deserialize") - } - - pub fn remaining(&self) -> usize { - self.tokens.remaining() - } - - pub fn is_empty(&self) -> bool { - self.tokens.is_empty() - } -} - -#[derive(Debug)] -struct DeserializerNone<'a> { - context: &'a Context, -} - -impl<'de> deer::Deserializer<'de> for DeserializerNone<'_> { - forward!( - deserialize_null, - deserialize_bool, - deserialize_number, - deserialize_char, - deserialize_string, - deserialize_str, - deserialize_bytes, - deserialize_bytes_buffer, - deserialize_array, - deserialize_object - ); - - fn context(&self) -> &Context { - self.context - } - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_none().change_context(DeserializerError) - } -} - -struct 
ArrayAccess<'a, 'b, 'de: 'a> { - deserializer: &'a mut Deserializer<'b, 'de>, - - length: Option, - remaining: Option, - consumed: usize, -} - -impl<'a, 'b, 'de> ArrayAccess<'a, 'b, 'de> { - pub fn new(deserializer: &'a mut Deserializer<'b, 'de>, length: Option) -> Self { - Self { - deserializer, - consumed: 0, - length, - remaining: None, - } - } - - fn scan_end(&self) -> Option { - let mut objects: usize = 0; - let mut arrays: usize = 0; - - let mut n = 0; - - loop { - let token = self.deserializer.peek_n(n)?; - - match token { - Token::Array { .. } => arrays += 1, - Token::ArrayEnd if arrays == 0 && objects == 0 => { - // we're at the outer layer, meaning we can know where we end - return Some(n); - } - Token::ArrayEnd => arrays = arrays.saturating_sub(1), - Token::Object { .. } => objects += 1, - Token::ObjectEnd => objects = objects.saturating_sub(1), - _ => {} - } - - n += 1; - } - } -} - -impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { - fn set_bounded(&mut self, length: usize) -> Result<(), ArrayAccessError> { - if self.consumed > 0 { - return Err( - Report::new(BoundedContractViolationError::SetDirty.into_error()) - .change_context(ArrayAccessError), - ); - } - - if self.remaining.is_some() { - return Err(Report::new( - BoundedContractViolationError::SetCalledMultipleTimes.into_error(), - ) - .change_context(ArrayAccessError)); - } - - self.remaining = Some(length); - - Ok(()) - } - - fn next(&mut self) -> Option> - where - T: Deserialize<'de>, - { - self.consumed += 1; - - if matches!(self.deserializer.peek(), Token::ArrayEnd) { - // we have reached the ending, if `self.remaining` is set we use the `DeserializerNone` - // to deserialize any values that require `None` - if let Some(remaining) = &mut self.remaining { - if *remaining == 0 { - return None; - } - - *remaining = remaining.saturating_sub(1); - - let value = T::deserialize(DeserializerNone { - context: self.deserializer.context, - }); - - 
Some(value.change_context(ArrayAccessError)) - } else { - None - } - } else { - let value = T::deserialize(&mut *self.deserializer); - Some(value.change_context(ArrayAccessError)) - } - } - - fn size_hint(&self) -> Option { - self.length - } - - fn end(self) -> Result<(), ArrayAccessError> { - let mut result = Ok(()); - - // ensure that we consume the last token, if it is the wrong token error out - if !matches!(self.deserializer.peek(), Token::ArrayEnd) { - let mut error = Report::new(ArrayLengthError.into_error()) - .attach(ExpectedLength::new(self.consumed)); - - if let Some(length) = self.size_hint() { - error = error.attach(ReceivedLength::new(length)); - } - - result = Err(error); - } - - // bump until the very end, which ensures that deserialize calls after this might succeed! - let bump = self - .scan_end() - .unwrap_or_else(|| self.deserializer.tokens.remaining()); - self.deserializer.tokens.bump_n(bump); - - if let Some(remaining) = self.remaining { - if remaining > 0 { - let error = - Report::new(BoundedContractViolationError::EndRemainingItems.into_error()); - - match &mut result { - Err(result) => result.extend_one(error), - result => *result = Err(error), - } - } - } - - result.change_context(ArrayAccessError) - } -} - -struct ObjectAccess<'a, 'b, 'de: 'a> { - deserializer: &'a mut Deserializer<'b, 'de>, - - length: Option, - remaining: Option, - consumed: usize, -} - -impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { - pub fn new(deserializer: &'a mut Deserializer<'b, 'de>, length: Option) -> Self { - Self { - deserializer, - length, - remaining: None, - consumed: 0, - } - } - - // This assumes that Str and such are atomic, meaning `Str Str` as a deserialize value is - // considered invalid, as that should use `ArrayAccess` instead. 
- fn scan(&self, key: &str) -> Option { - let mut objects: usize = 0; - let mut arrays: usize = 0; - let mut n = 0; - - #[derive(Copy, Clone, Eq, PartialEq)] - enum State { - Key, - Value, - } - - impl State { - fn flip(&mut self) { - match *self { - State::Key => *self = State::Value, - State::Value => *self = State::Key, - } - } - } - - let mut state = State::Key; - - loop { - let next = self.deserializer.peek_n(n)?; - - match next { - Token::Array { .. } => arrays += 1, - Token::ArrayEnd => arrays = arrays.saturating_sub(1), - Token::Object { .. } => objects += 1, - Token::ObjectEnd if objects == 0 && arrays == 0 => { - // this is for the outer layer (that's us), therefore we can abort our linear - // search - return None; - } - Token::ObjectEnd => objects = objects.saturating_sub(1), - Token::Str(value) | Token::BorrowedStr(value) | Token::String(value) - if objects == 0 && arrays == 0 && value == key && state == State::Key => - { - // we found an element that matches the element value that is next in line - return Some(n); - } - _ => {} - } - - if arrays == 0 && objects == 0 { - // we're dependent on the fact if something is a key or value, if we're not nested - // then we can switch the state. - state.flip(); - } - - n += 1; - } - } - - fn scan_end(&self) -> Option { - let mut objects: usize = 0; - let mut arrays: usize = 0; - - let mut n = 0; - - loop { - let token = self.deserializer.peek_n(n)?; - - match token { - Token::Array { .. } => arrays += 1, - Token::ArrayEnd => arrays = arrays.saturating_sub(1), - Token::Object { .. 
} => objects += 1, - Token::ObjectEnd if arrays == 0 && objects == 0 => { - // we're at the outer layer, meaning we can know where we end - return Some(n); - } - Token::ObjectEnd => objects = objects.saturating_sub(1), - _ => {} - } - - n += 1; - } - } -} - -// TODO: for value we need a scan for some sorts, and then need to replace/remove the elements from -// the stream -impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { - fn set_bounded(&mut self, length: usize) -> Result<(), ObjectAccessError> { - if self.consumed > 0 { - return Err( - Report::new(BoundedContractViolationError::SetDirty.into_error()) - .change_context(ObjectAccessError), - ); - } - - if self.remaining.is_some() { - return Err(Report::new( - BoundedContractViolationError::SetCalledMultipleTimes.into_error(), - ) - .change_context(ObjectAccessError)); - } - - self.remaining = Some(length); - - Ok(()) - } - - fn value(&mut self, key: &str) -> Result - where - T: Deserialize<'de>, - { - if self.remaining == Some(0) { - return T::deserialize(DeserializerNone { - context: self.deserializer.context, - }) - .change_context(ObjectAccessError); - } - - self.consumed += 1; - - if let Some(remaining) = &mut self.remaining { - *remaining = remaining.saturating_sub(1); - } - - match self.scan(key) { - Some(offset) => { - // now we need to figure out which values are used, we can do this through offset - // calculations - let remaining = self.deserializer.remaining() - offset; - - let tape = self.deserializer.tokens.view(offset + 1..); - - let mut deserializer = Deserializer { - tokens: tape.unwrap_or_else(Tape::empty), - context: self.deserializer.context, - }; - - let value = T::deserialize(&mut deserializer); - - let erase = remaining - deserializer.remaining(); - drop(deserializer); - - self.deserializer.erase(offset..offset + erase); - - value - } - None => T::deserialize(DeserializerNone { - context: self.deserializer.context, - }), - } - .change_context(ObjectAccessError) - } - - fn 
next(&mut self) -> Option> - where - K: Deserialize<'de>, - V: Deserialize<'de>, - { - if self.remaining == Some(0) { - return None; - } - - self.consumed += 1; - - if let Some(remaining) = &mut self.remaining { - *remaining = remaining.saturating_sub(1); - } - - let (key, value) = if matches!(self.deserializer.peek(), Token::ObjectEnd) { - // we're not in bounded mode, which means we need to signal that we're done - self.remaining?; - - if self.remaining.is_some() { - let key = K::deserialize(DeserializerNone { - context: self.deserializer.context, - }); - let value = V::deserialize(DeserializerNone { - context: self.deserializer.context, - }); - - (key, value) - } else { - return None; - } - } else { - let key = K::deserialize(&mut *self.deserializer); - let value = V::deserialize(&mut *self.deserializer); - - (key, value) - }; - - let result = match (key, value) { - (Err(mut key), Err(value)) => { - key.extend_one(value); - - Err(key.change_context(ObjectAccessError)) - } - (Err(error), _) | (_, Err(error)) => Err(error.change_context(ObjectAccessError)), - (Ok(key), Ok(value)) => Ok((key, value)), - }; - - Some(result) - } - - fn size_hint(&self) -> Option { - self.length - } - - fn end(self) -> Result<(), ObjectAccessError> { - let mut result = Ok(()); - - // ensure that we consume the last token, if it is the wrong token error out - if !matches!(self.deserializer.peek(), Token::ObjectEnd) { - let mut error = Report::new(ObjectItemsExtraError.into_error()) - .attach(ExpectedLength::new(self.consumed)); - - if let Some(length) = self.size_hint() { - error = error.attach(ReceivedLength::new(length)); - } - - result = Err(error); - } - - // bump until the very end, which ensures that deserialize calls after this might succeed! 
- let bump = self - .scan_end() - .unwrap_or_else(|| self.deserializer.tokens.remaining()); - self.deserializer.tokens.bump_n(bump); - - if let Some(remaining) = self.remaining { - if remaining > 0 { - let error = - Report::new(BoundedContractViolationError::EndRemainingItems.into_error()); - - match &mut result { - Err(result) => result.extend_one(error), - result => *result = Err(error), - } - } - } - - result.change_context(ObjectAccessError) - } -} diff --git a/packages/libs/deer/desert/src/deserializer.rs b/packages/libs/deer/desert/src/deserializer.rs new file mode 100644 index 00000000000..b8df4abae34 --- /dev/null +++ b/packages/libs/deer/desert/src/deserializer.rs @@ -0,0 +1,307 @@ +use alloc::borrow::ToOwned; +use core::{ + ops::{Deref, Range}, + slice::SliceIndex, +}; + +use bitvec::{ + boxed::BitBox, + order::Lsb0, + slice::{BitSlice, BitSliceIndex}, + vec::BitVec, +}; +use deer::{ + Context, + error::DeserializerError, Visitor, +}; +use error_stack::{Result, ResultExt}; +use crate::array::ArrayAccess; +use crate::object::ObjectAccess; + +use crate::token::Token; + +macro_rules! 
forward { + ($($method:ident),*) => { + $( + fn $method(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_any(visitor) + } + )* + }; +} + +#[derive(Debug)] +enum Trivia<'a> { + Owned(BitBox), + Slice(&'a BitSlice), +} + +impl<'a> Deref for Trivia<'a> { + type Target = BitSlice; + + fn deref(&self) -> &Self::Target { + match self { + Trivia::Owned(value) => value.as_bitslice(), + Trivia::Slice(value) => *value, + } + } +} + +impl<'a> Trivia<'a> { + fn to_mut(&mut self) -> &mut BitSlice { + match self { + Trivia::Owned(value) => value.as_mut_bitslice(), + Trivia::Slice(value) => { + let owned = BitBox::from_bitslice(*value); + *self = Self::Owned(owned); + + self.to_mut() + } + } + } +} + +#[derive(Debug)] +struct Tape<'a, 'de> { + tokens: &'de [Token], + trivia: Trivia<'a>, +} + +impl Tape<'_, '_> { + fn empty() -> Self { + Self { + tokens: &[], + trivia: Trivia::Slice(BitSlice::empty()), + } + } +} + +impl<'a, 'de> Tape<'a, 'de> { + // also includes trivia + fn peek_all_n(&self, n: usize) -> Option { + self.tokens.get(n).copied() + } + + fn is_trivia_n(&self, n: usize) -> Option { + self.trivia.get(n).as_deref().copied() + } + + fn set_trivia(&mut self, mut range: Range) { + // automatically adjust so that we're able to always index to the end, even if the the end + // is out of bounds + if range.end >= self.tokens.len() && range.start < self.tokens.len() { + range.end = self.tokens.len(); + } + + if let Some(slice) = self.trivia.to_mut().get_mut(range) { + slice.fill(true); + } + } + + fn peek_n(&self, n: usize) -> Option { + let mut offset = 0; + let mut m = 0; + + while m != n { + if !self.is_trivia_n(offset)? { + m += 1; + } + + offset += 1; + } + + self.peek_all_n(m) + } + + fn peek(&self) -> Option { + let mut n = 0; + + while self.is_trivia_n(n)? 
{ + n += 1; + } + + self.peek_all_n(n) + } + + fn bump(&mut self) -> Option<(Token, bool)> { + // naive version of bump, which just takes the token and returns it with the status + let (token, tokens) = self.tokens.split_first()?; + let is_trivia = *self.trivia.get(0)?; + // use trivia like a feed tape, this avoid reallocation + self.trivia.to_mut().shift_left(1); + self.tokens = tokens; + + Some((*token, is_trivia)) + } + + fn bump_n(&mut self, i: usize) { + for _ in 0..i { + self.bump(); + } + } + + fn next(&mut self) -> Option { + loop { + let (token, is_trivia) = self.bump()?; + + if !is_trivia { + return Some(token); + } + } + } + + fn remaining(&self) -> usize { + self.tokens.len() + } + + fn is_empty(&self) -> bool { + self.tokens.is_empty() + } + + fn view<'b, B>(&'b self, n: B) -> Option> + where + B: BitSliceIndex<'b, usize, Lsb0, Immut = &'b BitSlice> + + SliceIndex<[Token], Output = [Token]> + + Clone, + { + let tokens = self.tokens.get(n.clone())?; + let trivia = self.trivia.get(n)?; + + Some(Tape { + tokens, + trivia: Trivia::Slice(trivia), + }) + } +} + +impl<'de> From<&'de [Token]> for Tape<'_, 'de> { + fn from(value: &'de [Token]) -> Self { + Self { + tokens: value, + trivia: Trivia::Owned(BitVec::repeat(false, value.len()).into_boxed_bitslice()), + } + } +} + +#[derive(Debug)] +pub struct Deserializer<'a, 'de> { + context: &'a Context, + tokens: Tape<'a, 'de>, +} + +impl<'a, 'de> Deserializer<'a, 'de> { + fn erase(&mut self, range: Range) { + self.tokens.set_trivia(range); + } +} + +impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { + forward!( + deserialize_null, + deserialize_bool, + deserialize_number, + deserialize_char, + deserialize_string, + deserialize_str, + deserialize_bytes, + deserialize_bytes_buffer, + deserialize_array, + deserialize_object + ); + + fn context(&self) -> &Context { + self.context + } + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let token = self.next(); + + 
match token { + Token::Bool(value) => visitor.visit_bool(value), + Token::Number(value) => visitor.visit_number(value.clone()), + Token::Char(value) => visitor.visit_char(value), + Token::Str(value) => visitor.visit_str(value), + Token::BorrowedStr(value) => visitor.visit_borrowed_str(value), + Token::String(value) => visitor.visit_string(value.to_owned()), + Token::Bytes(value) => visitor.visit_bytes(value), + Token::BorrowedBytes(value) => visitor.visit_borrowed_bytes(value), + Token::BytesBuf(value) => visitor.visit_bytes_buffer(value.to_vec()), + Token::Array { length } => visitor.visit_array(ArrayAccess::new(self, length)), + Token::Object { length } => visitor.visit_object(ObjectAccess::new(self, length)), + _ => { + panic!("Deserializer did not expect {token}"); + } + } + .change_context(DeserializerError) + } +} + +impl<'a, 'de> Deserializer<'a, 'de> { + pub fn new(tokens: &'de [Token], context: &'a Context) -> Self { + Self { + tokens: tokens.into(), + context, + } + } + + fn peek(&self) -> Token { + self.tokens + .peek() + .expect("should have token to deserialize") + } + + fn peek_n(&self, n: usize) -> Option { + self.tokens.peek_n(n) + } + + fn next(&mut self) -> Token { + self.tokens + .next() + .expect("should have token to deserialize") + } + + pub fn remaining(&self) -> usize { + self.tokens.remaining() + } + + pub fn is_empty(&self) -> bool { + self.tokens.is_empty() + } +} + +#[derive(Debug)] +struct DeserializerNone<'a> { + context: &'a Context, +} + +impl<'de> deer::Deserializer<'de> for DeserializerNone<'_> { + forward!( + deserialize_null, + deserialize_bool, + deserialize_number, + deserialize_char, + deserialize_string, + deserialize_str, + deserialize_bytes, + deserialize_bytes_buffer, + deserialize_array, + deserialize_object + ); + + fn context(&self) -> &Context { + self.context + } + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_none().change_context(DeserializerError) + } +} diff --git 
a/packages/libs/deer/desert/src/lib.rs b/packages/libs/deer/desert/src/lib.rs index 427e024c689..431f39f43ea 100644 --- a/packages/libs/deer/desert/src/lib.rs +++ b/packages/libs/deer/desert/src/lib.rs @@ -2,7 +2,10 @@ extern crate alloc; -mod de; +mod array; +mod deserializer; +mod object; +pub(crate) mod tape; mod token; pub fn add(left: usize, right: usize) -> usize { diff --git a/packages/libs/deer/desert/src/object.rs b/packages/libs/deer/desert/src/object.rs new file mode 100644 index 00000000000..3da746116b8 --- /dev/null +++ b/packages/libs/deer/desert/src/object.rs @@ -0,0 +1,274 @@ +use deer::{ + error::{ + BoundedContractViolationError, ExpectedLength, ObjectAccessError, ObjectLengthError, + ReceivedLength, Variant, + }, + Deserialize, +}; +use error_stack::{Report, ResultExt}; + +use crate::{ + deserializer::{Deserializer, DeserializerNone, Tape}, + token::Token, +}; + +pub struct ObjectAccess<'a, 'b, 'de: 'a> { + deserializer: &'a mut Deserializer<'b, 'de>, + + length: Option, + remaining: Option, + consumed: usize, +} + +impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { + pub fn new(deserializer: &'a mut Deserializer<'b, 'de>, length: Option) -> Self { + Self { + deserializer, + length, + remaining: None, + consumed: 0, + } + } + + // This assumes that Str and such are atomic, meaning `Str Str` as a deserialize value is + // considered invalid, as that should use `ArrayAccess` instead. + fn scan(&self, key: &str) -> Option { + let mut objects: usize = 0; + let mut arrays: usize = 0; + let mut n = 0; + + #[derive(Copy, Clone, Eq, PartialEq)] + enum State { + Key, + Value, + } + + impl State { + fn flip(&mut self) { + match *self { + State::Key => *self = State::Value, + State::Value => *self = State::Key, + } + } + } + + let mut state = State::Key; + + loop { + let next = self.deserializer.peek_n(n)?; + + match next { + Token::Array { .. } => arrays += 1, + Token::ArrayEnd => arrays = arrays.saturating_sub(1), + Token::Object { .. 
} => objects += 1, + Token::ObjectEnd if objects == 0 && arrays == 0 => { + // this is for the outer layer (that's us), therefore we can abort our linear + // search + return None; + } + Token::ObjectEnd => objects = objects.saturating_sub(1), + Token::Str(value) | Token::BorrowedStr(value) | Token::String(value) + if objects == 0 && arrays == 0 && value == key && state == State::Key => + { + // we found an element that matches the element value that is next in line + return Some(n); + } + _ => {} + } + + if arrays == 0 && objects == 0 { + // we're dependent on the fact if something is a key or value, if we're not nested + // then we can switch the state. + state.flip(); + } + + n += 1; + } + } + + fn scan_end(&self) -> Option { + let mut objects: usize = 0; + let mut arrays: usize = 0; + + let mut n = 0; + + loop { + let token = self.deserializer.peek_n(n)?; + + match token { + Token::Array { .. } => arrays += 1, + Token::ArrayEnd => arrays = arrays.saturating_sub(1), + Token::Object { .. 
} => objects += 1, + Token::ObjectEnd if arrays == 0 && objects == 0 => { + // we're at the outer layer, meaning we can know where we end + return Some(n); + } + Token::ObjectEnd => objects = objects.saturating_sub(1), + _ => {} + } + + n += 1; + } + } +} + +// TODO: for value we need a scan for some sorts, and then need to replace/remove the elements from +// the stream +impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { + fn set_bounded(&mut self, length: usize) -> error_stack::Result<(), ObjectAccessError> { + if self.consumed > 0 { + return Err( + Report::new(BoundedContractViolationError::SetDirty.into_error()) + .change_context(ObjectAccessError), + ); + } + + if self.remaining.is_some() { + return Err(Report::new( + BoundedContractViolationError::SetCalledMultipleTimes.into_error(), + ) + .change_context(ObjectAccessError)); + } + + self.remaining = Some(length); + + Ok(()) + } + + fn value(&mut self, key: &str) -> error_stack::Result + where + T: Deserialize<'de>, + { + if self.remaining == Some(0) { + return T::deserialize(DeserializerNone { + context: self.deserializer.context, + }) + .change_context(ObjectAccessError); + } + + self.consumed += 1; + + if let Some(remaining) = &mut self.remaining { + *remaining = remaining.saturating_sub(1); + } + + match self.scan(key) { + Some(offset) => { + // now we need to figure out which values are used, we can do this through offset + // calculations + let remaining = self.deserializer.remaining() - offset; + + let tape = self.deserializer.tokens.view(offset + 1..); + + let mut deserializer = Deserializer { + tokens: tape.unwrap_or_else(Tape::empty), + context: self.deserializer.context, + }; + + let value = T::deserialize(&mut deserializer); + + let erase = remaining - deserializer.remaining(); + drop(deserializer); + + self.deserializer.erase(offset..offset + erase); + + value + } + None => T::deserialize(DeserializerNone { + context: self.deserializer.context, + }), + } + 
.change_context(ObjectAccessError) + } + + fn next(&mut self) -> Option> + where + K: Deserialize<'de>, + V: Deserialize<'de>, + { + if self.remaining == Some(0) { + return None; + } + + self.consumed += 1; + + if let Some(remaining) = &mut self.remaining { + *remaining = remaining.saturating_sub(1); + } + + let (key, value) = if matches!(self.deserializer.peek(), Token::ObjectEnd) { + // we're not in bounded mode, which means we need to signal that we're done + self.remaining?; + + if self.remaining.is_some() { + let key = K::deserialize(DeserializerNone { + context: self.deserializer.context, + }); + let value = V::deserialize(DeserializerNone { + context: self.deserializer.context, + }); + + (key, value) + } else { + return None; + } + } else { + let key = K::deserialize(&mut *self.deserializer); + let value = V::deserialize(&mut *self.deserializer); + + (key, value) + }; + + let result = match (key, value) { + (Err(mut key), Err(value)) => { + key.extend_one(value); + + Err(key.change_context(ObjectAccessError)) + } + (Err(error), _) | (_, Err(error)) => Err(error.change_context(ObjectAccessError)), + (Ok(key), Ok(value)) => Ok((key, value)), + }; + + Some(result) + } + + fn size_hint(&self) -> Option { + self.length + } + + fn end(self) -> error_stack::Result<(), ObjectAccessError> { + let mut result = Ok(()); + + // ensure that we consume the last token, if it is the wrong token error out + if !matches!(self.deserializer.peek(), Token::ObjectEnd) { + let mut error = Report::new(ObjectLengthError.into_error()) + .attach(ExpectedLength::new(self.consumed)); + + if let Some(length) = self.size_hint() { + error = error.attach(ReceivedLength::new(length)); + } + + result = Err(error); + } + + // bump until the very end, which ensures that deserialize calls after this might succeed! 
+ let bump = self + .scan_end() + .unwrap_or_else(|| self.deserializer.tokens.remaining()); + self.deserializer.tokens.bump_n(bump); + + if let Some(remaining) = self.remaining { + if remaining > 0 { + let error = + Report::new(BoundedContractViolationError::EndRemainingItems.into_error()); + + match &mut result { + Err(result) => result.extend_one(error), + result => *result = Err(error), + } + } + } + + result.change_context(ObjectAccessError) + } +} diff --git a/packages/libs/deer/desert/src/tape.rs b/packages/libs/deer/desert/src/tape.rs new file mode 100644 index 00000000000..e69de29bb2d From b2b29f4725ddaea7f46eb2c9ae2ac3e7ff1595ff Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 19:19:04 +0100 Subject: [PATCH 14/18] feat: fix compiler error --- packages/libs/deer/desert/src/array.rs | 16 +- packages/libs/deer/desert/src/deserializer.rs | 220 +++--------------- packages/libs/deer/desert/src/lib.rs | 4 +- packages/libs/deer/desert/src/object.rs | 37 +-- packages/libs/deer/desert/src/tape.rs | 166 +++++++++++++ 5 files changed, 224 insertions(+), 219 deletions(-) diff --git a/packages/libs/deer/desert/src/array.rs b/packages/libs/deer/desert/src/array.rs index 595c9457464..15a97712e00 100644 --- a/packages/libs/deer/desert/src/array.rs +++ b/packages/libs/deer/desert/src/array.rs @@ -3,9 +3,9 @@ use deer::{ ArrayAccessError, ArrayLengthError, BoundedContractViolationError, ExpectedLength, ReceivedLength, Variant, }, - Deserialize, + Deserialize, Deserializer as _, }; -use error_stack::{Report, ResultExt}; +use error_stack::{Report, Result, ResultExt}; use crate::{ deserializer::{Deserializer, DeserializerNone}, @@ -57,7 +57,7 @@ impl<'a, 'b, 'de> ArrayAccess<'a, 'b, 'de> { } impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { - fn set_bounded(&mut self, length: usize) -> error_stack::Result<(), ArrayAccessError> { + fn set_bounded(&mut self, length: usize) -> Result<(), ArrayAccessError> { if self.consumed > 0 { return Err( 
Report::new(BoundedContractViolationError::SetDirty.into_error()) @@ -77,7 +77,7 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { Ok(()) } - fn next(&mut self) -> Option> + fn next(&mut self) -> Option> where T: Deserialize<'de>, { @@ -94,7 +94,7 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { *remaining = remaining.saturating_sub(1); let value = T::deserialize(DeserializerNone { - context: self.deserializer.context, + context: self.deserializer.context(), }); Some(value.change_context(ArrayAccessError)) @@ -111,7 +111,7 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { self.length } - fn end(self) -> error_stack::Result<(), ArrayAccessError> { + fn end(self) -> Result<(), ArrayAccessError> { let mut result = Ok(()); // ensure that we consume the last token, if it is the wrong token error out @@ -129,8 +129,8 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { // bump until the very end, which ensures that deserialize calls after this might succeed! 
let bump = self .scan_end() - .unwrap_or_else(|| self.deserializer.tokens.remaining()); - self.deserializer.tokens.bump_n(bump); + .unwrap_or_else(|| self.deserializer.tape().remaining()); + self.deserializer.tape_mut().bump_n(bump); if let Some(remaining) = self.remaining { if remaining > 0 { diff --git a/packages/libs/deer/desert/src/deserializer.rs b/packages/libs/deer/desert/src/deserializer.rs index b8df4abae34..3f4af18489d 100644 --- a/packages/libs/deer/desert/src/deserializer.rs +++ b/packages/libs/deer/desert/src/deserializer.rs @@ -1,24 +1,10 @@ use alloc::borrow::ToOwned; -use core::{ - ops::{Deref, Range}, - slice::SliceIndex, -}; +use core::ops::Range; -use bitvec::{ - boxed::BitBox, - order::Lsb0, - slice::{BitSlice, BitSliceIndex}, - vec::BitVec, -}; -use deer::{ - Context, - error::DeserializerError, Visitor, -}; +use deer::{error::DeserializerError, Context, Visitor}; use error_stack::{Result, ResultExt}; -use crate::array::ArrayAccess; -use crate::object::ObjectAccess; -use crate::token::Token; +use crate::{array::ArrayAccess, object::ObjectAccess, tape::Tape, token::Token}; macro_rules! forward { ($($method:ident),*) => { @@ -33,168 +19,15 @@ macro_rules! 
forward { }; } -#[derive(Debug)] -enum Trivia<'a> { - Owned(BitBox), - Slice(&'a BitSlice), -} - -impl<'a> Deref for Trivia<'a> { - type Target = BitSlice; - - fn deref(&self) -> &Self::Target { - match self { - Trivia::Owned(value) => value.as_bitslice(), - Trivia::Slice(value) => *value, - } - } -} - -impl<'a> Trivia<'a> { - fn to_mut(&mut self) -> &mut BitSlice { - match self { - Trivia::Owned(value) => value.as_mut_bitslice(), - Trivia::Slice(value) => { - let owned = BitBox::from_bitslice(*value); - *self = Self::Owned(owned); - - self.to_mut() - } - } - } -} - -#[derive(Debug)] -struct Tape<'a, 'de> { - tokens: &'de [Token], - trivia: Trivia<'a>, -} - -impl Tape<'_, '_> { - fn empty() -> Self { - Self { - tokens: &[], - trivia: Trivia::Slice(BitSlice::empty()), - } - } -} - -impl<'a, 'de> Tape<'a, 'de> { - // also includes trivia - fn peek_all_n(&self, n: usize) -> Option { - self.tokens.get(n).copied() - } - - fn is_trivia_n(&self, n: usize) -> Option { - self.trivia.get(n).as_deref().copied() - } - - fn set_trivia(&mut self, mut range: Range) { - // automatically adjust so that we're able to always index to the end, even if the the end - // is out of bounds - if range.end >= self.tokens.len() && range.start < self.tokens.len() { - range.end = self.tokens.len(); - } - - if let Some(slice) = self.trivia.to_mut().get_mut(range) { - slice.fill(true); - } - } - - fn peek_n(&self, n: usize) -> Option { - let mut offset = 0; - let mut m = 0; - - while m != n { - if !self.is_trivia_n(offset)? { - m += 1; - } - - offset += 1; - } - - self.peek_all_n(m) - } - - fn peek(&self) -> Option { - let mut n = 0; - - while self.is_trivia_n(n)? 
{ - n += 1; - } - - self.peek_all_n(n) - } - - fn bump(&mut self) -> Option<(Token, bool)> { - // naive version of bump, which just takes the token and returns it with the status - let (token, tokens) = self.tokens.split_first()?; - let is_trivia = *self.trivia.get(0)?; - // use trivia like a feed tape, this avoid reallocation - self.trivia.to_mut().shift_left(1); - self.tokens = tokens; - - Some((*token, is_trivia)) - } - - fn bump_n(&mut self, i: usize) { - for _ in 0..i { - self.bump(); - } - } - - fn next(&mut self) -> Option { - loop { - let (token, is_trivia) = self.bump()?; - - if !is_trivia { - return Some(token); - } - } - } - - fn remaining(&self) -> usize { - self.tokens.len() - } - - fn is_empty(&self) -> bool { - self.tokens.is_empty() - } - - fn view<'b, B>(&'b self, n: B) -> Option> - where - B: BitSliceIndex<'b, usize, Lsb0, Immut = &'b BitSlice> - + SliceIndex<[Token], Output = [Token]> - + Clone, - { - let tokens = self.tokens.get(n.clone())?; - let trivia = self.trivia.get(n)?; - - Some(Tape { - tokens, - trivia: Trivia::Slice(trivia), - }) - } -} - -impl<'de> From<&'de [Token]> for Tape<'_, 'de> { - fn from(value: &'de [Token]) -> Self { - Self { - tokens: value, - trivia: Trivia::Owned(BitVec::repeat(false, value.len()).into_boxed_bitslice()), - } - } -} - #[derive(Debug)] pub struct Deserializer<'a, 'de> { context: &'a Context, - tokens: Tape<'a, 'de>, + tape: Tape<'a, 'de>, } impl<'a, 'de> Deserializer<'a, 'de> { - fn erase(&mut self, range: Range) { - self.tokens.set_trivia(range); + pub(crate) fn erase(&mut self, range: Range) { + self.tape.set_trivia(range); } } @@ -243,41 +76,46 @@ impl<'a, 'de> deer::Deserializer<'de> for &mut Deserializer<'a, 'de> { } impl<'a, 'de> Deserializer<'a, 'de> { + pub(crate) fn new_bare(tape: Tape<'a, 'de>, context: &'a Context) -> Self { + Self { tape, context } + } + pub fn new(tokens: &'de [Token], context: &'a Context) -> Self { - Self { - tokens: tokens.into(), - context, - } + 
Self::new_bare(tokens.into(), context) } - fn peek(&self) -> Token { - self.tokens - .peek() - .expect("should have token to deserialize") + pub(crate) fn peek(&self) -> Token { + self.tape.peek().expect("should have token to deserialize") } - fn peek_n(&self, n: usize) -> Option { - self.tokens.peek_n(n) + pub(crate) fn peek_n(&self, n: usize) -> Option { + self.tape.peek_n(n) } - fn next(&mut self) -> Token { - self.tokens - .next() - .expect("should have token to deserialize") + pub(crate) fn next(&mut self) -> Token { + self.tape.next().expect("should have token to deserialize") + } + + pub(crate) fn tape(&self) -> &Tape { + &self.tape + } + + pub(crate) fn tape_mut(&mut self) -> &mut Tape<'a, 'de> { + &mut self.tape } pub fn remaining(&self) -> usize { - self.tokens.remaining() + self.tape.remaining() } pub fn is_empty(&self) -> bool { - self.tokens.is_empty() + self.tape.is_empty() } } #[derive(Debug)] -struct DeserializerNone<'a> { - context: &'a Context, +pub(crate) struct DeserializerNone<'a> { + pub(crate) context: &'a Context, } impl<'de> deer::Deserializer<'de> for DeserializerNone<'_> { diff --git a/packages/libs/deer/desert/src/lib.rs b/packages/libs/deer/desert/src/lib.rs index 431f39f43ea..00204102cd3 100644 --- a/packages/libs/deer/desert/src/lib.rs +++ b/packages/libs/deer/desert/src/lib.rs @@ -2,9 +2,9 @@ extern crate alloc; -mod array; +pub(crate) mod array; mod deserializer; -mod object; +pub(crate) mod object; pub(crate) mod tape; mod token; diff --git a/packages/libs/deer/desert/src/object.rs b/packages/libs/deer/desert/src/object.rs index 3da746116b8..9d77e46d273 100644 --- a/packages/libs/deer/desert/src/object.rs +++ b/packages/libs/deer/desert/src/object.rs @@ -3,12 +3,13 @@ use deer::{ BoundedContractViolationError, ExpectedLength, ObjectAccessError, ObjectLengthError, ReceivedLength, Variant, }, - Deserialize, + Deserialize, Deserializer as _, }; -use error_stack::{Report, ResultExt}; +use error_stack::{Report, Result, ResultExt}; use 
crate::{ - deserializer::{Deserializer, DeserializerNone, Tape}, + deserializer::{Deserializer, DeserializerNone}, + tape::Tape, token::Token, }; @@ -115,7 +116,7 @@ impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { // TODO: for value we need a scan for some sorts, and then need to replace/remove the elements from // the stream impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { - fn set_bounded(&mut self, length: usize) -> error_stack::Result<(), ObjectAccessError> { + fn set_bounded(&mut self, length: usize) -> Result<(), ObjectAccessError> { if self.consumed > 0 { return Err( Report::new(BoundedContractViolationError::SetDirty.into_error()) @@ -135,13 +136,13 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { Ok(()) } - fn value(&mut self, key: &str) -> error_stack::Result + fn value(&mut self, key: &str) -> Result where T: Deserialize<'de>, { if self.remaining == Some(0) { return T::deserialize(DeserializerNone { - context: self.deserializer.context, + context: self.deserializer.context(), }) .change_context(ObjectAccessError); } @@ -158,12 +159,12 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { // calculations let remaining = self.deserializer.remaining() - offset; - let tape = self.deserializer.tokens.view(offset + 1..); + let tape = self.deserializer.tape().view(offset + 1..); - let mut deserializer = Deserializer { - tokens: tape.unwrap_or_else(Tape::empty), - context: self.deserializer.context, - }; + let mut deserializer = Deserializer::new_bare( + tape.unwrap_or_else(Tape::empty), + self.deserializer.context(), + ); let value = T::deserialize(&mut deserializer); @@ -175,13 +176,13 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { value } None => T::deserialize(DeserializerNone { - context: self.deserializer.context, + context: self.deserializer.context(), }), } .change_context(ObjectAccessError) } - fn next(&mut self) -> Option> + fn next(&mut self) -> Option> where K: 
Deserialize<'de>, V: Deserialize<'de>, @@ -202,10 +203,10 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { if self.remaining.is_some() { let key = K::deserialize(DeserializerNone { - context: self.deserializer.context, + context: self.deserializer.context(), }); let value = V::deserialize(DeserializerNone { - context: self.deserializer.context, + context: self.deserializer.context(), }); (key, value) @@ -236,7 +237,7 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { self.length } - fn end(self) -> error_stack::Result<(), ObjectAccessError> { + fn end(self) -> Result<(), ObjectAccessError> { let mut result = Ok(()); // ensure that we consume the last token, if it is the wrong token error out @@ -254,8 +255,8 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { // bump until the very end, which ensures that deserialize calls after this might succeed! let bump = self .scan_end() - .unwrap_or_else(|| self.deserializer.tokens.remaining()); - self.deserializer.tokens.bump_n(bump); + .unwrap_or_else(|| self.deserializer.tape().remaining()); + self.deserializer.tape_mut().bump_n(bump); if let Some(remaining) = self.remaining { if remaining > 0 { diff --git a/packages/libs/deer/desert/src/tape.rs b/packages/libs/deer/desert/src/tape.rs index e69de29bb2d..84577cf9ebc 100644 --- a/packages/libs/deer/desert/src/tape.rs +++ b/packages/libs/deer/desert/src/tape.rs @@ -0,0 +1,166 @@ +use core::{ + ops::{Deref, Range}, + slice::SliceIndex, +}; + +use bitvec::{ + boxed::BitBox, + order::Lsb0, + prelude::{BitSlice, BitVec}, + slice::BitSliceIndex, +}; + +use crate::token::Token; + +#[derive(Debug)] +enum Trivia<'a> { + Owned(BitBox), + Slice(&'a BitSlice), +} + +impl<'a> Deref for Trivia<'a> { + type Target = BitSlice; + + fn deref(&self) -> &Self::Target { + match self { + Trivia::Owned(value) => value.as_bitslice(), + Trivia::Slice(value) => *value, + } + } +} + +impl<'a> Trivia<'a> { + fn to_mut(&mut self) -> &mut BitSlice { 
+ match self { + Trivia::Owned(value) => value.as_mut_bitslice(), + Trivia::Slice(value) => { + let owned = BitBox::from_bitslice(*value); + *self = Self::Owned(owned); + + self.to_mut() + } + } + } +} + +#[derive(Debug)] +pub struct Tape<'a, 'de> { + tokens: &'de [Token], + trivia: Trivia<'a>, +} + +impl Tape<'_, '_> { + pub(crate) fn empty() -> Self { + Self { + tokens: &[], + trivia: Trivia::Slice(BitSlice::empty()), + } + } +} + +impl<'a, 'de> Tape<'a, 'de> { + // also includes trivia + fn peek_all_n(&self, n: usize) -> Option { + self.tokens.get(n).copied() + } + + fn is_trivia_n(&self, n: usize) -> Option { + self.trivia.get(n).as_deref().copied() + } + + pub(crate) fn set_trivia(&mut self, mut range: Range) { + // automatically adjust so that we're able to always index to the end, even if the the end + // is out of bounds + if range.end >= self.tokens.len() && range.start < self.tokens.len() { + range.end = self.tokens.len(); + } + + if let Some(slice) = self.trivia.to_mut().get_mut(range) { + slice.fill(true); + } + } + + pub(crate) fn peek_n(&self, n: usize) -> Option { + let mut offset = 0; + let mut m = 0; + + while m != n { + if !self.is_trivia_n(offset)? { + m += 1; + } + + offset += 1; + } + + self.peek_all_n(m) + } + + pub(crate) fn peek(&self) -> Option { + let mut n = 0; + + while self.is_trivia_n(n)? 
{ + n += 1; + } + + self.peek_all_n(n) + } + + fn bump(&mut self) -> Option<(Token, bool)> { + // naive version of bump, which just takes the token and returns it with the status + let (token, tokens) = self.tokens.split_first()?; + let is_trivia = *self.trivia.get(0)?; + // use trivia like a feed tape, this avoid reallocation + self.trivia.to_mut().shift_left(1); + self.tokens = tokens; + + Some((*token, is_trivia)) + } + + pub(crate) fn bump_n(&mut self, i: usize) { + for _ in 0..i { + self.bump(); + } + } + + pub(crate) fn next(&mut self) -> Option { + loop { + let (token, is_trivia) = self.bump()?; + + if !is_trivia { + return Some(token); + } + } + } + + pub(crate) fn remaining(&self) -> usize { + self.tokens.len() + } + + pub(crate) fn is_empty(&self) -> bool { + self.tokens.is_empty() + } + + pub(crate) fn view<'b, B>(&'b self, n: B) -> Option> + where + B: BitSliceIndex<'b, usize, Lsb0, Immut = &'b BitSlice> + + SliceIndex<[Token], Output = [Token]> + + Clone, + { + let tokens = self.tokens.get(n.clone())?; + let trivia = self.trivia.get(n)?; + + Some(Tape { + tokens, + trivia: Trivia::Slice(trivia), + }) + } +} + +impl<'de> From<&'de [Token]> for Tape<'_, 'de> { + fn from(value: &'de [Token]) -> Self { + Self { + tokens: value, + trivia: Trivia::Owned(BitVec::repeat(false, value.len()).into_boxed_bitslice()), + } + } +} From 78ee04a87fcfdb770a76a855df6178472da8089b Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 19:40:39 +0100 Subject: [PATCH 15/18] feat: example test (for now a single test) --- packages/libs/deer/desert/Cargo.toml | 1 + packages/libs/deer/desert/src/assert.rs | 50 ++++++++++++++++++ packages/libs/deer/desert/src/deserializer.rs | 2 +- packages/libs/deer/desert/src/lib.rs | 20 +++---- packages/libs/deer/desert/src/object.rs | 2 - packages/libs/deer/desert/src/token.rs | 52 ++++++++++++++++++- 6 files changed, 108 insertions(+), 19 deletions(-) create mode 100644 packages/libs/deer/desert/src/assert.rs diff --git 
a/packages/libs/deer/desert/Cargo.toml b/packages/libs/deer/desert/Cargo.toml index b46acc70811..72d1f130b85 100644 --- a/packages/libs/deer/desert/Cargo.toml +++ b/packages/libs/deer/desert/Cargo.toml @@ -10,4 +10,5 @@ publish = false [dependencies] deer = { path = ".." } error-stack = { version = "0.2.4", default_features = false } +serde_json = { version = "1.0.91", default_features = false, features = ['alloc'] } bitvec = { version = "1", default_features = false, features = ['alloc', 'atomic'] } diff --git a/packages/libs/deer/desert/src/assert.rs b/packages/libs/deer/desert/src/assert.rs new file mode 100644 index 00000000000..825760ae9ba --- /dev/null +++ b/packages/libs/deer/desert/src/assert.rs @@ -0,0 +1,50 @@ +use core::fmt::Debug; + +use deer::{error::ReportExt, Context, Deserialize}; +use serde_json::to_value; + +use crate::{deserializer::Deserializer, token::Token}; + +pub fn assert_tokens_with_context<'de, T>(expected: &T, tokens: &'de [Token], context: &Context) +where + T: Deserialize<'de> + PartialEq + Debug, +{ + let mut de = Deserializer::new(tokens, context); + let received = T::deserialize(&mut de).expect("should deserialize"); + + if de.remaining() > 0 { + panic!("{} remaining tokens", de.remaining()); + } + + assert_eq!(received, *expected); +} + +pub fn assert_tokens<'de, T>(value: &T, tokens: &'de [Token]) +where + T: Deserialize<'de> + PartialEq + Debug, +{ + assert_tokens_with_context(value, tokens, &Context::new()); +} + +pub fn assert_tokens_with_context_error<'de, T>( + error: &serde_json::Value, + tokens: &'de [Token], + context: &Context, +) where + T: Deserialize<'de> + Debug, +{ + let mut de = Deserializer::new(tokens, context); + let received = T::deserialize(&mut de).expect_err("value of type T should fail serialization"); + + let received = received.export(); + let received = to_value(received).expect("error should serialize"); + + assert_eq!(received, *error) +} + +pub fn assert_tokens_error<'de, T>(error: &serde_json::Value, 
tokens: &'de [Token]) +where + T: Deserialize<'de> + Debug, +{ + assert_tokens_with_context_error::(error, tokens, &Context::new()); +} diff --git a/packages/libs/deer/desert/src/deserializer.rs b/packages/libs/deer/desert/src/deserializer.rs index 3f4af18489d..cfd7c451d22 100644 --- a/packages/libs/deer/desert/src/deserializer.rs +++ b/packages/libs/deer/desert/src/deserializer.rs @@ -96,7 +96,7 @@ impl<'a, 'de> Deserializer<'a, 'de> { self.tape.next().expect("should have token to deserialize") } - pub(crate) fn tape(&self) -> &Tape { + pub(crate) fn tape(&self) -> &Tape<'a, 'de> { &self.tape } diff --git a/packages/libs/deer/desert/src/lib.rs b/packages/libs/deer/desert/src/lib.rs index 00204102cd3..7c7948124cf 100644 --- a/packages/libs/deer/desert/src/lib.rs +++ b/packages/libs/deer/desert/src/lib.rs @@ -3,22 +3,14 @@ extern crate alloc; pub(crate) mod array; +mod assert; mod deserializer; pub(crate) mod object; pub(crate) mod tape; mod token; -pub fn add(left: usize, right: usize) -> usize { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} +pub use assert::{ + assert_tokens, assert_tokens_error, assert_tokens_with_context, + assert_tokens_with_context_error, +}; +pub use token::Token; diff --git a/packages/libs/deer/desert/src/object.rs b/packages/libs/deer/desert/src/object.rs index 9d77e46d273..b03db615b4a 100644 --- a/packages/libs/deer/desert/src/object.rs +++ b/packages/libs/deer/desert/src/object.rs @@ -113,8 +113,6 @@ impl<'a, 'b, 'de: 'a> ObjectAccess<'a, 'b, 'de> { } } -// TODO: for value we need a scan for some sorts, and then need to replace/remove the elements from -// the stream impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { fn set_bounded(&mut self, length: usize) -> Result<(), ObjectAccessError> { if self.consumed > 0 { diff --git a/packages/libs/deer/desert/src/token.rs b/packages/libs/deer/desert/src/token.rs index 
87e289e9de2..92a53f430a9 100644 --- a/packages/libs/deer/desert/src/token.rs +++ b/packages/libs/deer/desert/src/token.rs @@ -5,6 +5,50 @@ use deer::Number; // TODO: test #[derive(Debug, Copy, Clone)] pub enum Token { + /// A serialized `bool` + /// + /// ``` + /// # use error_stack::ResultExt; + /// use deer::{ + /// error::{DeserializeError, VisitorError}, + /// Deserialize, Deserializer, Document, Reflection, Schema, Visitor, + /// }; + /// use deer_desert::{assert_tokens, Token}; + /// + /// #[derive(Debug, PartialEq)] + /// struct Bool(bool); + /// + /// impl Reflection for Bool { + /// fn schema(_: &mut Document) -> Schema { + /// Schema::new("boolean") + /// } + /// } + /// + /// impl<'de> Deserialize<'de> for Bool { + /// type Reflection = Self; + /// + /// fn deserialize>(de: D) -> error_stack::Result { + /// struct BoolVisitor; + /// + /// impl<'de> Visitor<'de> for BoolVisitor { + /// type Value = Bool; + /// + /// fn expecting(&self) -> Document { + /// Bool::reflection() + /// } + /// + /// fn visit_bool(self, v: bool) -> error_stack::Result { + /// Ok(Bool(v)) + /// } + /// } + /// + /// de.deserialize_bool(BoolVisitor) + /// .change_context(DeserializeError) + /// } + /// } + /// + /// assert_tokens(&Bool(true), &[Token::Bool(true)]) + /// ``` Bool(bool), Number(&'static Number), Char(char), @@ -14,9 +58,13 @@ pub enum Token { Bytes(&'static [u8]), BorrowedBytes(&'static [u8]), BytesBuf(&'static [u8]), - Array { length: Option }, + Array { + length: Option, + }, ArrayEnd, - Object { length: Option }, + Object { + length: Option, + }, ObjectEnd, } From 0089452ca2160c5749a5d607a012d3c0aba57815 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Wed, 21 Dec 2022 19:44:58 +0100 Subject: [PATCH 16/18] fix: lint --- packages/libs/deer/desert/src/tape.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/libs/deer/desert/src/tape.rs b/packages/libs/deer/desert/src/tape.rs index 84577cf9ebc..13f6ba67753 100644 --- 
a/packages/libs/deer/desert/src/tape.rs +++ b/packages/libs/deer/desert/src/tape.rs @@ -24,7 +24,7 @@ impl<'a> Deref for Trivia<'a> { fn deref(&self) -> &Self::Target { match self { Trivia::Owned(value) => value.as_bitslice(), - Trivia::Slice(value) => *value, + Trivia::Slice(value) => value, } } } From 460c2939cd94459447c162df018064d84d901c8b Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Thu, 5 Jan 2023 18:20:12 +0100 Subject: [PATCH 17/18] fet: `set_trivia` improve bounds checking --- packages/libs/deer/desert/src/tape.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/libs/deer/desert/src/tape.rs b/packages/libs/deer/desert/src/tape.rs index 13f6ba67753..20ad97a2215 100644 --- a/packages/libs/deer/desert/src/tape.rs +++ b/packages/libs/deer/desert/src/tape.rs @@ -68,13 +68,27 @@ impl<'a, 'de> Tape<'a, 'de> { self.trivia.get(n).as_deref().copied() } + /// ## Panics + /// + /// if range.start > range.end pub(crate) fn set_trivia(&mut self, mut range: Range) { + // ensure that the start range smaller than or equal to the end range + // doing this we can ensure that `0..1` is valid, but `1..0` is not. 
+ assert!(range.start <= range.end); + // automatically adjust so that we're able to always index to the end, even if the the end // is out of bounds - if range.end >= self.tokens.len() && range.start < self.tokens.len() { + if range.end > self.tokens.len() { range.end = self.tokens.len(); } + // we have already asserted that `range.start <= range.end`, therefore if range.start is out + // of bounds, range.end must be out of bounds as well, in that case we do not need to fill + // the slice, as `.get_mut` will return `None` + if range.start >= self.tokens.len() { + return; + } + if let Some(slice) = self.trivia.to_mut().get_mut(range) { slice.fill(true); } From bf4455abbe162817b1bd73b07ac61d807b257286 Mon Sep 17 00:00:00 2001 From: Bilal Mahmoud Date: Thu, 5 Jan 2023 18:26:22 +0100 Subject: [PATCH 18/18] feat: derive `PartialEq` for `Token` --- packages/libs/deer/desert/src/array.rs | 4 ++-- packages/libs/deer/desert/src/object.rs | 4 ++-- packages/libs/deer/desert/src/token.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/libs/deer/desert/src/array.rs b/packages/libs/deer/desert/src/array.rs index 15a97712e00..b82824a1e0a 100644 --- a/packages/libs/deer/desert/src/array.rs +++ b/packages/libs/deer/desert/src/array.rs @@ -83,7 +83,7 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { { self.consumed += 1; - if matches!(self.deserializer.peek(), Token::ArrayEnd) { + if self.deserializer.peek() == Token::ArrayEnd { // we have reached the ending, if `self.remaining` is set we use the `DeserializerNone` // to deserialize any values that require `None` if let Some(remaining) = &mut self.remaining { @@ -115,7 +115,7 @@ impl<'de> deer::ArrayAccess<'de> for ArrayAccess<'_, '_, 'de> { let mut result = Ok(()); // ensure that we consume the last token, if it is the wrong token error out - if !matches!(self.deserializer.peek(), Token::ArrayEnd) { + if self.deserializer.peek() != Token::ArrayEnd { let mut error = 
Report::new(ArrayLengthError.into_error()) .attach(ExpectedLength::new(self.consumed)); diff --git a/packages/libs/deer/desert/src/object.rs b/packages/libs/deer/desert/src/object.rs index b03db615b4a..1d5575463d7 100644 --- a/packages/libs/deer/desert/src/object.rs +++ b/packages/libs/deer/desert/src/object.rs @@ -195,7 +195,7 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { *remaining = remaining.saturating_sub(1); } - let (key, value) = if matches!(self.deserializer.peek(), Token::ObjectEnd) { + let (key, value) = if self.deserializer.peek() == Token::ObjectEnd { // we're not in bounded mode, which means we need to signal that we're done self.remaining?; @@ -239,7 +239,7 @@ impl<'de> deer::ObjectAccess<'de> for ObjectAccess<'_, '_, 'de> { let mut result = Ok(()); // ensure that we consume the last token, if it is the wrong token error out - if !matches!(self.deserializer.peek(), Token::ObjectEnd) { + if self.deserializer.peek() != Token::ObjectEnd { let mut error = Report::new(ObjectLengthError.into_error()) .attach(ExpectedLength::new(self.consumed)); diff --git a/packages/libs/deer/desert/src/token.rs b/packages/libs/deer/desert/src/token.rs index 92a53f430a9..60351ee86bb 100644 --- a/packages/libs/deer/desert/src/token.rs +++ b/packages/libs/deer/desert/src/token.rs @@ -3,7 +3,7 @@ use core::fmt::{Debug, Display, Formatter}; use deer::Number; // TODO: test -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq)] pub enum Token { /// A serialized `bool` ///