Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use Cow in JsonValue #63

Merged
merged 8 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,24 @@ test_cases!(x100);
test_cases!(sentence);
test_cases!(unicode);

/// Benchmark `JsonValue::parse_owned` (with `allow_inf_nan = false`) on `./benches/string_array.json`.
///
/// The file is read once up front; only the parse itself runs inside the timed closure.
fn string_array_jiter_value_owned(bench: &mut Bencher) {
    let contents = read_file("./benches/string_array.json");
    let bytes = contents.as_bytes();
    // `black_box` on both the input and the result keeps the optimiser from eliding the parse.
    bench.iter(|| black_box(JsonValue::parse_owned(black_box(bytes), false).unwrap()))
}

/// Benchmark `JsonValue::parse_owned` (with `allow_inf_nan = false`) on `./benches/medium_response.json`.
///
/// The file is read once up front; only the parse itself runs inside the timed closure.
fn medium_response_jiter_value_owned(bench: &mut Bencher) {
    let contents = read_file("./benches/medium_response.json");
    let bytes = contents.as_bytes();
    // `black_box` on both the input and the result keeps the optimiser from eliding the parse.
    bench.iter(|| black_box(JsonValue::parse_owned(black_box(bytes), false).unwrap()))
}

/// Benchmark entry point that hands `./benches/x100.json` and the bencher to the `serde_str` helper.
// NOTE(review): `serde_str` is defined outside this hunk; presumably it runs a serde-based parse — confirm.
fn x100_serde_iter(bench: &mut Bencher) {
serde_str("./benches/x100.json", bench);
}
Expand Down Expand Up @@ -287,6 +305,7 @@ benchmark_group!(
massive_ints_array_serde_value,
medium_response_jiter_iter,
medium_response_jiter_value,
medium_response_jiter_value_owned,
medium_response_serde_value,
x100_jiter_iter,
x100_jiter_value,
Expand All @@ -306,6 +325,7 @@ benchmark_group!(
pass2_serde_value,
string_array_jiter_iter,
string_array_jiter_value,
string_array_jiter_value_owned,
string_array_serde_value,
true_array_jiter_iter,
true_array_jiter_value,
Expand Down
29 changes: 25 additions & 4 deletions src/jiter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECU
use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange};
use crate::parse::{Parser, Peek};
use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape};
use crate::value::{take_value, JsonValue};
use crate::value::{take_value_borrowed, take_value_owned, JsonValue};
use crate::{JsonError, JsonErrorType};

pub type JiterResult<T> = Result<T, JiterError>;
Expand Down Expand Up @@ -198,7 +198,7 @@ impl<'j> Jiter<'j> {
}

/// Parse the next JSON value and return it as a [JsonValue]. Error if it is invalid JSON.
///
/// Peeks at the upcoming value and passes the resulting [Peek] to [`Self::known_value`].
// NOTE(review): the two signature lines below appear to be the removed and added sides of this diff
// hunk; they differ only in the `'j` lifetime on the returned [JsonValue] — confirm against the repo.
pub fn next_value(&mut self) -> JiterResult<JsonValue> {
pub fn next_value(&mut self) -> JiterResult<JsonValue<'j>> {
let peek = self.peek()?;
self.known_value(peek)
}
Expand All @@ -207,8 +207,29 @@ impl<'j> Jiter<'j> {
///
/// # Arguments
/// - `peek`: The [Peek] of the next JSON value.
///
/// # Errors
/// Any error returned by the underlying value parser is converted with `Into::into` before being returned.
// NOTE(review): the signature and the callee each appear twice because this is a diff hunk showing
// both sides — `JsonValue` / `take_value` versus `JsonValue<'j>` / `take_value_borrowed`.
pub fn known_value(&mut self, peek: Peek) -> JiterResult<JsonValue> {
take_value(
pub fn known_value(&mut self, peek: Peek) -> JiterResult<JsonValue<'j>> {
take_value_borrowed(
peek,
&mut self.parser,
&mut self.tape,
// this entry point always uses the crate-wide default recursion limit
DEFAULT_RECURSION_LIMIT,
self.allow_inf_nan,
)
.map_err(Into::into)
}

/// Parse the next JSON value into a [JsonValue] with `'static` lifetime. Error if it is invalid JSON.
///
/// Peeks at the upcoming value, then delegates to [`Self::known_value_owned`].
pub fn next_value_owned(&mut self) -> JiterResult<JsonValue<'static>> {
    let upcoming = self.peek()?;
    self.known_value_owned(upcoming)
}

/// Parse the next JSON value and return it as a [JsonValue] with static lifetime. Error if it is invalid JSON.
///
/// # Arguments
/// - `peek`: The [Peek] of the next JSON value.
pub fn known_value_owned(&mut self, peek: Peek) -> JiterResult<JsonValue<'static>> {
take_value_owned(
peek,
&mut self.parser,
&mut self.tape,
Expand Down
18 changes: 16 additions & 2 deletions src/lazy_index_map.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::Borrow;
use std::borrow::{Borrow, Cow};
use std::cmp::{Eq, PartialEq};
use std::fmt;
use std::hash::Hash;
Expand Down Expand Up @@ -30,7 +30,7 @@ impl<K: Clone, V: Clone> Clone for LazyIndexMap<K, V> {
// Builds a new map from a clone of `vec`, with `last_find` reset to 0.
// NOTE(review): both `map:` lines are present because this is a diff hunk showing both sides — one
// starts the clone with an empty `OnceLock`, the other clones the existing `map` cell. Only one of
// them can exist in the real file; confirm which side is current.
fn clone(&self) -> Self {
Self {
vec: self.vec.clone(),
map: OnceLock::new(),
map: self.map.clone(),
last_find: AtomicUsize::new(0),
}
}
Expand Down Expand Up @@ -130,6 +130,20 @@ where
}
}

impl<'j> LazyIndexMap<Cow<'j, str>, crate::JsonValue<'j>> {
    /// Copy this map into one whose keys and values no longer borrow from the `'j` input.
    ///
    /// Every key is copied into an owned string and every value is converted with its own
    /// `to_static`. The lookup `map` starts out empty and `last_find` is reset to 0.
    pub(crate) fn to_static(&self) -> LazyIndexMap<Cow<'static, str>, crate::JsonValue<'static>> {
        let owned_entries = self
            .vec
            .iter()
            .map(|(key, value)| (Cow::Owned(key.to_string()), value.to_static()))
            .collect();

        LazyIndexMap {
            vec: owned_entries,
            map: OnceLock::new(),
            last_find: AtomicUsize::new(0),
        }
    }
}

impl<K: PartialEq, V: PartialEq> PartialEq for LazyIndexMap<K, V> {
fn eq(&self, other: &Self) -> bool {
self.vec == other.vec
Expand Down
10 changes: 10 additions & 0 deletions src/string_decoder.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::ops::Range;
use std::str::{from_utf8, from_utf8_unchecked};

Expand Down Expand Up @@ -37,6 +38,15 @@ impl From<StringOutput<'_, '_>> for String {
}
}

/// Convert decoder output into a `Cow` tied to the input lifetime `'j`.
///
/// `Data` strings are kept as borrows; `Tape` strings are copied into an owned `String`.
impl<'t, 'j> From<StringOutput<'t, 'j>> for Cow<'j, str> {
    fn from(val: StringOutput<'t, 'j>) -> Self {
        match val {
            StringOutput::Data(borrowed) => Cow::Borrowed(borrowed),
            StringOutput::Tape(scratch) => Cow::Owned(scratch.to_owned()),
        }
    }
}

impl<'t, 'j> StringOutput<'t, 'j> {
pub fn as_str(&self) -> &'t str {
match self {
Expand Down
123 changes: 98 additions & 25 deletions src/value.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::sync::Arc;

use num_bigint::BigInt;
Expand All @@ -7,26 +8,26 @@ use crate::errors::{json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT};
use crate::lazy_index_map::LazyIndexMap;
use crate::number_decoder::{NumberAny, NumberInt};
use crate::parse::{Parser, Peek};
use crate::string_decoder::{StringDecoder, Tape};
use crate::string_decoder::{StringDecoder, StringOutput, Tape};

/// Enum representing a JSON value.
// NOTE(review): this is a diff hunk showing both sides. The enum header and the `Str` / `Array` /
// `Object` variants each appear twice: once without a lifetime (`String`, `JsonArray`, `JsonObject`)
// and once parameterised by `'s` (`Cow<'s, str>`, `JsonArray<'s>`, `JsonObject<'s>`).
#[derive(Clone, Debug, PartialEq)]
pub enum JsonValue {
pub enum JsonValue<'s> {
Null,
Bool(bool),
Int(i64),
BigInt(BigInt),
Float(f64),
Str(String),
Array(JsonArray),
Object(JsonObject),
// lifetime-carrying forms of the three variants above
Str(Cow<'s, str>),
Array(JsonArray<'s>),
Object(JsonObject<'s>),
}

pub type JsonArray = Arc<SmallVec<[JsonValue; 8]>>;
pub type JsonObject = Arc<LazyIndexMap<String, JsonValue>>;
pub type JsonArray<'s> = Arc<SmallVec<[JsonValue<'s>; 8]>>;
pub type JsonObject<'s> = Arc<LazyIndexMap<Cow<'s, str>, JsonValue<'s>>>;

#[cfg(feature = "python")]
impl pyo3::ToPyObject for JsonValue {
impl pyo3::ToPyObject for JsonValue<'_> {
fn to_object(&self, py: pyo3::Python<'_>) -> pyo3::PyObject {
use pyo3::prelude::*;
match self {
Expand All @@ -48,14 +49,51 @@ impl pyo3::ToPyObject for JsonValue {
}
}

impl JsonValue {
/// Parse a JSON value from a byte slice.
pub fn parse(data: &[u8], allow_inf_nan: bool) -> Result<Self, JsonError> {
impl<'j> JsonValue<'j> {
/// Parse a JSON enum from a byte slice, returning a borrowed version of the enum - e.g. strings can be
/// references into the original byte slice.
///
/// # Errors
/// Returns a [JsonError] if peeking, parsing the value, or the final `finish` call fails.
pub fn parse(data: &'j [u8], allow_inf_nan: bool) -> Result<Self, JsonError> {
let mut parser = Parser::new(data);

let mut tape = Tape::default();
let peek = parser.peek()?;
// NOTE(review): the two `let v = …` lines are the two sides of this diff hunk (`take_value` versus
// `take_value_borrowed`); only one of them exists in the real file.
let v = take_value(peek, &mut parser, &mut tape, DEFAULT_RECURSION_LIMIT, allow_inf_nan)?;
let v = take_value_borrowed(peek, &mut parser, &mut tape, DEFAULT_RECURSION_LIMIT, allow_inf_nan)?;
// presumably rejects trailing data after the value — `finish` is defined elsewhere, confirm
parser.finish()?;
Ok(v)
}

/// Convert a borrowed JSON enum into an owned JSON enum.
///
/// Consumes `self` and delegates to `value_static`, returning a value with `'static` lifetime.
pub fn into_static(self) -> JsonValue<'static> {
value_static(self)
}

/// Copy a borrowed JSON enum into an owned JSON enum.
pub fn to_static(&self) -> JsonValue<'static> {
value_static(self.clone())
}
}

fn value_static(v: JsonValue<'_>) -> JsonValue<'static> {
match v {
JsonValue::Null => JsonValue::Null,
JsonValue::Bool(b) => JsonValue::Bool(b),
JsonValue::Int(i) => JsonValue::Int(i),
JsonValue::BigInt(b) => JsonValue::BigInt(b),
JsonValue::Float(f) => JsonValue::Float(f),
JsonValue::Str(s) => JsonValue::Str(s.into_owned().into()),
JsonValue::Array(v) => JsonValue::Array(Arc::new(v.iter().map(JsonValue::to_static).collect::<SmallVec<_>>())),
JsonValue::Object(o) => JsonValue::Object(Arc::new(o.to_static())),
}
}

impl JsonValue<'static> {
/// Parse a JSON enum from a byte slice into an owned value that does not borrow from `data`.
///
/// # Errors
/// Returns a [JsonError] if peeking, parsing the value, or the final `finish` call fails.
pub fn parse_owned(data: &[u8], allow_inf_nan: bool) -> Result<Self, JsonError> {
    let mut tape = Tape::default();
    let mut parser = Parser::new(data);

    let first = parser.peek()?;
    let value = take_value_owned(first, &mut parser, &mut tape, DEFAULT_RECURSION_LIMIT, allow_inf_nan)?;
    parser.finish()?;
    Ok(value)
}
Expand All @@ -74,13 +112,48 @@ macro_rules! check_recursion {
};
}

pub(crate) fn take_value(
pub(crate) fn take_value_borrowed<'j>(
peek: Peek,
parser: &mut Parser<'j>,
tape: &mut Tape,
recursion_limit: u8,
allow_inf_nan: bool,
) -> JsonResult<JsonValue<'j>> {
take_value(
peek,
parser,
tape,
recursion_limit,
allow_inf_nan,
&|s: StringOutput<'_, 'j>| s.into(),
)
}

pub(crate) fn take_value_owned<'j>(
peek: Peek,
parser: &mut Parser<'j>,
tape: &mut Tape,
recursion_limit: u8,
allow_inf_nan: bool,
) -> JsonResult<JsonValue<'static>> {
take_value(
peek,
parser,
tape,
recursion_limit,
allow_inf_nan,
&|s: StringOutput<'_, 'j>| Into::<String>::into(s).into(),
)
}

fn take_value<'j, 's>(
peek: Peek,
parser: &mut Parser,
parser: &mut Parser<'j>,
tape: &mut Tape,
mut recursion_limit: u8,
allow_inf_nan: bool,
) -> JsonResult<JsonValue> {
create_cow: &impl Fn(StringOutput<'_, 'j>) -> Cow<'s, str>,
) -> JsonResult<JsonValue<'s>> {
match peek {
Peek::True => {
parser.consume_true()?;
Expand All @@ -95,20 +168,20 @@ pub(crate) fn take_value(
Ok(JsonValue::Null)
}
Peek::String => {
let s = parser.consume_string::<StringDecoder>(tape)?;
Ok(JsonValue::Str(s.into()))
let s: StringOutput<'_, 'j> = parser.consume_string::<StringDecoder>(tape)?;
Ok(JsonValue::Str(create_cow(s)))
}
Peek::Array => {
// we could do something clever about guessing the size of the array
let mut array: SmallVec<[JsonValue; 8]> = SmallVec::new();
let mut array: SmallVec<[JsonValue<'s>; 8]> = SmallVec::new();
if let Some(peek_first) = parser.array_first()? {
check_recursion!(recursion_limit, parser.index,
let v = take_value(peek_first, parser, tape, recursion_limit, allow_inf_nan)?;
let v = take_value(peek_first, parser, tape, recursion_limit, allow_inf_nan, create_cow)?;
);
array.push(v);
while let Some(peek) = parser.array_step()? {
check_recursion!(recursion_limit, parser.index,
let v = take_value(peek, parser, tape, recursion_limit, allow_inf_nan)?;
let v = take_value(peek, parser, tape, recursion_limit, allow_inf_nan, create_cow)?;
);
array.push(v);
}
Expand All @@ -117,19 +190,19 @@ pub(crate) fn take_value(
}
Peek::Object => {
// same for objects
let mut object: LazyIndexMap<String, JsonValue> = LazyIndexMap::new();
let mut object: LazyIndexMap<Cow<'s, str>, JsonValue<'s>> = LazyIndexMap::new();
if let Some(first_key) = parser.object_first::<StringDecoder>(tape)? {
let first_key = first_key.into();
let first_key = create_cow(first_key);
let peek = parser.peek()?;
check_recursion!(recursion_limit, parser.index,
let first_value = take_value(peek, parser, tape, recursion_limit, allow_inf_nan)?;
let first_value = take_value(peek, parser, tape, recursion_limit, allow_inf_nan, create_cow)?;
);
object.insert(first_key, first_value);
while let Some(key) = parser.object_step::<StringDecoder>(tape)? {
let key = key.into();
let key = create_cow(key);
let peek = parser.peek()?;
check_recursion!(recursion_limit, parser.index,
let value = take_value(peek, parser, tape, recursion_limit, allow_inf_nan)?;
let value = take_value(peek, parser, tape, recursion_limit, allow_inf_nan, create_cow)?;
);
object.insert(key, value);
}
Expand Down
Loading
Loading