Skip to content

Commit

Permalink
experiment: use buffer and indexmap instead of lazy index map
Browse files Browse the repository at this point in the history
  • Loading branch information
davidhewitt committed Sep 10, 2024
1 parent 546c4df commit fe07067
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 101 deletions.
1 change: 1 addition & 0 deletions crates/jiter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ smallvec = "1.11.0"
pyo3 = { workspace = true, optional = true, features = ["num-bigint"] }
lexical-parse-float = { version = "0.8.5", features = ["format"] }
bitvec = "1.0.1"
indexmap = "2.5.0"

[features]
python = ["dep:pyo3", "dep:pyo3-build-config"]
Expand Down
16 changes: 1 addition & 15 deletions crates/jiter/src/lazy_index_map.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::borrow::{Borrow, Cow};
use std::borrow::Borrow;
use std::fmt;
use std::hash::Hash;
use std::slice::Iter as SliceIter;
Expand Down Expand Up @@ -129,20 +129,6 @@ where
}
}

// Conversion from a map tied to the input lifetime `'j` into one with `'static` keys and values.
impl<'j> LazyIndexMap<Cow<'j, str>, crate::JsonValue<'j>> {
/// Build a new map whose keys and values no longer borrow from `'j`.
///
/// Each key is copied into an owned string via `to_string()` and each value is
/// converted with its own `to_static()`. `self` is only read, never modified.
pub(crate) fn to_static(&self) -> LazyIndexMap<Cow<'static, str>, crate::JsonValue<'static>> {
LazyIndexMap {
vec: self
.vec
.iter()
.map(|(k, v)| (k.to_string().into(), v.to_static()))
.collect(),
// `map` and `last_find` are not copied from `self`: the result starts with an
// empty `OnceLock` and a `last_find` of 0.
map: OnceLock::new(),
last_find: AtomicUsize::new(0),
}
}
}

impl<K: PartialEq, V: PartialEq> PartialEq for LazyIndexMap<K, V> {
fn eq(&self, other: &Self) -> bool {
self.vec == other.vec
Expand Down
169 changes: 86 additions & 83 deletions crates/jiter/src/value.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use std::borrow::Cow;
use std::sync::Arc;

use ahash::RandomState;
use indexmap::IndexMap;
use num_bigint::BigInt;
use smallvec::SmallVec;

use crate::errors::{json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT};
use crate::lazy_index_map::LazyIndexMap;
use crate::number_decoder::{NumberAny, NumberInt, NumberRange};
use crate::parse::{Parser, Peek};
use crate::string_decoder::{StringDecoder, StringDecoderRange, StringOutput, Tape};
Expand All @@ -24,7 +25,7 @@ pub enum JsonValue<'s> {
}

pub type JsonArray<'s> = Arc<SmallVec<[JsonValue<'s>; 8]>>;
pub type JsonObject<'s> = Arc<LazyIndexMap<Cow<'s, str>, JsonValue<'s>>>;
pub type JsonObject<'s> = Arc<IndexMap<Cow<'s, str>, JsonValue<'s>, RandomState>>;

#[cfg(feature = "python")]
impl pyo3::ToPyObject for JsonValue<'_> {
Expand Down Expand Up @@ -82,7 +83,11 @@ fn value_static(v: JsonValue<'_>) -> JsonValue<'static> {
JsonValue::Float(f) => JsonValue::Float(f),
JsonValue::Str(s) => JsonValue::Str(s.into_owned().into()),
JsonValue::Array(v) => JsonValue::Array(Arc::new(v.iter().map(JsonValue::to_static).collect::<SmallVec<_>>())),
JsonValue::Object(o) => JsonValue::Object(Arc::new(o.to_static())),
JsonValue::Object(o) => JsonValue::Object(Arc::new(
o.iter()
.map(|(k, v)| (k.clone().into_owned().into(), v.to_static()))
.collect(),
)),
}
}

Expand Down Expand Up @@ -177,13 +182,12 @@ fn take_value<'j, 's>(
}
Peek::Object => {
// same for objects
let object = Arc::new(LazyIndexMap::new());
if let Some(first_key) = parser.object_first::<StringDecoder>(tape)? {
let first_key = create_cow(first_key);
take_value_recursive(
parser.peek()?,
RecursedValue::Object {
partial: object,
partial: Vec::with_capacity(8),
next_key: first_key,
},
parser,
Expand All @@ -193,7 +197,7 @@ fn take_value<'j, 's>(
create_cow,
)
} else {
Ok(JsonValue::Object(object))
Ok(JsonValue::Object(Arc::new(IndexMap::with_hasher(RandomState::new()))))

Check warning on line 200 in crates/jiter/src/value.rs

View check run for this annotation

Codecov / codecov/patch

crates/jiter/src/value.rs#L200

Added line #L200 was not covered by tests
}
}
_ => {
Expand All @@ -217,7 +221,7 @@ fn take_value<'j, 's>(
enum RecursedValue<'s> {
Array(JsonArray<'s>),
Object {
partial: JsonObject<'s>,
partial: Vec<(Cow<'s, str>, JsonValue<'s>)>,
next_key: Cow<'s, str>,
},
}
Expand Down Expand Up @@ -279,18 +283,17 @@ fn take_value_recursive<'j, 's>(
JsonValue::Array(array)
}
Peek::Object => {
let object = Arc::new(LazyIndexMap::new());
if let Some(next_key) = parser.object_first::<StringDecoder>(tape)? {
push_recursion!(
parser.peek()?,
RecursedValue::Object {
partial: object,
partial: Vec::with_capacity(8),
next_key: create_cow(next_key)
}
);
continue 'recursion;
}
JsonValue::Object(object)
JsonValue::Object(Arc::new(IndexMap::with_hasher(RandomState::new())))
}
_ => {
let n = parser
Expand Down Expand Up @@ -326,83 +329,81 @@ fn take_value_recursive<'j, 's>(
break JsonValue::Array(array);
}
}
RecursedValue::Object { partial, next_key } => {
let partial = Arc::get_mut(partial).expect("sole writer");
loop {
let value = match peek {
Peek::True => {
parser.consume_true()?;
JsonValue::Bool(true)
}
Peek::False => {
parser.consume_false()?;
JsonValue::Bool(false)
}
Peek::Null => {
parser.consume_null()?;
JsonValue::Null
}
Peek::String => {
let s = parser.consume_string::<StringDecoder>(tape, false)?;
JsonValue::Str(create_cow(s))
}
Peek::Array => {
let array = Arc::new(SmallVec::new());
if let Some(next_peek) = parser.array_first()? {
push_recursion!(next_peek, RecursedValue::Array(array));
// immediately jump to process the first value in the array
continue 'recursion;
}
JsonValue::Array(array)
RecursedValue::Object { partial, next_key } => loop {
let value = match peek {
Peek::True => {
parser.consume_true()?;
JsonValue::Bool(true)
}
Peek::False => {
parser.consume_false()?;
JsonValue::Bool(false)
}
Peek::Null => {
parser.consume_null()?;
JsonValue::Null
}
Peek::String => {
let s = parser.consume_string::<StringDecoder>(tape, false)?;
JsonValue::Str(create_cow(s))
}
Peek::Array => {
let array = Arc::new(SmallVec::new());
if let Some(next_peek) = parser.array_first()? {
push_recursion!(next_peek, RecursedValue::Array(array));
// immediately jump to process the first value in the array
continue 'recursion;
}
Peek::Object => {
let object = Arc::new(LazyIndexMap::new());
if let Some(yet_another_key) = parser.object_first::<StringDecoder>(tape)? {
push_recursion!(
parser.peek()?,
RecursedValue::Object {
partial: object,
next_key: create_cow(yet_another_key)
}
);
continue 'recursion;
}
JsonValue::Object(object)
JsonValue::Array(array)
}
Peek::Object => {
if let Some(yet_another_key) = parser.object_first::<StringDecoder>(tape)? {
push_recursion!(
parser.peek()?,
RecursedValue::Object {
partial: Vec::with_capacity(8),
next_key: create_cow(yet_another_key)
}
);
continue 'recursion;
}
_ => {
let n = parser
.consume_number::<NumberAny>(peek.into_inner(), allow_inf_nan)
.map_err(|e| {
if !peek.is_num() {
json_error!(ExpectedSomeValue, parser.index)
} else {
e
}
})?;
match n {
NumberAny::Int(NumberInt::Int(int)) => JsonValue::Int(int),
NumberAny::Int(NumberInt::BigInt(big_int)) => JsonValue::BigInt(big_int),
NumberAny::Float(float) => JsonValue::Float(float),
}
JsonValue::Object(Arc::new(IndexMap::with_hasher(RandomState::new())))
}
_ => {
let n = parser
.consume_number::<NumberAny>(peek.into_inner(), allow_inf_nan)
.map_err(|e| {
if !peek.is_num() {
json_error!(ExpectedSomeValue, parser.index)

Check warning on line 377 in crates/jiter/src/value.rs

View check run for this annotation

Codecov / codecov/patch

crates/jiter/src/value.rs#L376-L377

Added lines #L376 - L377 were not covered by tests
} else {
e

Check warning on line 379 in crates/jiter/src/value.rs

View check run for this annotation

Codecov / codecov/patch

crates/jiter/src/value.rs#L379

Added line #L379 was not covered by tests
}
})?;
match n {
NumberAny::Int(NumberInt::Int(int)) => JsonValue::Int(int),
NumberAny::Int(NumberInt::BigInt(big_int)) => JsonValue::BigInt(big_int),
NumberAny::Float(float) => JsonValue::Float(float),
}
};

// now try to advance position in the current object
if let Some(yet_another_key) = parser.object_step::<StringDecoder>(tape)?.map(create_cow) {
// object continuing
partial.insert(std::mem::replace(next_key, yet_another_key), value);
peek = parser.peek()?;
continue;
}
};

let RecursedValue::Object { mut partial, next_key } = current_recursion else {
unreachable!("known to be in object recursion");
};

Arc::get_mut(&mut partial).expect("sole writer").insert(next_key, value);
break JsonValue::Object(partial);
// now try to advance position in the current object
if let Some(yet_another_key) = parser.object_step::<StringDecoder>(tape)?.map(create_cow) {
// object continuing
partial.push((std::mem::replace(next_key, yet_another_key), value));
peek = parser.peek()?;
continue;
}
}

let RecursedValue::Object { mut partial, next_key } = current_recursion else {
unreachable!("known to be in object recursion");

Check warning on line 399 in crates/jiter/src/value.rs

View check run for this annotation

Codecov / codecov/patch

crates/jiter/src/value.rs#L399

Added line #L399 was not covered by tests
};

partial.push((next_key, value));
let mut map = IndexMap::with_hasher(RandomState::new());
map.extend(partial);
break JsonValue::Object(Arc::new(map));
},
};

// current array or object has finished;
Expand All @@ -424,12 +425,14 @@ fn take_value_recursive<'j, 's>(
JsonValue::Array(array)
}
RecursedValue::Object { mut partial, next_key } => {
Arc::get_mut(&mut partial).expect("sole writer").insert(next_key, value);
partial.push((next_key, value));
if let Some(next_key) = parser.object_step::<StringDecoder>(tape)?.map(create_cow) {
current_recursion = RecursedValue::Object { partial, next_key };
break parser.peek()?;
}
JsonValue::Object(partial)
let mut map = IndexMap::with_hasher(RandomState::new());
map.extend(partial);
JsonValue::Object(Arc::new(map))
}
}
};
Expand Down
8 changes: 5 additions & 3 deletions crates/jiter/tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use std::iter;
use std::str::FromStr;
use std::sync::Arc;

use ahash::RandomState;
use indexmap::IndexMap;
use num_bigint::BigInt;
use smallvec::smallvec;

Expand Down Expand Up @@ -658,7 +660,7 @@ fn json_value_object() {
let json = r#"{"foo": "bar", "spam": [1, null, true]}"#;
let v = JsonValue::parse(json.as_bytes(), false).unwrap();

let mut expected = LazyIndexMap::new();
let mut expected = IndexMap::with_hasher(RandomState::new());
expected.insert("foo".into(), JsonValue::Str("bar".into()));
expected.insert(
"spam".into(),
Expand Down Expand Up @@ -923,7 +925,7 @@ fn unique_iter_object() {
let value = JsonValue::parse(br#" {"x": 1, "x": 2} "#, false).unwrap();
if let JsonValue::Object(obj) = value {
assert_eq!(obj.len(), 1);
let mut unique = obj.iter_unique();
let mut unique = obj.iter();
let first = unique.next().unwrap();
assert_eq!(first.0, "x");
assert_eq!(first.1, &JsonValue::Int(2));
Expand All @@ -938,7 +940,7 @@ fn unique_iter_object_repeat() {
let value = JsonValue::parse(br#" {"x": 1, "x": 1} "#, false).unwrap();
if let JsonValue::Object(obj) = value {
assert_eq!(obj.len(), 1);
let mut unique = obj.iter_unique();
let mut unique = obj.iter();
let first = unique.next().unwrap();
assert_eq!(first.0, "x");
assert_eq!(first.1, &JsonValue::Int(1));
Expand Down

0 comments on commit fe07067

Please sign in to comment.