From 927f3e1a8e0da1e026be462d2ccbbc575346013a Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 13:25:09 +0000 Subject: [PATCH 1/6] search the vec for small LazyIndexMaps --- benches/main.rs | 34 ++++++++++++++++++++++++++++++++-- src/lazy_index_map.rs | 32 ++++++++++++++++++++++++-------- tests/main.rs | 28 +++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 11 deletions(-) diff --git a/benches/main.rs b/benches/main.rs index 41ea2e38..148964de 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -4,7 +4,7 @@ use std::hint::black_box; use std::fs::File; use std::io::Read; -use jiter::{Jiter, JsonValue, Peek}; +use jiter::{Jiter, JsonValue, LazyIndexMap, Peek}; use serde_json::Value; fn read_file(path: &str) -> String { @@ -215,6 +215,33 @@ test_cases!(floats_array); // src/github.com/json-iterator/go-benchmark/benchmark.go#L30C17-L30C29 test_cases!(medium_response); +fn lazy_map_lookup(length: i64, bench: &mut Bencher) { + bench.iter(|| { + let mut map: LazyIndexMap = LazyIndexMap::new(); + for i in 0..length { + let key = i.to_string(); + map.insert(key, JsonValue::Int(i)); + } + + // best case we get the next value each time + for i in 0..length { + black_box(map.get(&i.to_string()).unwrap()); + } + }) +} + +fn lazy_map_lookup_1_10(bench: &mut Bencher) { + lazy_map_lookup(10, bench); +} + +fn lazy_map_lookup_2_20(bench: &mut Bencher) { + lazy_map_lookup(20, bench); +} + +fn lazy_map_lookup_3_50(bench: &mut Bencher) { + lazy_map_lookup(50, bench); +} + benchmark_group!( benches, big_jiter_iter, @@ -246,6 +273,9 @@ benchmark_group!( true_array_serde_value, true_object_jiter_iter, true_object_jiter_value, - true_object_serde_value + true_object_serde_value, + lazy_map_lookup_1_10, + lazy_map_lookup_2_20, + lazy_map_lookup_3_50, ); benchmark_main!(benches); diff --git a/src/lazy_index_map.rs b/src/lazy_index_map.rs index 1cdc7630..8308ff0a 100644 --- a/src/lazy_index_map.rs +++ b/src/lazy_index_map.rs @@ -1,4 +1,5 @@ use std::borrow::Borrow; +use std::cell::Cell; use std::cmp::{Eq, PartialEq}; use std::fmt; use std::hash::Hash; @@ -13,6 +14,7 @@ use smallvec::SmallVec; pub struct LazyIndexMap { vec: SmallVec<[(K, V); 8]>, map: OnceLock>, + last_find: Cell, } impl fmt::Debug for LazyIndexMap @@ -25,6 +27,9 @@ where } } +// picked to be a good tradeoff after experimenting with `lazy_map_lookup` benchmark, should cover most models +const HASHMAP_THRESHOLD: usize = 16; + /// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed. impl LazyIndexMap where @@ -35,6 +40,7 @@ where Self { vec: SmallVec::new(), map: OnceLock::new(), + last_find: Cell::new(0), } } @@ -58,14 +64,24 @@ where K: Borrow + PartialEq, Q: Hash + Eq, { - let map = self.map.get_or_init(|| { - self.vec - .iter() - .enumerate() - .map(|(index, (key, _))| (key.clone(), index)) - .collect() - }); - map.get(key).map(|&i| &self.vec[i].1) + let vec_len = self.vec.len(); + // if the vec is longer than the threshold, we use the hashmap for lookups + if vec_len > HASHMAP_THRESHOLD { + self.get_map().get(key).map(|&i| &self.vec[i].1) + } else { + // otherwise we find the value in the vec + // we assume the most likely position for the match is at `last_find + 1` + let first_try = self.last_find.get() + 1; + for i in first_try..first_try + vec_len { + let index = i % vec_len; + let (k, v) = &self.vec[index]; + if k == key { + self.last_find.set(index); + return Some(v); + } + } + None + } } pub fn keys(&self) -> impl Iterator { diff --git a/tests/main.rs b/tests/main.rs index 6b3ccb0a..fa391958 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -873,13 +873,39 @@ fn test_4302_int_err() { } #[test] -fn lazy_index_map_prety() { +fn lazy_index_map_pretty() { let mut map = LazyIndexMap::new(); map.insert("foo".to_string(), JsonValue::Str("bar".to_string())); map.insert("spam".to_string(), JsonValue::Null); assert_eq!(format!("{map:?}"), r#"{"foo": Str("bar"), "spam": Null}"#); } +#[test] +fn lazy_index_map_small_get() { + let mut map = LazyIndexMap::new(); + map.insert("foo".to_string(), JsonValue::Str("bar".to_string())); + map.insert("spam".to_string(), JsonValue::Null); + + assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string()))); + assert_eq!(map.get("spam"), Some(&JsonValue::Null)); + assert_eq!(map.get("spam"), Some(&JsonValue::Null)); + assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string()))); +} + +#[test] +fn lazy_index_map_big_get() { + let mut map = LazyIndexMap::new(); + + for i in 0..25 { + let key = i.to_string(); + map.insert(key, JsonValue::Int(i)); + } + + assert_eq!(map.get("0"), Some(&JsonValue::Int(0))); + assert_eq!(map.get("10"), Some(&JsonValue::Int(10))); + assert_eq!(map.get("22"), Some(&JsonValue::Int(22))); +} + #[test] fn readme_jiter() { let json_data = r#" From a596dbf143597a49eb4cc10f072086b47825eef3 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 13:40:32 +0000 Subject: [PATCH 2/6] switch from Cell to Arc --- src/lazy_index_map.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/lazy_index_map.rs b/src/lazy_index_map.rs index 8308ff0a..af2469ae 100644 --- a/src/lazy_index_map.rs +++ b/src/lazy_index_map.rs @@ -1,10 +1,9 @@ use std::borrow::Borrow; -use std::cell::Cell; use std::cmp::{Eq, PartialEq}; use std::fmt; use std::hash::Hash; use std::slice::Iter as SliceIter; -use std::sync::OnceLock; +use std::sync::{Arc, Mutex, OnceLock}; use ahash::AHashMap; use smallvec::SmallVec; @@ -14,7 +13,7 @@ use smallvec::SmallVec; pub struct LazyIndexMap { vec: SmallVec<[(K, V); 8]>, map: OnceLock>, - last_find: Cell, + last_find: Arc>, } impl fmt::Debug for LazyIndexMap @@ -40,7 +39,7 @@ where Self { vec: SmallVec::new(), map: OnceLock::new(), - last_find: Cell::new(0), + last_find: Arc::new(Mutex::new(0)), } } @@ -71,16 +70,21 @@ where } else { // otherwise we find the value in the vec // we assume the most likely position for the match is at `last_find + 1` - let first_try = self.last_find.get() + 1; - for i in first_try..first_try + vec_len { - let index = i % vec_len; - let (k, v) = &self.vec[index]; - if k == key { - self.last_find.set(index); - return Some(v); + match self.last_find.lock() { + Ok(mut last_find) => { + let first_try = *last_find + 1; + for i in first_try..first_try + vec_len { + let index = i % vec_len; + let (k, v) = &self.vec[index]; + if k == key { + *last_find = index; + return Some(v); + } + } + None } + Err(_) => None, } - None } } From d9e31e83e5ed652252896c8504842736cbc584e6 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 15:35:28 +0000 Subject: [PATCH 3/6] use AtomicUsize --- src/lazy_index_map.rs | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/lazy_index_map.rs b/src/lazy_index_map.rs index af2469ae..5f55e733 100644 --- a/src/lazy_index_map.rs +++ b/src/lazy_index_map.rs @@ -3,17 +3,28 @@ use std::cmp::{Eq, PartialEq}; use std::fmt; use std::hash::Hash; use std::slice::Iter as SliceIter; -use std::sync::{Arc, Mutex, OnceLock}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::OnceLock; use ahash::AHashMap; use smallvec::SmallVec; /// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed. -#[derive(Clone, Default)] +#[derive(Default)] pub struct LazyIndexMap { vec: SmallVec<[(K, V); 8]>, map: OnceLock>, - last_find: Arc>, + last_find: AtomicUsize, +} + +impl Clone for LazyIndexMap { + fn clone(&self) -> Self { + Self { + vec: self.vec.clone(), + map: OnceLock::new(), + last_find: AtomicUsize::new(0), + } + } } impl fmt::Debug for LazyIndexMap @@ -39,7 +50,7 @@ where Self { vec: SmallVec::new(), map: OnceLock::new(), - last_find: Arc::new(Mutex::new(0)), + last_find: AtomicUsize::new(0), } } @@ -70,21 +81,16 @@ where } else { // otherwise we find the value in the vec // we assume the most likely position for the match is at `last_find + 1` - match self.last_find.lock() { - Ok(mut last_find) => { - let first_try = *last_find + 1; - for i in first_try..first_try + vec_len { - let index = i % vec_len; - let (k, v) = &self.vec[index]; - if k == key { - *last_find = index; - return Some(v); - } - } - None + let first_try = self.last_find.load(Ordering::Relaxed) + 1; + for i in first_try..first_try + vec_len { + let index = i % vec_len; + let (k, v) = &self.vec[index]; + if k == key { + self.last_find.store(index, Ordering::Relaxed); + return Some(v); } - Err(_) => None, } + None } } From 4c49e3e77a7a62be8f79a5f7619c4e69f592498d Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 16:03:48 +0000 Subject: [PATCH 4/6] improve coverage --- src/lazy_index_map.rs | 2 +- tests/main.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lazy_index_map.rs b/src/lazy_index_map.rs index 5f55e733..780a392d 100644 --- a/src/lazy_index_map.rs +++ b/src/lazy_index_map.rs @@ -66,7 +66,7 @@ where } pub fn is_empty(&self) -> bool { - self.get_map().is_empty() + self.vec.is_empty() } pub fn get(&self, key: &Q) -> Option<&V> diff --git a/tests/main.rs b/tests/main.rs index fa391958..5d8b74d3 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -875,9 +875,13 @@ fn test_4302_int_err() { #[test] fn lazy_index_map_pretty() { let mut map = LazyIndexMap::new(); + assert!(map.is_empty()); map.insert("foo".to_string(), JsonValue::Str("bar".to_string())); + assert!(!map.is_empty()); map.insert("spam".to_string(), JsonValue::Null); assert_eq!(format!("{map:?}"), r#"{"foo": Str("bar"), "spam": Null}"#); + let keys = map.keys().collect::>(); + assert_eq!(keys, vec!["foo", "spam"]); } #[test] From 553769d87ff033e9beeaad04b8a88fa23ddfa47b Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 16:05:27 +0000 Subject: [PATCH 5/6] improve coverage, take 2 --- tests/main.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/main.rs b/tests/main.rs index 5d8b74d3..4e360216 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -894,6 +894,7 @@ fn lazy_index_map_small_get() { assert_eq!(map.get("spam"), Some(&JsonValue::Null)); assert_eq!(map.get("spam"), Some(&JsonValue::Null)); assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string()))); + assert_eq!(map.get("other"), None); } #[test] @@ -908,6 +909,7 @@ fn lazy_index_map_big_get() { assert_eq!(map.get("0"), Some(&JsonValue::Int(0))); assert_eq!(map.get("10"), Some(&JsonValue::Int(10))); assert_eq!(map.get("22"), Some(&JsonValue::Int(22))); + assert_eq!(map.get("other"), None); } #[test] From 497a5adfe5485f84da7b3590fcad69029fe66d02 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 16:14:38 +0000 Subject: [PATCH 6/6] improve coverage on LazyIndexMap --- src/lazy_index_map.rs | 11 ++++++++++- tests/main.rs | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/lazy_index_map.rs b/src/lazy_index_map.rs index 780a392d..7377271f 100644 --- a/src/lazy_index_map.rs +++ b/src/lazy_index_map.rs @@ -10,13 +10,22 @@ use ahash::AHashMap; use smallvec::SmallVec; /// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed. -#[derive(Default)] pub struct LazyIndexMap { vec: SmallVec<[(K, V); 8]>, map: OnceLock>, last_find: AtomicUsize, } +impl Default for LazyIndexMap +where + K: Clone + fmt::Debug + Eq + Hash, + V: fmt::Debug, +{ + fn default() -> Self { + Self::new() + } +} + impl Clone for LazyIndexMap { fn clone(&self) -> Self { Self { diff --git a/tests/main.rs b/tests/main.rs index 4e360216..67e40c30 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -912,6 +912,27 @@ fn lazy_index_map_big_get() { assert_eq!(map.get("other"), None); } +#[test] +fn lazy_index_map_clone() { + let mut map = LazyIndexMap::default(); + + map.insert("foo".to_string(), JsonValue::Str("bar".to_string())); + map.insert("spam".to_string(), JsonValue::Null); + + assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string()))); + assert_eq!(map.get("spam"), Some(&JsonValue::Null)); + assert_eq!(map.get("spam"), Some(&JsonValue::Null)); + assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string()))); + assert_eq!(map.get("other"), None); + + let map2 = map.clone(); + assert_eq!(map2.get("foo"), Some(&JsonValue::Str("bar".to_string()))); + assert_eq!(map2.get("spam"), Some(&JsonValue::Null)); + assert_eq!(map2.get("spam"), Some(&JsonValue::Null)); + assert_eq!(map2.get("foo"), Some(&JsonValue::Str("bar".to_string()))); + assert_eq!(map2.get("other"), None); +} + #[test] fn readme_jiter() { let json_data = r#"