From 927f3e1a8e0da1e026be462d2ccbbc575346013a Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 17 Jan 2024 13:25:09 +0000 Subject: [PATCH] search the vec for small LazyIndexMaps --- benches/main.rs | 34 ++++++++++++++++++++++++++++++++-- src/lazy_index_map.rs | 32 ++++++++++++++++++++++++-------- tests/main.rs | 28 +++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 11 deletions(-) diff --git a/benches/main.rs b/benches/main.rs index 41ea2e38..148964de 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -4,7 +4,7 @@ use std::hint::black_box; use std::fs::File; use std::io::Read; -use jiter::{Jiter, JsonValue, Peek}; +use jiter::{Jiter, JsonValue, LazyIndexMap, Peek}; use serde_json::Value; fn read_file(path: &str) -> String { @@ -215,6 +215,33 @@ test_cases!(floats_array); // src/github.com/json-iterator/go-benchmark/benchmark.go#L30C17-L30C29 test_cases!(medium_response); +fn lazy_map_lookup(length: i64, bench: &mut Bencher) { + bench.iter(|| { + let mut map: LazyIndexMap = LazyIndexMap::new(); + for i in 0..length { + let key = i.to_string(); + map.insert(key, JsonValue::Int(i)); + } + + // best case we get the next value each time + for i in 0..length { + black_box(map.get(&i.to_string()).unwrap()); + } + }) +} + +fn lazy_map_lookup_1_10(bench: &mut Bencher) { + lazy_map_lookup(10, bench); +} + +fn lazy_map_lookup_2_20(bench: &mut Bencher) { + lazy_map_lookup(20, bench); +} + +fn lazy_map_lookup_3_50(bench: &mut Bencher) { + lazy_map_lookup(50, bench); +} + benchmark_group!( benches, big_jiter_iter, @@ -246,6 +273,9 @@ benchmark_group!( true_array_serde_value, true_object_jiter_iter, true_object_jiter_value, - true_object_serde_value + true_object_serde_value, + lazy_map_lookup_1_10, + lazy_map_lookup_2_20, + lazy_map_lookup_3_50, ); benchmark_main!(benches); diff --git a/src/lazy_index_map.rs b/src/lazy_index_map.rs index 1cdc7630..8308ff0a 100644 --- a/src/lazy_index_map.rs +++ b/src/lazy_index_map.rs @@ -1,4 +1,5 @@ use std::borrow::Borrow; +use std::cell::Cell; use std::cmp::{Eq, PartialEq}; use std::fmt; use std::hash::Hash; @@ -13,6 +14,7 @@ use smallvec::SmallVec; pub struct LazyIndexMap { vec: SmallVec<[(K, V); 8]>, map: OnceLock>, + last_find: Cell, } impl fmt::Debug for LazyIndexMap @@ -25,6 +27,9 @@ where } } +// picked to be a good tradeoff after experimenting with `lazy_map_lookup` benchmark, should cover most models +const HASHMAP_THRESHOLD: usize = 16; + /// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed. impl LazyIndexMap where @@ -35,6 +40,7 @@ where Self { vec: SmallVec::new(), map: OnceLock::new(), + last_find: Cell::new(0), } } @@ -58,14 +64,24 @@ where K: Borrow + PartialEq, Q: Hash + Eq, { - let map = self.map.get_or_init(|| { - self.vec - .iter() - .enumerate() - .map(|(index, (key, _))| (key.clone(), index)) - .collect() - }); - map.get(key).map(|&i| &self.vec[i].1) + let vec_len = self.vec.len(); + // if the vec is longer than the threshold, we use the hashmap for lookups + if vec_len > HASHMAP_THRESHOLD { + self.get_map().get(key).map(|&i| &self.vec[i].1) + } else { + // otherwise we find the value in the vec + // we assume the most likely position for the match is at `last_find + 1` + let first_try = self.last_find.get() + 1; + for i in first_try..first_try + vec_len { + let index = i % vec_len; + let (k, v) = &self.vec[index]; + if k == key { + self.last_find.set(index); + return Some(v); + } + } + None + } } pub fn keys(&self) -> impl Iterator { diff --git a/tests/main.rs b/tests/main.rs index 6b3ccb0a..fa391958 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -873,13 +873,39 @@ fn test_4302_int_err() { } #[test] -fn lazy_index_map_prety() { +fn lazy_index_map_pretty() { let mut map = LazyIndexMap::new(); map.insert("foo".to_string(), JsonValue::Str("bar".to_string())); map.insert("spam".to_string(), JsonValue::Null); assert_eq!(format!("{map:?}"), r#"{"foo": Str("bar"), "spam": Null}"#); } +#[test] +fn lazy_index_map_small_get() { + let mut map = LazyIndexMap::new(); + map.insert("foo".to_string(), JsonValue::Str("bar".to_string())); + map.insert("spam".to_string(), JsonValue::Null); + + assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string()))); + assert_eq!(map.get("spam"), Some(&JsonValue::Null)); + assert_eq!(map.get("spam"), Some(&JsonValue::Null)); + assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string()))); +} + +#[test] +fn lazy_index_map_big_get() { + let mut map = LazyIndexMap::new(); + + for i in 0..25 { + let key = i.to_string(); + map.insert(key, JsonValue::Int(i)); + } + + assert_eq!(map.get("0"), Some(&JsonValue::Int(0))); + assert_eq!(map.get("10"), Some(&JsonValue::Int(10))); + assert_eq!(map.get("22"), Some(&JsonValue::Int(22))); +} + #[test] fn readme_jiter() { let json_data = r#"