Skip to content

Commit

Permalink
search the vec for small LazyIndexMaps
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Jan 17, 2024
1 parent 1ce40b3 commit 927f3e1
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 11 deletions.
34 changes: 32 additions & 2 deletions benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::hint::black_box;
use std::fs::File;
use std::io::Read;

use jiter::{Jiter, JsonValue, Peek};
use jiter::{Jiter, JsonValue, LazyIndexMap, Peek};
use serde_json::Value;

fn read_file(path: &str) -> String {
Expand Down Expand Up @@ -215,6 +215,33 @@ test_cases!(floats_array);
// src/github.com/json-iterator/go-benchmark/benchmark.go#L30C17-L30C29
test_cases!(medium_response);

/// Benchmark body shared by the `lazy_map_lookup_*` cases: on every iteration, fill a fresh
/// `LazyIndexMap` with `length` entries keyed by their decimal index, then read every key back.
fn lazy_map_lookup(length: i64, bench: &mut Bencher) {
    bench.iter(|| {
        let mut lookup: LazyIndexMap<String, JsonValue> = LazyIndexMap::new();
        (0..length).for_each(|n| {
            lookup.insert(n.to_string(), JsonValue::Int(n));
        });

        // keys are requested in insertion order, i.e. the best case where each hit directly
        // follows the previous one
        (0..length).for_each(|n| {
            black_box(lookup.get(&n.to_string()).unwrap());
        });
    })
}

/// Runs the `lazy_map_lookup` benchmark with 10 entries.
// NOTE(review): the leading `1_` in the name presumably just fixes the ordering of the
// benchmark output — confirm.
fn lazy_map_lookup_1_10(bench: &mut Bencher) {
lazy_map_lookup(10, bench);
}

/// Runs the `lazy_map_lookup` benchmark with 20 entries.
fn lazy_map_lookup_2_20(bench: &mut Bencher) {
lazy_map_lookup(20, bench);
}

/// Runs the `lazy_map_lookup` benchmark with 50 entries.
fn lazy_map_lookup_3_50(bench: &mut Bencher) {
lazy_map_lookup(50, bench);
}

benchmark_group!(
benches,
big_jiter_iter,
Expand Down Expand Up @@ -246,6 +273,9 @@ benchmark_group!(
true_array_serde_value,
true_object_jiter_iter,
true_object_jiter_value,
true_object_serde_value
true_object_serde_value,
lazy_map_lookup_1_10,
lazy_map_lookup_2_20,
lazy_map_lookup_3_50,
);
benchmark_main!(benches);
32 changes: 24 additions & 8 deletions src/lazy_index_map.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::borrow::Borrow;
use std::cell::Cell;
use std::cmp::{Eq, PartialEq};
use std::fmt;
use std::hash::Hash;
Expand All @@ -13,6 +14,7 @@ use smallvec::SmallVec;
/// Key/value store backed by a vec of pairs, with a hash lookup table that is only built when needed.
pub struct LazyIndexMap<K, V> {
// the key/value pairs themselves; `SmallVec` stores up to 8 of them inline
vec: SmallVec<[(K, V); 8]>,
// key -> index into `vec`, built lazily
map: OnceLock<AHashMap<K, usize>>,
// index in `vec` of the most recent successful linear-scan match; the next scan starts just after it
// NOTE(review): `Cell` is not `Sync`, so this field makes the whole map `!Sync` even though
// `OnceLock` is `Sync` — confirm that is acceptable for callers.
last_find: Cell<usize>,
}

impl<K, V> fmt::Debug for LazyIndexMap<K, V>
Expand All @@ -25,6 +27,9 @@ where
}
}

// Maps holding more than this many entries are looked up through the hashmap; maps at or below
// it are searched linearly in the vec.
// Picked to be a good tradeoff after experimenting with the `lazy_map_lookup` benchmark, should cover most models.
const HASHMAP_THRESHOLD: usize = 16;

/// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed.
impl<K, V> LazyIndexMap<K, V>
where
Expand All @@ -35,6 +40,7 @@ where
Self {
vec: SmallVec::new(),
map: OnceLock::new(),
last_find: Cell::new(0),
}
}

Expand All @@ -58,14 +64,24 @@ where
K: Borrow<Q> + PartialEq<Q>,
Q: Hash + Eq,
{
let map = self.map.get_or_init(|| {
self.vec
.iter()
.enumerate()
.map(|(index, (key, _))| (key.clone(), index))
.collect()
});
map.get(key).map(|&i| &self.vec[i].1)
let vec_len = self.vec.len();
// if the vec is longer than the threshold, we use the hashmap for lookups
if vec_len > HASHMAP_THRESHOLD {
self.get_map().get(key).map(|&i| &self.vec[i].1)
} else {
// otherwise we find the value in the vec
// we assume the most likely position for the match is at `last_find + 1`
let first_try = self.last_find.get() + 1;
for i in first_try..first_try + vec_len {
let index = i % vec_len;
let (k, v) = &self.vec[index];
if k == key {
self.last_find.set(index);
return Some(v);
}
}
None

Check warning on line 83 in src/lazy_index_map.rs

View check run for this annotation

Codecov / codecov/patch

src/lazy_index_map.rs#L83

Added line #L83 was not covered by tests
}
}

pub fn keys(&self) -> impl Iterator<Item = &K> {
Expand Down
28 changes: 27 additions & 1 deletion tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -873,13 +873,39 @@ fn test_4302_int_err() {
}

#[test]
fn lazy_index_map_prety() {
fn lazy_index_map_pretty() {
let mut map = LazyIndexMap::new();
map.insert("foo".to_string(), JsonValue::Str("bar".to_string()));
map.insert("spam".to_string(), JsonValue::Null);
assert_eq!(format!("{map:?}"), r#"{"foo": Str("bar"), "spam": Null}"#);
}

/// Lookups on a map with few enough entries that `get` searches the vec directly
/// instead of building the hashmap.
#[test]
fn lazy_index_map_small_get() {
    let mut map = LazyIndexMap::new();
    map.insert("foo".to_string(), JsonValue::Str("bar".to_string()));
    map.insert("spam".to_string(), JsonValue::Null);

    // hits in insertion order, a repeated hit, then a hit that makes the scan wrap around
    assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string())));
    assert_eq!(map.get("spam"), Some(&JsonValue::Null));
    assert_eq!(map.get("spam"), Some(&JsonValue::Null));
    assert_eq!(map.get("foo"), Some(&JsonValue::Str("bar".to_string())));

    // a miss has to scan every entry and fall through to `None`
    assert_eq!(map.get("missing"), None);
    // ...and must not disturb later lookups
    assert_eq!(map.get("spam"), Some(&JsonValue::Null));

    // an empty map has nothing to scan; the lookup must return `None` rather than panic
    let empty: LazyIndexMap<String, JsonValue> = LazyIndexMap::new();
    assert_eq!(empty.get("foo"), None);
}

/// Lookups on a 25-entry map, checking a few keys spread across the range.
#[test]
fn lazy_index_map_big_get() {
    let mut map = LazyIndexMap::new();
    (0..25_i64).for_each(|n| {
        map.insert(n.to_string(), JsonValue::Int(n));
    });

    // each key is the decimal form of the value stored under it
    for expected in [0_i64, 10, 22] {
        assert_eq!(map.get(expected.to_string().as_str()), Some(&JsonValue::Int(expected)));
    }
}

#[test]
fn readme_jiter() {
let json_data = r#"
Expand Down

0 comments on commit 927f3e1

Please sign in to comment.