Skip to content

Commit

Permalink
benchmark: use a rocksdb version of the dataset (#23)
Browse files Browse the repository at this point in the history
Use a RocksDB version of the dataset, created from the preprocessed
HashMap. The database loads almost instantly, has a peak memory usage
starting at around 75 MB, and takes about 220 MB on disk.
  • Loading branch information
nightlark committed Dec 19, 2024
1 parent ab91834 commit 1072ee8
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 4 deletions.
199 changes: 198 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ rayon = "1.10.0"
serde_json = "1.0.133"
sled = "0.34.7"
bincode = "1.3.3"
rocksdb = "0.22.0"
7 changes: 4 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::fs::metadata;
use streaming_iterator::StreamingIterator;
use tree_sitter::{Parser, Query, QueryCursor};
use walkdir::{DirEntry, WalkDir};
use rocksdb;

fn read_contents_file(file_path: &str) -> HashMap<String, Vec<(String, String)>> {
let mut package_map = HashMap::new();
Expand All @@ -32,13 +33,13 @@ fn read_contents_file(file_path: &str) -> HashMap<String, Vec<(String, String)>>
}

fn main() {
let db = sled::open("sled_db").expect("Failed to open sled database");
let db = rocksdb::DB::open_default("rocksdb").expect("Failed to open RocksDB database");

// for (key, value) in read_contents_file("Contents-amd64-noble") {
// let value_bytes: Vec<u8> = bincode::serialize(&value).expect("Failed to serialize value");
// db.insert(key, value_bytes).expect("Failed to insert into sled database");
// db.put(key, value_bytes).expect("Failed to insert into RocksDB database");
// }
// db.flush().expect("Failed to flush sled database");
// db.flush().expect("Failed to flush RocksDB database");
// return;

// let mut file_counts: Vec<_> = db.iter().map(|(file, packages)| (file, packages.len())).collect();
Expand Down

0 comments on commit 1072ee8

Please sign in to comment.