From a3d3d343f4a7a9169f6b8b5833618664b10f0532 Mon Sep 17 00:00:00 2001
From: pedrinfx
Date: Tue, 21 Feb 2023 11:46:01 -0300
Subject: [PATCH 1/6] fix: force update index

---
 src/storage/lsm/index/mod.rs | 4 ----
 src/storage/lsm/mod.rs | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/storage/lsm/index/mod.rs b/src/storage/lsm/index/mod.rs
index 0dcefb9..fdfdc1e 100644
--- a/src/storage/lsm/index/mod.rs
+++ b/src/storage/lsm/index/mod.rs
@@ -14,10 +14,6 @@ pub fn check_if_index_exists(path: &path::Path) -> bool {
 pub fn write_index(path: &path::Path, index: &HashMap) {
     let _path = path.join("index");
 
-    if index.is_empty() {
-        return;
-    }
-
     let doc = bson::to_vec(index).unwrap();
 
     let mut file = fs::File::create(_path).unwrap();
diff --git a/src/storage/lsm/mod.rs b/src/storage/lsm/mod.rs
index b59dfd7..423b060 100644
--- a/src/storage/lsm/mod.rs
+++ b/src/storage/lsm/mod.rs
@@ -221,7 +221,7 @@ impl Lsm {
         keys
     }
 
-    pub fn drop(&mut self) {
+    pub fn clear_with_bloom_filter(&mut self) {
         self.clear();
         self.bloom_filter.write().unwrap().clear();
     }

From bad317ff48ee5dd9bb8fb5d62a0e8e351355e3fd Mon Sep 17 00:00:00 2001
From: pedrinfx
Date: Tue, 21 Feb 2023 15:26:05 -0300
Subject: [PATCH 2/6] refactor: refactoring snapshots

---
 src/lib.rs | 3 +-
 src/{storage/lsm => }/snapshots/mod.rs | 89 +++++++++-----------------
 src/storage/lsm/mod.rs | 10 +--
 3 files changed, 33 insertions(+), 69 deletions(-)
 rename src/{storage/lsm => }/snapshots/mod.rs (53%)

diff --git a/src/lib.rs b/src/lib.rs
index 65753a9..3ad4be5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,10 +1,11 @@
 pub mod bloom;
 mod dustdata;
+pub mod snapshots;
 pub mod storage;
 
 pub use self::{
     dustdata::DustData, dustdata::DustDataConfig, dustdata::Error, dustdata::ErrorCode,
-    dustdata::LsmConfig, dustdata::Result, dustdata::Size, storage::lsm::snapshots,
+    dustdata::LsmConfig, dustdata::Result, dustdata::Size,
 };
 
 pub use bson;
diff --git a/src/storage/lsm/snapshots/mod.rs b/src/snapshots/mod.rs
similarity index 53%
rename from src/storage/lsm/snapshots/mod.rs
rename to src/snapshots/mod.rs
index 874b6ee..d7425a8 100644
--- a/src/storage/lsm/snapshots/mod.rs
+++ b/src/snapshots/mod.rs
@@ -1,54 +1,11 @@
+use crate::bloom::BloomFilter;
+use crate::storage::lsm::Lsm;
 use lz4::{Decoder, EncoderBuilder};
 use serde::{Deserialize, Serialize};
 use std::collections::{BTreeMap, HashMap};
 use std::fs;
 use std::io::{Read, Write};
-use std::path::PathBuf;
-
-use crate::bloom::BloomFilter;
-
-use super::Lsm;
-
-#[derive(Clone, Debug)]
-pub struct SnapshotManager {
-    path: PathBuf,
-}
-
-impl SnapshotManager {
-    pub fn new(path: PathBuf) -> Self {
-        SnapshotManager { path }
-    }
-
-    pub fn load_last_snapshot(&self) -> Snapshot {
-        let mut paths = fs::read_dir(&self.path).unwrap();
-
-        let mut last_snapshot = paths.next().unwrap().unwrap().path();
-
-        for path in paths {
-            let path = path.unwrap().path();
-
-            if path.metadata().unwrap().modified().unwrap()
-                > last_snapshot.metadata().unwrap().modified().unwrap()
-            {
-                last_snapshot = path;
-            }
-        }
-
-        let snapshot: Snapshot = Snapshot::load_snapshot(last_snapshot);
-
-        snapshot
-    }
-
-    pub fn load_snapshot_by_index(&self, index: usize) -> Snapshot {
-        let mut paths = fs::read_dir(&self.path).unwrap();
-
-        let snapshot = paths.nth(index).unwrap().unwrap().path();
-
-        let snapshot: Snapshot = Snapshot::load_snapshot(snapshot);
-
-        snapshot
-    }
-}
+use std::path::Path;
 
 #[derive(Clone, Serialize, Deserialize)]
 pub struct Snapshot {
     pub memtable: BTreeMap,
     pub bloom_filter: BloomFilter,
     pub dense_index: HashMap,
 }
 
 impl Snapshot {
     pub fn new(
         memtable: BTreeMap,
         bloom_filter: BloomFilter,
         dense_index: HashMap,
     ) -> Snapshot {
         Snapshot {
             memtable,
             bloom_filter,
             dense_index,
         }
     }
 
-    pub fn load_snapshot(path: PathBuf) -> Snapshot {
+    /// It opens a file, reads it into a buffer, and then deserializes the buffer into a BSON document
+    ///
+    /// Arguments:
+    ///
+    /// * `path`: The path to the file you want to read from.
+    ///
+    /// Returns:
+    ///
+    /// A `Snapshot` struct.
+    pub fn snapshot_from_file(path: &Path) -> Snapshot {
         let file = fs::File::open(path).unwrap();
-
         let mut decoder = Decoder::new(file).unwrap();
 
-        let mut contents = Vec::new();
-        decoder.read_to_end(&mut contents).unwrap();
-        let snapshot: Snapshot = bson::from_slice(&contents).unwrap();
-        snapshot
+        let mut snapshot = Vec::new();
+        decoder.read_to_end(&mut snapshot).unwrap();
+
+        bson::from_slice(&snapshot).unwrap()
     }
 
-    pub fn create_snapshot(lsm: &Lsm, path: PathBuf) -> String {
+    /// It creates a new directory in the path provided, creates a new snapshot, serializes it, and writes
+    /// it to a file
+    ///
+    /// Arguments:
+    ///
+    /// * `lsm`: &Lsm - the LSM tree we want to snapshot
+    /// * `path`: The path to the directory where the snapshot will be saved.
+    ///
+    /// Returns:
+    ///
+    /// A string representing the timestamp of the snapshot.
+    pub fn snapshot_to_file(lsm: &Lsm, path: &Path) -> String {
         if !path.exists() {
-            std::fs::create_dir_all(path.clone()).unwrap();
+            std::fs::create_dir_all(path).unwrap();
         }
 
         let snapshot = Snapshot::new(
@@ -121,9 +97,4 @@ impl Snapshot {
     pub fn get_dense_index(&self) -> &HashMap {
         &self.dense_index
     }
-
-    pub fn timestamp(&self) -> String {
-        let now = chrono::Local::now();
-        now.format("%Y-%m-%d-%H-%M-%S").to_string()
-    }
 }
diff --git a/src/storage/lsm/mod.rs b/src/storage/lsm/mod.rs
index 423b060..bac2c1e 100644
--- a/src/storage/lsm/mod.rs
+++ b/src/storage/lsm/mod.rs
@@ -6,12 +6,10 @@ use std::sync::{Arc, RwLock};
 
 use crate::bloom::BloomFilter;
 use crate::dustdata::{Error, ErrorCode, Result};
-
-use self::snapshots::Snapshot;
+use crate::snapshots::Snapshot;
 
 pub mod filter;
 pub mod index;
-pub mod snapshots;
 pub mod sstable;
 mod writer;
 
@@ -26,7 +24,6 @@ pub struct Lsm {
     pub memtable: Arc>>,
     pub memtable_size: usize,
     pub lsm_config: LsmConfig,
-    pub snapshots: snapshots::SnapshotManager,
     pub dense_index: Arc>>,
     pub bloom_filter: Arc>,
 }
@@ -51,17 +48,12 @@ impl Lsm {
             std::fs::create_dir_all(&config.sstable_path).unwrap();
         }
 
-        let snapshots = snapshots::SnapshotManager::new(
-            std::path::Path::new(&config.sstable_path).join("snapshots"),
-        );
-
         Lsm {
             memtable: Arc::new(RwLock::new(BTreeMap::new())),
             bloom_filter: Arc::new(RwLock::new(bloom_filter)),
             dense_index: Arc::new(RwLock::new(index)),
             lsm_config: config,
             memtable_size: 0, // The current memtable size (in bytes)
-            snapshots,
         }
     }

From 350544b4ccedf25aa722df12a967aaa0d6773018 Mon Sep 17 00:00:00 2001
From: peeeuzin
Date: Sun, 21 May 2023 19:47:48 -0300
Subject: [PATCH 3/6] refactor: refactoring lsm

---
 Cargo.lock | 6 +-
 Cargo.toml | 2 +-
 src/dustdata.rs | 245 --------------------------------
 src/lib.rs | 156 ++++++++------------
 src/snapshots/mod.rs | 100 -------------
 src/storage/lsm/error.rs | 21 +++
 src/storage/lsm/filter/mod.rs | 92 +++++++-----
 src/storage/lsm/index/mod.rs | 52 ++++---
 src/storage/lsm/memtable/mod.rs | 74 ++++++++++
 src/storage/lsm/mod.rs | 232 +++++++++++-------------
 src/storage/lsm/sstable.rs | 189 ++++++++++++------------
 src/storage/lsm/writer.rs | 30 ----
 12 files changed, 416 insertions(+), 783 deletions(-)
 delete mode 100644 src/dustdata.rs
 delete mode 100644 src/snapshots/mod.rs
 create mode 100644 src/storage/lsm/error.rs
 create mode 100644 src/storage/lsm/memtable/mod.rs
delete mode 100644 src/storage/lsm/writer.rs diff --git a/Cargo.lock b/Cargo.lock index 8f2dfd2..a71a6c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -48,14 +48,16 @@ dependencies = [ [[package]] name = "bson" -version = "2.4.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d76085681585d39016f4d3841eb019201fc54d2dd0d92ad1e4fab3bfb32754" +checksum = "9aeb8bae494e49dbc330dd23cf78f6f7accee22f640ce3ab17841badaa4ce232" dependencies = [ "ahash", "base64", + "bitvec", "hex", "indexmap", + "js-sys", "lazy_static", "rand", "serde", diff --git a/Cargo.toml b/Cargo.toml index ec129b4..fbe3882 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ exclude = [ [dependencies] -bson = "=2.4.0" +bson = "2.6.1" hex = "0.4.3" lz4 = "1.24.0" farmhash = "1.1.5" diff --git a/src/dustdata.rs b/src/dustdata.rs deleted file mode 100644 index 1660fd4..0000000 --- a/src/dustdata.rs +++ /dev/null @@ -1,245 +0,0 @@ -use super::storage; - -use std::ops::Add; -use std::path; -use storage::lsm; - -/// A LSM configuration -/// # Arguments -/// * `flush_threshold` - The number of bytes to flush before flushing to disk -/// * `detect_exit_signals` - Whether or not to detect exit signals (SIGTERM, SIGHUP, etc.) -#[derive(Clone)] -pub struct LsmConfig { - pub flush_threshold: Size, -} - -/// A DustData configuration -/// # Arguments -/// * `verbose` - Whether or not to print verbose output -/// * `path` - The path to the data directory -/// * `lsm_config` - The LSM configuration -#[derive(Clone)] -pub struct DustDataConfig { - pub path: path::PathBuf, - pub lsm_config: LsmConfig, -} - -pub struct DustData { - pub config: DustDataConfig, - pub lsm: storage::lsm::Lsm, -} - -#[derive(Clone, PartialEq, Eq, Debug)] -pub enum Size { - Bytes(usize), - Kilobytes(usize), - Megabytes(usize), - Gigabytes(usize), -} - -impl Add for Size { - type Output = Size; - - fn add(self, rhs: Self) -> Self::Output { - fn calc(a: usize, b: usize) -> Size { - let mut bytes = a + b; - - let mut gigabytes = 0; - let mut megabytes = 0; - let mut kilobytes = 0; - - while bytes >= 1024 * 1024 * 1024 { - gigabytes += 1; - bytes -= 1024 * 1024 * 1024; - } - - while bytes >= 1024 * 1024 { - megabytes += 1; - bytes -= 1024 * 1024; - } - - while bytes >= 1024 { - kilobytes += 1; - bytes -= 1024; - } - - if gigabytes > 0 { - Size::Gigabytes(gigabytes) - } else if megabytes > 0 { - Size::Megabytes(megabytes) - } else if kilobytes > 0 { - Size::Kilobytes(kilobytes) - } else { - Size::Bytes(bytes) - } - } - - match (self, rhs) { - (Size::Bytes(a), Size::Bytes(b)) => calc(a, b), - (Size::Bytes(a), Size::Kilobytes(b)) => calc(a, b * 1024), - (Size::Bytes(a), Size::Megabytes(b)) => calc(a, b * 1024 * 1024), - (Size::Bytes(a), Size::Gigabytes(b)) => calc(a, b * 1024 * 1024 * 1024), - - (Size::Kilobytes(a), Size::Bytes(b)) => calc(a * 1024, b * 1024), - (Size::Kilobytes(a), Size::Kilobytes(b)) => calc(a * 1024, b * 1024), - (Size::Kilobytes(a), Size::Megabytes(b)) => calc(a * 1024, b * 1024 * 1024), - (Size::Kilobytes(a), Size::Gigabytes(b)) => calc(a * 1024, b * 1024 * 1024 * 1024), - - (Size::Megabytes(a), Size::Bytes(b)) => calc(a * 1024 * 1024, b), - (Size::Megabytes(a), Size::Kilobytes(b)) => calc(a * 1024 * 1024, b * 1024), - (Size::Megabytes(a), Size::Megabytes(b)) => calc(a * 1024 * 1024, b * 1024 * 1024), - (Size::Megabytes(a), Size::Gigabytes(b)) => { - calc(a * 1024 * 1024, b * 1024 * 1024 * 1024) - } - - (Size::Gigabytes(a), Size::Bytes(b)) => calc(a * 1024 * 1024 * 1024, b), - (Size::Gigabytes(a), 
Size::Kilobytes(b)) => calc(a * 1024 * 1024 * 1024, b * 1024), - (Size::Gigabytes(a), Size::Megabytes(b)) => { - calc(a * 1024 * 1024 * 1024, b * 1024 * 1024) - } - (Size::Gigabytes(a), Size::Gigabytes(b)) => { - calc(a * 1024 * 1024 * 1024, b * 1024 * 1024 * 1024) - } - } - } -} - -pub fn size_to_usize(size: Size) -> usize { - match size { - Size::Bytes(bytes) => bytes, - Size::Kilobytes(kilobytes) => kilobytes * 1024, - Size::Megabytes(megabytes) => megabytes * 1024 * 1024, - Size::Gigabytes(gigabytes) => gigabytes * 1024 * 1024 * 1024, - } -} - -pub type Result = std::result::Result; - -#[derive(Debug, Clone)] -pub struct Error { - pub code: ErrorCode, - pub message: String, -} - -#[derive(Debug, Clone)] -pub enum ErrorCode { - NotFound, - KeyExists, - KeyNotExists, -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "Code: {} - Message: {}", self.code, self.message) - } -} - -impl std::fmt::Display for ErrorCode { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - ErrorCode::NotFound => write!(f, "NotFound"), - ErrorCode::KeyExists => write!(f, "KeyExists"), - ErrorCode::KeyNotExists => write!(f, "KeyNotExists"), - } - } -} - -impl DustData { - pub fn new(configuration: DustDataConfig) -> Self { - let lsm = storage::lsm::Lsm::new(lsm::LsmConfig { - flush_threshold: size_to_usize(configuration.clone().lsm_config.flush_threshold), - sstable_path: configuration.clone().path, - }); - - Self { - lsm, - config: configuration, - } - } - - /// Get a value with a key - /// # Arguments - /// - `key`: a key to search for - /// # Returns - /// - `Result>` if value was found returns a bson document - pub fn get(&self, key: &str) -> Result> { - self.lsm.get(key) - } - - /// Insert a value with a key - /// # Arguments - /// - `key`: a key. - /// - `document`: a bson document to insert - pub fn insert(&mut self, key: &str, bson: bson::Bson) -> Result<()> { - self.lsm.insert(key, bson) - } - - /// Delete a value with a key - /// # Arguments - /// - `key`: a key to search for and delete it. - pub fn delete(&mut self, key: &str) -> Result<()> { - self.lsm.delete(key) - } - - /// Update a value with a key - /// # Arguments - /// - `key`: a key to search for and update it. - /// - `document`: the new document to replace the old one. - pub fn update(&mut self, key: &str, bson: bson::Bson) -> Result<()> { - self.lsm.update(key, bson) - } - - /// Check if key exists. - /// # Arguments - /// - `key`: a key to check if exists. - pub fn contains(&mut self, key: &str) -> bool { - self.lsm.contains(key) - } - - /// List all keys. - /// # Returns - /// - `Result>` a vector of all keys. - pub fn list_keys(&self) -> Result> { - Ok(self.lsm.list_keys()) - } - - /// Flush all data to disk. - /// # Returns - /// - `Result<()>` if successful returns nothing. 
- pub fn flush(&mut self) -> Result<()> { - self.lsm.flush() - } -} - -#[cfg(test)] -mod size_tests { - use super::*; - - #[test] - fn add_impl_bytes() { - let size = Size::Bytes(1); - let size2 = Size::Bytes(2); - assert_eq!(size + size2, Size::Bytes(3)); - } - - #[test] - fn add_impl_gb() { - let size = Size::Gigabytes(1); - let size2 = Size::Gigabytes(2); - assert_eq!(size + size2, Size::Gigabytes(3)); - } - - #[test] - fn add_impl_mb() { - let size = Size::Megabytes(1); - let size2 = Size::Megabytes(2); - assert_eq!(size + size2, Size::Megabytes(3)); - } - - #[test] - fn add_impl_kb() { - let size = Size::Kilobytes(1); - let size2 = Size::Kilobytes(2); - assert_eq!(size + size2, Size::Kilobytes(3)); - } -} diff --git a/src/lib.rs b/src/lib.rs index 3ad4be5..0198290 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,124 +1,86 @@ pub mod bloom; -mod dustdata; -pub mod snapshots; pub mod storage; -pub use self::{ - dustdata::DustData, dustdata::DustDataConfig, dustdata::Error, dustdata::ErrorCode, - dustdata::LsmConfig, dustdata::Result, dustdata::Size, -}; pub use bson; -/// Initialize the database -/// # Arguments -/// * `config` - A configuration object -/// # Returns -/// - DustData object -pub fn initialize(config: dustdata::DustDataConfig) -> dustdata::DustData { - dustdata::DustData::new(config) -} - #[cfg(test)] mod dustdata_tests { use super::*; - fn get_default_config() -> DustDataConfig { - DustDataConfig { - path: std::path::Path::new("./test_data/dustdata").to_path_buf(), - lsm_config: LsmConfig { - flush_threshold: Size::Megabytes(128), - }, - } - } + use storage::lsm::{Lsm, LsmConfig}; - #[test] - fn insert_document() { - let config = get_default_config(); - - let mut dd = initialize(config); - - let now = std::time::SystemTime::now(); - dd.insert( - "insert_doc", - bson::bson!({ - "test": "test" - }), - ) - .unwrap(); - println!("Insert took: {:?}", now.elapsed().unwrap()); - - let now = std::time::SystemTime::now(); - assert!(dd.get("insert_doc").unwrap().is_some()); - println!("Get took: {:?}", now.elapsed().unwrap()); - - dd.delete("insert_doc").unwrap(); // delete the test document + fn initialize() -> Lsm { + let config = LsmConfig { + flush_threshold: 100, + sstable_path: std::path::PathBuf::from("./test_data"), + }; + + Lsm::new(config) } #[test] - fn update_document() { - let config = get_default_config(); - - let mut dd = initialize(config); - dd.insert( - "update_doc", - bson::bson!({ - "test": "test" - }), - ) - .unwrap(); - - let now = std::time::SystemTime::now(); - dd.update( - "update_doc", - bson::bson! 
({ - "test": "test2" - }), - ) - .unwrap(); - println!("Update took: {:?}", now.elapsed().unwrap()); - - let ls = dd.list_keys().unwrap(); - - let get = dd.get("update_doc").unwrap().unwrap(); - let get = get.as_document().unwrap(); - - let get = get.get("test").unwrap().as_str().unwrap(); - - assert_eq!(get, "test2"); - assert!(ls.contains(&"update_doc".to_string())); - - dd.delete("update_doc").unwrap(); // delete the test document + fn memtable_test() { + let mut dd = initialize(); + + let doc = bson::bson!({ + "name": "Pedro", + "age": 20, + "address": { + "street": "123 Main St", + "city": "New York", + "state": "NY", + "zip": 10001 + }, + }); + + dd.insert("user:1", doc).unwrap(); + + assert!(dd.contains("user:1")); + + let user = dd.get("user:1").unwrap().unwrap(); + let user = user.as_document().unwrap(); + + let name = user.get("name").unwrap().as_str().unwrap(); + assert_eq!(name, "Pedro"); + + let age = user.get("age").unwrap().as_i32().unwrap(); + assert_eq!(age, 20); + + dd.delete("user:1").unwrap(); } #[test] - fn reading_on_sstable() { - let config = get_default_config(); + fn sstable_test() { + let mut dd = initialize(); + + let doc = bson::bson!({ + "name": "John", + "age": 26, + "address": { + "street": "123 Main St", + "city": "New York", + "state": "NY", + "zip": 10001 + }, + }); - let mut dd = initialize(config); + dd.insert("user:2", doc).unwrap(); - dd.insert( - "read_sstable", - bson::bson!({ - "test": "test" - }), - ) - .unwrap(); + assert!(dd.contains("user:2")); - // flush the sstable dd.flush().unwrap(); - let ls = dd.list_keys().unwrap(); + assert!(dd.contains("user:2")); - let now = std::time::SystemTime::now(); - let get = dd.get("read_sstable").unwrap().unwrap(); - println!("SSTable Get took: {:?}", now.elapsed().unwrap()); - let get = get.as_document().unwrap(); + let user = dd.get("user:2").unwrap().unwrap(); + let user = user.as_document().unwrap(); - let get = get.get("test").unwrap().as_str().unwrap(); + let name = user.get("name").unwrap().as_str().unwrap(); + assert_eq!(name, "John"); - assert_eq!(get, "test"); - assert!(ls.contains(&"read_sstable".to_string())); + let age = user.get("age").unwrap().as_i32().unwrap(); + assert_eq!(age, 26); - dd.delete("read_sstable").unwrap(); // delete the test document + dd.delete("user:2").unwrap(); } } diff --git a/src/snapshots/mod.rs b/src/snapshots/mod.rs deleted file mode 100644 index d7425a8..0000000 --- a/src/snapshots/mod.rs +++ /dev/null @@ -1,100 +0,0 @@ -use crate::bloom::BloomFilter; -use crate::storage::lsm::Lsm; -use lz4::{Decoder, EncoderBuilder}; -use serde::{Deserialize, Serialize}; -use std::collections::{BTreeMap, HashMap}; -use std::fs; -use std::io::{Read, Write}; -use std::path::Path; - -#[derive(Clone, Serialize, Deserialize)] -pub struct Snapshot { - pub memtable: BTreeMap, - pub bloom_filter: BloomFilter, - pub dense_index: HashMap, -} - -impl Snapshot { - pub fn new( - memtable: BTreeMap, - bloom_filter: BloomFilter, - dense_index: HashMap, - ) -> Snapshot { - Snapshot { - memtable, - bloom_filter, - dense_index, - } - } - - /// It opens a file, reads it into a buffer, and then deserializes the buffer into a BSON document - /// - /// Arguments: - /// - /// * `path`: The path to the file you want to read from. - /// - /// Returns: - /// - /// A `Snapshot` struct. 
- pub fn snapshot_from_file(path: &Path) -> Snapshot { - let file = fs::File::open(path).unwrap(); - let mut decoder = Decoder::new(file).unwrap(); - - let mut snapshot = Vec::new(); - decoder.read_to_end(&mut snapshot).unwrap(); - - bson::from_slice(&snapshot).unwrap() - } - - /// It creates a new directory in the path provided, creates a new snapshot, serializes it, and writes - /// it to a file - /// - /// Arguments: - /// - /// * `lsm`: &Lsm - the LSM tree we want to snapshot - /// * `path`: The path to the directory where the snapshot will be saved. - /// - /// Returns: - /// - /// A string representing the timestamp of the snapshot. - pub fn snapshot_to_file(lsm: &Lsm, path: &Path) -> String { - if !path.exists() { - std::fs::create_dir_all(path).unwrap(); - } - - let snapshot = Snapshot::new( - lsm.memtable.read().unwrap().clone(), - lsm.bloom_filter.read().unwrap().clone(), - lsm.dense_index.read().unwrap().clone(), - ); - - let now = chrono::Local::now(); - let timestamp = now.format("%Y-%m-%d-%H-%M-%S").to_string(); - - let snapshot_path = path.join(timestamp.clone()); - let file = fs::File::create(snapshot_path).unwrap(); - - let snapshot = bson::to_vec(&snapshot).unwrap(); - - let mut encoder = EncoderBuilder::new() - .build(file) - .expect("cannot create encoder"); - - encoder.write_all(&snapshot).unwrap(); - encoder.flush().unwrap(); - - timestamp - } - - pub fn get_memtable(&self) -> &BTreeMap { - &self.memtable - } - - pub fn get_bloom_filter(&self) -> &BloomFilter { - &self.bloom_filter - } - - pub fn get_dense_index(&self) -> &HashMap { - &self.dense_index - } -} diff --git a/src/storage/lsm/error.rs b/src/storage/lsm/error.rs new file mode 100644 index 0000000..b1c817d --- /dev/null +++ b/src/storage/lsm/error.rs @@ -0,0 +1,21 @@ +#[derive(Debug)] +pub struct Error { + pub code: ErrorKind, +} + +impl Error { + pub fn new(code: ErrorKind) -> Self { + Self { code } + } +} + +#[derive(Debug)] +pub enum ErrorKind { + AlreadyExists, + KeyNotFound, + IoError, + Corrupted, + Other, +} + +pub type Result = std::result::Result; diff --git a/src/storage/lsm/filter/mod.rs b/src/storage/lsm/filter/mod.rs index 77b513e..62a0f45 100644 --- a/src/storage/lsm/filter/mod.rs +++ b/src/storage/lsm/filter/mod.rs @@ -3,60 +3,74 @@ use lz4::{Decoder, EncoderBuilder}; use std::{ io::{Read, Write}, path, + sync::{Arc, RwLock}, }; -pub fn check_if_filter_exists(path: &path::Path) -> bool { - let _path = path.join("filter"); - - _path.exists() +#[derive(Clone)] +pub struct Filter { + pub bloom: Arc>, + path: path::PathBuf, } -pub fn write_filter(path: &path::Path, filter: &BloomFilter) { - let _path = path.join("filter"); - - if !check_if_filter_exists(path) { - std::fs::create_dir_all(_path.clone()).unwrap(); +impl Filter { + pub fn new(path: path::PathBuf) -> Self { + let path = path.join("sstable.filter"); + let bloom_rate = 0.01; + + let bloom_filter = if path.exists() { + Filter::read_filter(&path) + } else { + BloomFilter::new(bloom_rate, 100000) + }; + + Self { + bloom: Arc::new(RwLock::new(bloom_filter)), + path, + } } - // bitvec - - let bitvec_file = std::fs::File::create(_path.join("bitvec")).unwrap(); - - let mut encoder = EncoderBuilder::new() - .level(4) - .build(bitvec_file) - .expect("cannot create encoder"); + fn write_filter(path: &path::Path, filter: &BloomFilter) { + let filter_file = std::fs::File::create(path).unwrap(); - encoder.write_all(&filter.bitvec).unwrap(); + let mut encoder = EncoderBuilder::new() + .level(4) + .build(filter_file) + .expect("cannot create encoder"); 
- encoder.flush().unwrap(); + let filter_content = bson::to_vec(filter).unwrap(); - // hashes - - let mut hashes_file = std::fs::File::create(_path.join("hashes")).unwrap(); - hashes_file.write_all(&filter.hashes.to_le_bytes()).unwrap(); - - hashes_file.sync_all().unwrap(); -} + encoder.write_all(&filter_content).unwrap(); + encoder.flush().unwrap(); + } -pub fn read_filter(path: &path::Path) -> BloomFilter { - let _path = path.join("filter"); + fn read_filter(path: &path::Path) -> BloomFilter { + let filter_file = std::fs::File::open(path).unwrap(); + let mut decoder = Decoder::new(filter_file).unwrap(); - // bitvec + let mut filter: Vec = Vec::new(); + decoder.read_to_end(&mut filter).unwrap(); - let bitvec_file = std::fs::File::open(_path.join("bitvec")).unwrap(); - let mut decoder = Decoder::new(bitvec_file).unwrap(); + bson::from_slice(&filter).unwrap() + } - let mut bitvec: Vec = Vec::new(); - decoder.read_to_end(&mut bitvec).unwrap(); + pub fn insert(&mut self, key: &str) { + self.bloom.write().unwrap().insert(key); + } - // hashes + pub fn contains(&self, key: &str) -> bool { + self.bloom.read().unwrap().contains(key) + } - let mut hashes_file = std::fs::File::open(_path.join("hashes")).unwrap(); - let mut hashes_read: Vec = Vec::new(); - hashes_file.read_to_end(&mut hashes_read).unwrap(); + pub fn delete(&mut self, key: &str) { + self.bloom.write().unwrap().delete(key); + } - let hashes = i64::from_le_bytes(hashes_read.try_into().unwrap()); + pub fn flush(&mut self) { + let filter = self.bloom.read().unwrap().clone(); + Filter::write_filter(&self.path, &filter); + } - BloomFilter { bitvec, hashes } + pub fn clear(&mut self) { + self.bloom.write().unwrap().clear(); + } } diff --git a/src/storage/lsm/index/mod.rs b/src/storage/lsm/index/mod.rs index fdfdc1e..502e831 100644 --- a/src/storage/lsm/index/mod.rs +++ b/src/storage/lsm/index/mod.rs @@ -2,35 +2,49 @@ use std::{ collections::HashMap, fs, io::{Read, Write}, + ops::Deref, path, + sync::{Arc, RwLock}, }; -pub fn check_if_index_exists(path: &path::Path) -> bool { - let _path = path.join("index"); - - _path.exists() +#[derive(Clone)] +pub struct Index { + pub index: Arc>>, + pub index_path: path::PathBuf, } -pub fn write_index(path: &path::Path, index: &HashMap) { - let _path = path.join("index"); +impl Index { + pub fn new(index_path: path::PathBuf) -> Self { + let index = if index_path.exists() { + Index::read_index(&index_path) + } else { + HashMap::new() + }; - let doc = bson::to_vec(index).unwrap(); + Self { + index: Arc::new(RwLock::new(index)), + index_path, + } + } - let mut file = fs::File::create(_path).unwrap(); - file.write_all(&doc).unwrap(); + fn read_index(path: &path::Path) -> HashMap { + let mut file = fs::File::open(path).unwrap(); + let mut bytes_to_read: Vec = Vec::new(); + file.read_to_end(&mut bytes_to_read).unwrap(); - file.sync_data().unwrap(); - file.flush().unwrap(); -} + let index_bson: HashMap = bson::from_slice(&bytes_to_read).unwrap(); -pub fn read_index(path: &path::Path) -> HashMap { - let _path = path.join("index"); + index_bson + } - let mut file = fs::File::open(_path).unwrap(); - let mut bytes_to_read: Vec = Vec::new(); - file.read_to_end(&mut bytes_to_read).unwrap(); + pub fn write_index(&self) { + let index = self.index.read().unwrap(); + let doc = bson::to_vec(index.deref()).unwrap(); - let index_bson: HashMap = bson::from_slice(&bytes_to_read).unwrap(); + let mut file = fs::File::create(self.index_path.clone()).unwrap(); + file.write_all(&doc).unwrap(); - index_bson + 
file.sync_all().unwrap(); + file.flush().unwrap(); + } } diff --git a/src/storage/lsm/memtable/mod.rs b/src/storage/lsm/memtable/mod.rs new file mode 100644 index 0000000..0f69942 --- /dev/null +++ b/src/storage/lsm/memtable/mod.rs @@ -0,0 +1,74 @@ +use std::{ + collections::HashMap, + sync::{Arc, RwLock}, +}; + +use super::error::{Error, ErrorKind, Result}; + +#[derive(Clone)] +pub struct Memtable { + pub table: Arc>>, +} + +#[allow(clippy::new_without_default)] +impl Memtable { + pub fn new() -> Self { + Self { + table: Arc::new(RwLock::new(HashMap::new())), + } + } + + pub fn insert(&mut self, key: &str, value: bson::Bson) -> Result<()> { + if self.contains(key) { + return Err(Error::new(ErrorKind::AlreadyExists)); + } + + self.table.write().unwrap().insert(key.to_string(), value); + + Ok(()) + } + + pub fn contains(&self, key: &str) -> bool { + self.table.read().unwrap().contains_key(key) + } + + pub fn get(&self, key: &str) -> Option { + self.table.read().unwrap().get(key).cloned() + } + + pub fn delete(&mut self, key: &str) -> Result> { + if !self.contains(key) { + return Err(Error::new(ErrorKind::KeyNotFound)); + } + + let value = self.table.write().unwrap().remove(key); + + Ok(value) + } + + pub fn update(&mut self, key: &str, new_value: bson::Bson) -> Result> { + if !self.contains(key) { + return Err(Error::new(ErrorKind::KeyNotFound)); + } + + let old_value = self + .table + .write() + .unwrap() + .insert(key.to_string(), new_value); + + Ok(old_value) + } + + pub fn clear(&mut self) { + self.table.write().unwrap().clear(); + } + + pub fn is_empty(&self) -> bool { + self.table.read().unwrap().is_empty() + } + + pub fn get_memtable(&self) -> HashMap { + self.table.read().unwrap().clone() + } +} diff --git a/src/storage/lsm/mod.rs b/src/storage/lsm/mod.rs index bac2c1e..ce57cf4 100644 --- a/src/storage/lsm/mod.rs +++ b/src/storage/lsm/mod.rs @@ -1,17 +1,13 @@ -use std::collections::{BTreeMap, HashMap}; -use std::mem; use std::ops::Deref; -use std::path; -use std::sync::{Arc, RwLock}; - -use crate::bloom::BloomFilter; -use crate::dustdata::{Error, ErrorCode, Result}; -use crate::snapshots::Snapshot; +use std::{mem, path}; +pub mod error; pub mod filter; pub mod index; +pub mod memtable; pub mod sstable; -mod writer; + +use error::{Error, ErrorKind, Result}; #[derive(Clone, Debug)] pub struct LsmConfig { @@ -21,63 +17,42 @@ pub struct LsmConfig { #[derive(Clone)] pub struct Lsm { - pub memtable: Arc>>, - pub memtable_size: usize, + pub memtable: memtable::Memtable, pub lsm_config: LsmConfig, - pub dense_index: Arc>>, - pub bloom_filter: Arc>, + pub dense_index: index::Index, + pub bloom_filter: filter::Filter, + pub sstable: sstable::SSTable, } impl Lsm { - pub fn new(config: LsmConfig) -> Lsm { - let bloom_rate = 0.01; - - let index = if index::check_if_index_exists(&config.sstable_path) { - index::read_index(&config.sstable_path) - } else { - HashMap::new() - }; - - let bloom_filter = if filter::check_if_filter_exists(&config.sstable_path) { - filter::read_filter(&config.sstable_path) - } else { - BloomFilter::new(bloom_rate, 100000) - }; - - if !path::Path::new(&config.sstable_path).exists() { - std::fs::create_dir_all(&config.sstable_path).unwrap(); - } + pub fn new(lsm_config: LsmConfig) -> Lsm { + let dense_index = index::Index::new(lsm_config.sstable_path.join("sstable.index")); + let sstable = sstable::SSTable::new(lsm_config.clone().sstable_path); + let bloom_filter = filter::Filter::new(lsm_config.clone().sstable_path); + let memtable = memtable::Memtable::new(); Lsm { - 
memtable: Arc::new(RwLock::new(BTreeMap::new())), - bloom_filter: Arc::new(RwLock::new(bloom_filter)), - dense_index: Arc::new(RwLock::new(index)), - lsm_config: config, - memtable_size: 0, // The current memtable size (in bytes) + memtable, + bloom_filter, + dense_index, + sstable, + lsm_config, } } - pub fn insert(&mut self, key: &str, value: bson::Bson) -> Result<()> { + pub fn insert(&mut self, key: &str, value: bson::Bson) -> Result { if self.contains(key) { - return Err(Error { - code: ErrorCode::KeyExists, - message: "Key already exists".to_string(), - }); + return Err(Error::new(ErrorKind::AlreadyExists)); } - self.memtable_size += mem::size_of_val(&value); - - self.memtable - .write() - .unwrap() - .insert(key.to_string(), value); - self.bloom_filter.write().unwrap().insert(key); + self.memtable.insert(key, value.clone())?; + self.bloom_filter.insert(key); - if self.memtable_size >= self.lsm_config.flush_threshold { + if mem::size_of_val(&self.memtable.table) >= self.lsm_config.flush_threshold { self.flush().unwrap(); } - Ok(()) + Ok(value) } pub fn get(&self, key: &str) -> Result> { @@ -85,171 +60,130 @@ impl Lsm { return Ok(None); } - let memtable = self.memtable.read().unwrap(); - - match memtable.get(&key.to_string()) { - Some(document) => Ok(Some(document.clone())), + match self.memtable.get(key) { + Some(document) => Ok(Some(document)), None => { - let dense_index = self.dense_index.read().unwrap(); - let offset = dense_index.get(&key.to_string()).unwrap(); - Ok(sstable::Segment::read_with_offset( - offset.to_string(), - &self.lsm_config.sstable_path, - )) + let dense_index = self.dense_index.index.read().unwrap(); + let (file_index, offset) = dense_index.get(&key.to_string()).unwrap(); + + self.sstable.get(file_index, offset) } } } - pub fn delete(&mut self, key: &str) -> Result<()> { + pub fn delete(&mut self, key: &str) -> Result> { if !self.contains(key) { - return Err(Error { - code: ErrorCode::KeyNotExists, - message: "Key does not exist".to_string(), - }); + return Err(Error::new(ErrorKind::KeyNotFound)); } - let value = self.memtable.write().unwrap().remove(&key.to_string()); - self.dense_index.write().unwrap().remove(&key.to_string()); - self.bloom_filter.write().unwrap().delete(key); + let value = self.get(key).unwrap(); - if let Some(value) = value { - self.memtable_size += mem::size_of_val(&value); - } + self.memtable.delete(key).ok(); + self.dense_index + .index + .write() + .unwrap() + .remove(&key.to_string()); + self.bloom_filter.delete(key); - Ok(()) + Ok(value) } pub fn update(&mut self, key: &str, value: bson::Bson) -> Result<()> { if !self.contains(key) { - return Err(Error { - code: ErrorCode::KeyNotExists, - message: "Key does not exist".to_string(), - }); + return Err(Error::new(ErrorKind::KeyNotFound)); } - let mut memtable = self.memtable.write().unwrap(); - let mut bloom_filter = self.bloom_filter.write().unwrap(); - // Delete the old value from the bloom filter - bloom_filter.delete(key); + self.bloom_filter.delete(key); - let mut dense_index = self.dense_index.write().unwrap(); + let mut dense_index = self.dense_index.index.write().unwrap(); dense_index.remove(&key.to_string()); drop(dense_index); - memtable.insert(key.to_string(), value); - drop(memtable); + self.memtable.insert(key, value)?; - bloom_filter.insert(key); - drop(bloom_filter); + self.bloom_filter.insert(key); Ok(()) } pub fn flush(&mut self) -> Result<()> { - let memtable = self.get_memtable(); - let mut dense_index = self.dense_index.write().unwrap(); - - if memtable.is_empty() { 
- index::write_index(&self.lsm_config.sstable_path, dense_index.deref()); - filter::write_filter( - &self.lsm_config.sstable_path, - self.bloom_filter.read().unwrap().deref(), - ); + if self.memtable.is_empty() { + self.dense_index.write_index(); + self.bloom_filter.flush(); Ok(()) } else { - let segments = sstable::Segment::from_tree(&memtable, &self.lsm_config.sstable_path); + let memtable = self.memtable.get_memtable(); + let segments = sstable::Segment::from_tree(&memtable); - for token in segments.1 { - dense_index.insert(token.0, token.1); - } + let file_index = self.sstable.write_segment_file(segments.0).unwrap(); - index::write_index(&self.lsm_config.sstable_path, dense_index.deref()); + let mut dense_index = self.dense_index.index.write().unwrap(); + + for (key, offset) in segments.1 { + dense_index.insert(key.to_string(), (file_index, offset)); + } drop(dense_index); - filter::write_filter( - &self.lsm_config.sstable_path, - self.bloom_filter.read().unwrap().deref(), - ); + self.dense_index.write_index(); - self.memtable.write().unwrap().clear(); - self.memtable_size = 0; + self.bloom_filter.flush(); + + self.memtable.clear(); Ok(()) } } - pub fn get_memtable(&self) -> BTreeMap { - self.memtable.read().unwrap().clone() - } - pub fn contains(&self, key: &str) -> bool { - self.bloom_filter.read().unwrap().contains(key) + self.bloom_filter.contains(key) } - pub fn clear(&self) { - self.memtable.write().unwrap().clear(); - self.dense_index.write().unwrap().clear(); - } - - pub fn update_index(&self) { - let index = self.dense_index.read().unwrap().clone(); - index::write_index(&self.lsm_config.sstable_path, &index); + pub fn clear(&mut self) { + self.memtable.clear(); + self.dense_index.index.write().unwrap().clear(); + self.bloom_filter.clear(); } pub fn list_keys(&self) -> Vec { let mut keys = Vec::new(); - for key in self.memtable.read().unwrap().keys() { + for key in self.memtable.table.read().unwrap().keys() { keys.push(key.clone()); } - for key in self.dense_index.read().unwrap().keys() { + for key in self.dense_index.index.read().unwrap().keys() { keys.push(key.clone()); } keys } - - pub fn clear_with_bloom_filter(&mut self) { - self.clear(); - self.bloom_filter.write().unwrap().clear(); - } - - pub fn load_snapshot(path: path::PathBuf, snapshot: Snapshot) { - sstable::Segment::from_tree(snapshot.get_memtable(), &path); - index::write_index(&path, snapshot.get_dense_index()); - filter::write_filter(&path, snapshot.get_bloom_filter()); - } } impl Drop for Lsm { fn drop(&mut self) { - let memtable = self.memtable.read().unwrap(); - let mut dense_index = self.dense_index.write().unwrap(); - - if memtable.len() == 0 { - index::write_index(&self.lsm_config.sstable_path, dense_index.deref()); - filter::write_filter( - &self.lsm_config.sstable_path, - self.bloom_filter.read().unwrap().deref(), - ); + if self.memtable.is_empty() { + self.dense_index.write_index(); + self.bloom_filter.flush(); } else { - let segments = - sstable::Segment::from_tree(memtable.deref(), &self.lsm_config.sstable_path); + let memtable = self.memtable.table.read().unwrap(); + + let segments = sstable::Segment::from_tree(memtable.deref()); + let file_index = self.sstable.write_segment_file(segments.0).unwrap(); - for token in segments.1 { - dense_index.insert(token.0, token.1); + let mut dense_index = self.dense_index.index.write().unwrap(); + + for (key, offset) in segments.1 { + dense_index.insert(key.to_string(), (file_index, offset)); } - index::write_index(&self.lsm_config.sstable_path, 
dense_index.deref()); + drop(dense_index); - filter::write_filter( - &self.lsm_config.sstable_path, - self.bloom_filter.read().unwrap().deref(), - ); + self.dense_index.write_index(); + self.bloom_filter.flush(); } } } diff --git a/src/storage/lsm/sstable.rs b/src/storage/lsm/sstable.rs index 62ddadb..a601bfd 100644 --- a/src/storage/lsm/sstable.rs +++ b/src/storage/lsm/sstable.rs @@ -1,16 +1,14 @@ -use std::collections::BTreeMap; +use std::collections::HashMap; use std::fs; use std::io::{Read, Seek, SeekFrom}; use std::path; -use bson::Document; +use super::error::{Error, ErrorKind, Result}; -use super::writer::Writer; +fn get_last_file_index(path: path::PathBuf) -> usize { + let files = fs::read_dir(path).unwrap(); -fn get_last_index(path: path::PathBuf) -> usize { - let segments = fs::read_dir(path).unwrap(); - - segments + files .filter(|segment| { let segment = segment.as_ref().unwrap(); @@ -19,127 +17,116 @@ fn get_last_index(path: path::PathBuf) -> usize { .count() } -fn create_filename(path: path::PathBuf) -> String { - let count = get_last_index(path); +pub struct Segment; - format!("Data_{}_{}.db", count, env!("CARGO_PKG_VERSION")) -} +impl Segment { + /// Returns the segment in bytes and the offset of each document + pub fn from_tree(tree: &HashMap) -> (Vec, Vec<(&String, u64)>) { + let mut segment = Vec::new(); + let mut offsets = Vec::new(); -fn get_file_that_starts_with_index(path: path::PathBuf, index: usize) -> String { - let mut segments = fs::read_dir(path).unwrap(); - let segment = segments.find(|segment| { - let segment = segment.as_ref().unwrap(); - - segment - .file_name() - .to_str() - .unwrap() - .starts_with(&format!("Data_{}", index)) - }); - - segment - .unwrap() - .unwrap() - .file_name() - .to_str() - .unwrap() - .to_string() -} + for (key, value) in tree.iter() { + let offset = segment.len() as u64; -impl Writer for Segment { - fn file(&self) -> fs::File { - self.file.try_clone().unwrap() - } + // we need to wrap the value in a document + let value_to_doc = bson::doc! 
{ + "_": value, + }; + + // extend the segment (the document length is already in the bson document) + let bytes_value = bson::to_vec(&value_to_doc).unwrap(); + segment.extend_from_slice(&bytes_value); + + // push the key and the offset - fn file_index(&self) -> usize { - self.file_index + offsets.push((key, offset)); + } + + (segment, offsets) } -} -#[derive(Clone, Debug)] -pub struct Token { - pub key: String, - pub value: bson::Document, - pub segment_offset: usize, -} + pub fn read_with_offset(offset: u64, segment: Vec) -> Result> { + // read the first bytes to see document length + let mut bson_length = [0; 4]; -pub struct Segment { - pub path_data: String, - pub file: fs::File, - pub file_index: usize, -} + let mut cursor = std::io::Cursor::new(segment); -impl Segment { - pub fn new(path: &path::Path) -> Self { - let _path = path.join("data"); + // seek to offset pos and read the first byte + cursor.seek(SeekFrom::Start(offset)).unwrap(); + cursor.read_exact(&mut bson_length).unwrap(); - if !_path.exists() { - fs::create_dir_all(_path.clone()).unwrap(); - } + let bson_length = i32::from_le_bytes(bson_length); + // now we know the document length, we can read the document - let filename = create_filename(_path.clone()); - let count = get_last_index(_path.clone()); + let mut document_bytes = vec![0; bson_length as usize]; - let file_path = _path.join(filename); + // go to the offset again and read the document + cursor.seek(SeekFrom::Start(offset)).unwrap(); + cursor.read_exact(&mut document_bytes).unwrap(); - Self { - path_data: file_path.clone().to_str().unwrap().to_string(), - file: fs::File::create(file_path).unwrap(), - file_index: count, - } - } + // deserialize the document + let doc: bson::Document = + bson::from_slice(&document_bytes).map_err(|_| Error::new(ErrorKind::Corrupted))?; + + let bson = doc.get("_").unwrap().clone(); - pub fn read_with_offset(offset: String, path: &path::Path) -> Option { - let splited_offset = offset.split('_').collect::>(); - let file_index = splited_offset[0].parse::().unwrap(); - let offset = splited_offset[1].parse::().unwrap(); + Ok(Some(bson)) // done + } +} - let path = path.join("data"); - let file_path = path.join(get_file_that_starts_with_index( - (*path).to_path_buf(), - file_index as usize, - )); +#[derive(Clone)] +pub struct SSTable { + path: path::PathBuf, +} - if !path.exists() { - return None; +impl SSTable { + pub fn new(sstable_path: path::PathBuf) -> Self { + if !path::Path::new(&sstable_path).exists() { + std::fs::create_dir_all(&sstable_path).unwrap(); } - let mut file = fs::File::open(file_path).unwrap(); + Self { path: sstable_path } + } + + pub fn write_segment_file(&self, segment: Vec) -> std::io::Result { + let segment_index = get_last_file_index(self.path.clone()); + let filename = format!("Data_{}.db", segment_index); - let mut document_length = [0; 1]; - file.seek(SeekFrom::Start(offset)).unwrap(); - file.read_exact(&mut document_length).unwrap(); + // write metadata into segment + let metadata = bson::doc! 
{ + "version": env!("CARGO_PKG_VERSION"), + }; - let mut document_bytes = vec![0; document_length[0] as usize]; - file.seek(SeekFrom::Start(offset)).unwrap(); - file.read_exact(&mut document_bytes).unwrap(); + let mut full_file = Vec::new(); + full_file.extend_from_slice(&bson::to_vec(&metadata).unwrap()); - let document: Document = bson::from_slice(&document_bytes).unwrap(); + full_file.extend_from_slice(&segment); - Some(document.get("_v").unwrap().clone()) - } + fs::write(self.path.join(filename), full_file)?; - pub fn write(&mut self, key: &str, value: bson::Bson) -> (String, String) { - // Returns the key and the offset to put in sparse index - (key.to_string(), self.persist(key, value).unwrap()) + Ok(segment_index) } - pub fn from_tree( - tree: &BTreeMap, - path: &path::Path, - ) -> (Segment, Vec<(String, String)>) { - let mut segment = Segment::new(path); + pub fn read_segment_file(&self, segment_index: usize) -> std::io::Result> { + let filename = format!("Data_{}.db", segment_index); - let mut tree = tree.iter().collect::>(); - tree.sort_by(|a, b| a.0.cmp(b.0)); + let segment_with_metadata = fs::read(self.path.join(filename))?; - let mut tokens: Vec<(String, String)> = Vec::new(); + let mut metadata_length = [0; 4]; + metadata_length.copy_from_slice(&segment_with_metadata[0..4]); - for (key, value) in tree.iter() { - let token = segment.write(&(*key).clone(), (*value).clone()); - tokens.push(token); - } + let metadata_length = i32::from_le_bytes(metadata_length); + + let segment_without_metadata = segment_with_metadata.split_at(metadata_length as usize).1; + + Ok(segment_without_metadata.to_vec()) + } + + pub fn get(&self, file_index: &usize, offset: &u64) -> Result> { + let segment = self.read_segment_file(*file_index).unwrap(); + + let document = Segment::read_with_offset(*offset, segment).unwrap(); - (segment, tokens) + Ok(document) } } diff --git a/src/storage/lsm/writer.rs b/src/storage/lsm/writer.rs deleted file mode 100644 index ffd1955..0000000 --- a/src/storage/lsm/writer.rs +++ /dev/null @@ -1,30 +0,0 @@ -use std::fs::File; -use std::io::Write; - -pub const SEGMENT_SEPARATOR: u8 = 0xAC; - -pub trait Writer { - fn file(&self) -> File; - fn file_index(&self) -> usize; - - /// # Returns - /// - `Ok(String)` if the write was successful returns the offset - /// - `Err(())` if the write failed - fn persist(&mut self, key: &str, value: bson::Bson) -> Result { - let temp_doc = bson::doc! 
{ - "_k": key, - "_v": value, - }; - - let mut file = self.file(); - - let offset = file.metadata().unwrap().len(); - - let mut bytes = bson::to_vec(&temp_doc).unwrap(); - bytes.push(SEGMENT_SEPARATOR); - - file.write_all(&bytes).unwrap(); - - Ok(format!("{}_{}", self.file_index(), offset)) - } -} From 72a7c6e971665c4ceb865c3c9f87b4ff1039f63f Mon Sep 17 00:00:00 2001 From: peeeuzin Date: Mon, 22 May 2023 21:53:53 -0300 Subject: [PATCH 4/6] feat: using `logging` instead of `snapshot` --- Cargo.lock | 11 +++ Cargo.toml | 3 +- src/storage/lsm/filter/mod.rs | 21 +++++- src/storage/lsm/index/mod.rs | 21 ++++++ src/storage/lsm/logging/mod.rs | 130 +++++++++++++++++++++++++++++++++ src/storage/lsm/mod.rs | 83 +++++++++++++-------- src/storage/lsm/sstable.rs | 6 +- 7 files changed, 239 insertions(+), 36 deletions(-) create mode 100644 src/storage/lsm/logging/mod.rs diff --git a/Cargo.lock b/Cargo.lock index a71a6c9..48ee920 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,6 +168,7 @@ dependencies = [ "bson", "chrono", "farmhash", + "fs2", "hex", "lz4", "serde", @@ -179,6 +180,16 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f35ce9c8fb9891c75ceadbc330752951a4e369b50af10775955aeb9af3eee34b" +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "funty" version = "2.0.0" diff --git a/Cargo.toml b/Cargo.toml index fbe3882..7f9ca9b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,4 +22,5 @@ lz4 = "1.24.0" farmhash = "1.1.5" bitvec = "1.0.1" serde = {version = "1.0.148", features = ["derive"]} -chrono = "0.4.23" \ No newline at end of file +chrono = "0.4.23" +fs2 = "0.4.3" \ No newline at end of file diff --git a/src/storage/lsm/filter/mod.rs b/src/storage/lsm/filter/mod.rs index 62a0f45..4dbd716 100644 --- a/src/storage/lsm/filter/mod.rs +++ b/src/storage/lsm/filter/mod.rs @@ -6,6 +6,8 @@ use std::{ sync::{Arc, RwLock}, }; +use fs2::FileExt; + #[derive(Clone)] pub struct Filter { pub bloom: Arc>, @@ -14,7 +16,7 @@ pub struct Filter { impl Filter { pub fn new(path: path::PathBuf) -> Self { - let path = path.join("sstable.filter"); + let path = path.join("DUSTDATA.filter"); let bloom_rate = 0.01; let bloom_filter = if path.exists() { @@ -32,6 +34,8 @@ impl Filter { fn write_filter(path: &path::Path, filter: &BloomFilter) { let filter_file = std::fs::File::create(path).unwrap(); + filter_file.lock_exclusive().unwrap(); + let mut encoder = EncoderBuilder::new() .level(4) .build(filter_file) @@ -41,10 +45,15 @@ impl Filter { encoder.write_all(&filter_content).unwrap(); encoder.flush().unwrap(); + + encoder.writer().unlock().unwrap(); } fn read_filter(path: &path::Path) -> BloomFilter { let filter_file = std::fs::File::open(path).unwrap(); + + filter_file.lock_exclusive().unwrap(); + let mut decoder = Decoder::new(filter_file).unwrap(); let mut filter: Vec = Vec::new(); @@ -74,3 +83,13 @@ impl Filter { self.bloom.write().unwrap().clear(); } } + +impl Drop for Filter { + fn drop(&mut self) { + self.flush(); + + let file = std::fs::File::open(self.path.clone()).unwrap(); + + file.unlock().unwrap(); + } +} diff --git a/src/storage/lsm/index/mod.rs b/src/storage/lsm/index/mod.rs index 502e831..c29e196 100644 --- a/src/storage/lsm/index/mod.rs +++ b/src/storage/lsm/index/mod.rs @@ -7,6 +7,8 @@ use std::{ sync::{Arc, RwLock}, }; +use fs2::FileExt; + #[derive(Clone)] 
pub struct Index { pub index: Arc>>, @@ -15,6 +17,8 @@ pub struct Index { impl Index { pub fn new(index_path: path::PathBuf) -> Self { + let index_path = index_path.join("DUSTDATA.index"); + let index = if index_path.exists() { Index::read_index(&index_path) } else { @@ -29,6 +33,9 @@ impl Index { fn read_index(path: &path::Path) -> HashMap { let mut file = fs::File::open(path).unwrap(); + + file.lock_exclusive().unwrap(); + let mut bytes_to_read: Vec = Vec::new(); file.read_to_end(&mut bytes_to_read).unwrap(); @@ -42,9 +49,23 @@ impl Index { let doc = bson::to_vec(index.deref()).unwrap(); let mut file = fs::File::create(self.index_path.clone()).unwrap(); + + file.lock_exclusive().unwrap(); + file.write_all(&doc).unwrap(); file.sync_all().unwrap(); file.flush().unwrap(); + file.unlock().unwrap(); + } +} + +impl Drop for Index { + fn drop(&mut self) { + self.write_index(); + + let file = fs::File::open(self.index_path.clone()).unwrap(); + + file.unlock().unwrap(); } } diff --git a/src/storage/lsm/logging/mod.rs b/src/storage/lsm/logging/mod.rs new file mode 100644 index 0000000..db048a7 --- /dev/null +++ b/src/storage/lsm/logging/mod.rs @@ -0,0 +1,130 @@ +use std::{ + fs, + io::{Read, Write}, + path, +}; + +use serde::{Deserialize, Serialize}; + +use fs2::FileExt; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Logging { + pub log: Vec, + pub log_path: path::PathBuf, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub enum LogOp { + Insert { + key: String, + value: bson::Bson, + }, + Delete { + key: String, + value: bson::Bson, + }, + Update { + key: String, + old_value: bson::Bson, + new_value: bson::Bson, + }, +} + +impl Logging { + pub fn new(path: path::PathBuf) -> Self { + let log_path = path.join("DUSTDATA.logging"); + + if log_path.exists() { + Logging::read_log_file(log_path) + } else { + Logging { + log: Vec::new(), + log_path, + } + } + } + + pub fn insert(&mut self, key: &str, value: bson::Bson) { + self.log.push(LogOp::Insert { + key: key.to_string(), + value, + }); + } + + pub fn delete(&mut self, key: &str, value: bson::Bson) { + self.log.push(LogOp::Delete { + key: key.to_string(), + value, + }); + } + + pub fn update(&mut self, key: &str, old_value: bson::Bson, new_value: bson::Bson) { + self.log.push(LogOp::Update { + key: key.to_string(), + old_value, + new_value, + }); + } + + fn reverse_operation(op: LogOp) -> LogOp { + match op { + LogOp::Insert { key, value } => LogOp::Delete { key, value }, + LogOp::Delete { key, value } => LogOp::Insert { key, value }, + LogOp::Update { + key, + old_value, + new_value, + } => LogOp::Update { + key, + old_value: new_value, + new_value: old_value, + }, + } + } + + pub fn rollback(&mut self, offset: u32) -> Vec { + let mut ops = Vec::new(); + + let log = self.log.split_off(offset as usize); + + for op in log.iter().rev() { + let reverse_op = Logging::reverse_operation(op.clone()); + self.log.push(reverse_op.clone()); + ops.push(reverse_op); + } + + ops + } + + pub fn flush(&self) { + let self_vec = bson::to_vec(self).unwrap(); + + let mut file = fs::File::create(self.log_path.clone()).unwrap(); + + file.write_all(&self_vec).unwrap(); + + file.sync_data().unwrap(); + file.flush().unwrap(); + } + + fn read_log_file(path: path::PathBuf) -> Self { + let mut file = fs::File::open(path).unwrap(); + + file.lock_exclusive().unwrap(); + + let mut content = Vec::new(); + file.read_to_end(&mut content).unwrap(); + + bson::from_slice(&content).unwrap() + } +} + +impl Drop for Logging { + fn drop(&mut self) { + self.flush(); + 
let file = fs::File::open(self.log_path.clone()).unwrap(); + + file.unlock().unwrap(); + } +} diff --git a/src/storage/lsm/mod.rs b/src/storage/lsm/mod.rs index ce57cf4..b822b10 100644 --- a/src/storage/lsm/mod.rs +++ b/src/storage/lsm/mod.rs @@ -4,6 +4,7 @@ use std::{mem, path}; pub mod error; pub mod filter; pub mod index; +pub mod logging; pub mod memtable; pub mod sstable; @@ -22,13 +23,15 @@ pub struct Lsm { pub dense_index: index::Index, pub bloom_filter: filter::Filter, pub sstable: sstable::SSTable, + pub logging: logging::Logging, } impl Lsm { pub fn new(lsm_config: LsmConfig) -> Lsm { - let dense_index = index::Index::new(lsm_config.sstable_path.join("sstable.index")); + let dense_index = index::Index::new(lsm_config.clone().sstable_path); let sstable = sstable::SSTable::new(lsm_config.clone().sstable_path); let bloom_filter = filter::Filter::new(lsm_config.clone().sstable_path); + let logging = logging::Logging::new(lsm_config.clone().sstable_path); let memtable = memtable::Memtable::new(); Lsm { @@ -37,6 +40,7 @@ impl Lsm { dense_index, sstable, lsm_config, + logging, } } @@ -45,6 +49,7 @@ impl Lsm { return Err(Error::new(ErrorKind::AlreadyExists)); } + self.logging.insert(key, value.clone()); self.memtable.insert(key, value.clone())?; self.bloom_filter.insert(key); @@ -71,13 +76,13 @@ impl Lsm { } } - pub fn delete(&mut self, key: &str) -> Result> { + pub fn delete(&mut self, key: &str) -> Result { if !self.contains(key) { return Err(Error::new(ErrorKind::KeyNotFound)); } - let value = self.get(key).unwrap(); - + let value = self.get(key).unwrap().unwrap(); + self.logging.delete(key, value.clone()); self.memtable.delete(key).ok(); self.dense_index .index @@ -89,32 +94,27 @@ impl Lsm { Ok(value) } - pub fn update(&mut self, key: &str, value: bson::Bson) -> Result<()> { + pub fn update(&mut self, key: &str, value: bson::Bson) -> Result { if !self.contains(key) { return Err(Error::new(ErrorKind::KeyNotFound)); } - // Delete the old value from the bloom filter - self.bloom_filter.delete(key); - - let mut dense_index = self.dense_index.index.write().unwrap(); - dense_index.remove(&key.to_string()); - drop(dense_index); + let old_value = self.get(key)?; - self.memtable.insert(key, value)?; + self.logging + .update(key, old_value.clone().unwrap(), value.clone()); - self.bloom_filter.insert(key); + self.memtable + .table + .write() + .unwrap() + .insert(key.to_string(), value); - Ok(()) + Ok(old_value.unwrap()) } pub fn flush(&mut self) -> Result<()> { - if self.memtable.is_empty() { - self.dense_index.write_index(); - self.bloom_filter.flush(); - - Ok(()) - } else { + if !self.memtable.is_empty() { let memtable = self.memtable.get_memtable(); let segments = sstable::Segment::from_tree(&memtable); @@ -128,14 +128,24 @@ impl Lsm { drop(dense_index); - self.dense_index.write_index(); + self.memtable.clear(); + } - self.bloom_filter.flush(); + self.dense_index.write_index(); + self.bloom_filter.flush(); + self.logging.flush(); - self.memtable.clear(); + Ok(()) + } + + pub fn rollback(&mut self, offset: u32) -> Result<()> { + let reverse_ops = self.logging.rollback(offset); - Ok(()) + for reverse_op in reverse_ops { + self.execute_logging_op(reverse_op)?; } + + Ok(()) } pub fn contains(&self, key: &str) -> bool { @@ -161,14 +171,25 @@ impl Lsm { keys } + + fn execute_logging_op(&mut self, op: logging::LogOp) -> Result<()> { + match op { + logging::LogOp::Insert { key, value } => self.insert(&key, value), + logging::LogOp::Delete { key, value: _ } => self.delete(&key), + 
logging::LogOp::Update { + key, + old_value: _, + new_value, + } => self.update(&key, new_value), + }?; + + Ok(()) + } } impl Drop for Lsm { fn drop(&mut self) { - if self.memtable.is_empty() { - self.dense_index.write_index(); - self.bloom_filter.flush(); - } else { + if !self.memtable.is_empty() { let memtable = self.memtable.table.read().unwrap(); let segments = sstable::Segment::from_tree(memtable.deref()); @@ -181,9 +202,9 @@ impl Drop for Lsm { } drop(dense_index); - - self.dense_index.write_index(); - self.bloom_filter.flush(); } + + self.dense_index.write_index(); + self.bloom_filter.flush(); } } diff --git a/src/storage/lsm/sstable.rs b/src/storage/lsm/sstable.rs index a601bfd..92b992e 100644 --- a/src/storage/lsm/sstable.rs +++ b/src/storage/lsm/sstable.rs @@ -89,9 +89,6 @@ impl SSTable { } pub fn write_segment_file(&self, segment: Vec) -> std::io::Result { - let segment_index = get_last_file_index(self.path.clone()); - let filename = format!("Data_{}.db", segment_index); - // write metadata into segment let metadata = bson::doc! { "version": env!("CARGO_PKG_VERSION"), @@ -102,6 +99,9 @@ impl SSTable { full_file.extend_from_slice(&segment); + let segment_index = get_last_file_index(self.path.clone()); + let filename = format!("Data_{}.db", segment_index); + fs::write(self.path.join(filename), full_file)?; Ok(segment_index) From 671eab5f5483c8aefe24372253a8c250f3224202 Mon Sep 17 00:00:00 2001 From: peeeuzin Date: Mon, 22 May 2023 21:58:01 -0300 Subject: [PATCH 5/6] deps: removing `chrono` dependency --- Cargo.lock | 188 +---------------------------------------------------- Cargo.toml | 1 - 2 files changed, 2 insertions(+), 187 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48ee920..e0a9b62 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,15 +13,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -63,7 +54,7 @@ dependencies = [ "serde", "serde_bytes", "serde_json", - "time 0.3.17", + "time", "uuid", ] @@ -85,88 +76,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chrono" -version = "0.4.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-integer", - "num-traits", - "time 0.1.45", - "wasm-bindgen", - "winapi", -] - -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" - -[[package]] -name = "cxx" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdf07d07d6531bfcdbe9b8b739b104610c6508dcc4d63b410585faf338241daf" -dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = 
"1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2eb5b96ecdc99f72657332953d4d9c50135af1bac34277801cc3937906ebd39" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2", - "quote", - "scratch", - "syn", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac040a39517fd1674e0f32177648334b0f4074625b5588a64519804ba0553b12" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1362b0ddcfc4eb0a1f57b68bd77dd99f0e826958a96abd0ae9bd092e114ffed6" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "dustdata" version = "1.3.2" dependencies = [ "bitvec", "bson", - "chrono", "farmhash", "fs2", "hex", @@ -204,7 +119,7 @@ checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -219,30 +134,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "iana-time-zone" -version = "0.1.53" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "winapi", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" -dependencies = [ - "cxx", - "cxx-build", -] - [[package]] name = "indexmap" version = "1.9.2" @@ -280,15 +171,6 @@ version = "0.2.138" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" -[[package]] -name = "link-cplusplus" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9272ab7b96c9046fbc5bc56c06c117cb639fe2d509df0c421cad82d2915cf369" -dependencies = [ - "cc", -] - [[package]] name = "log" version = "0.4.17" @@ -318,25 +200,6 @@ dependencies = [ "libc", ] -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" -dependencies = [ - "autocfg", -] - [[package]] name = "once_cell" version = "1.16.0" @@ -409,12 +272,6 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" -[[package]] -name = "scratch" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8132065adcfd6e02db789d9285a0deb2f3fcb04002865ab67d5fb103533898" - [[package]] name = "serde" version = "1.0.151" @@ -473,26 +330,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" 
-[[package]] -name = "termcolor" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - [[package]] name = "time" version = "0.3.17" @@ -526,12 +363,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - [[package]] name = "uuid" version = "1.2.2" @@ -548,12 +379,6 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -630,15 +455,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 7f9ca9b..de0eea0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,5 +22,4 @@ lz4 = "1.24.0" farmhash = "1.1.5" bitvec = "1.0.1" serde = {version = "1.0.148", features = ["derive"]} -chrono = "0.4.23" fs2 = "0.4.3" \ No newline at end of file From e2e808923b9fca98d9e2d5791906426c262e5741 Mon Sep 17 00:00:00 2001 From: peeeuzin Date: Mon, 22 May 2023 22:03:29 -0300 Subject: [PATCH 6/6] version: v2.0.0-beta.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 12 +++++------- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e0a9b62..6976dca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,7 +78,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "dustdata" -version = "1.3.2" +version = "2.0.0-beta.0" dependencies = [ "bitvec", "bson", diff --git a/Cargo.toml b/Cargo.toml index de0eea0..7ca096b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dustdata" -version = "1.3.2" +version = "2.0.0-beta.0" edition = "2021" description = "A data concurrency control storage engine to Rustbase" repository = "https://github.com/rustbase/dustdata" diff --git a/README.md b/README.md index 3e489d3..0c709c2 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Add the following to your `Cargo.toml`: ```toml [dependencies] -dustdata = "1.3.2" +dustdata = "2.0.0-beta.0" ``` # Usage @@ -35,14 +35,12 @@ Initialize a DustData interface. 
 ```rust
 // DustData Configuration
-let config = dustdata::DustDataConfig {
-    path: std::path::Path::new("./test_data/dustdata").to_path_buf(),
-    lsm_config: dustdata::LsmConfig {
-        flush_threshold: dustdata::Size::Megabytes(128),
-    }
+let config = LsmConfig {
+    flush_threshold: 10240, // 10KB
+    sstable_path: PathBuf::from("./test_data"),
 };
 
-let mut dustdata = dustdata::initialize(config);
+let mut dustdata = Lsm::new(config);
 ```
 
 ## Insert a data
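
For reference, here is a minimal end-to-end sketch of the v2 API after this series, since the README hunk above only covers construction. Method names and signatures (`insert`, `get`, `update`, `flush`, `rollback`) follow the `src/storage/lsm/mod.rs` diff; the `use` path, the `main` wrapper, the sample keys and values, and the assumption that the error type implements `Debug` (so `unwrap` compiles) are illustrative guesses rather than anything this series pins down.

```rust
use std::path::PathBuf;

use bson::Bson;
// Assumed import path; adjust to whatever `lib.rs` re-exports in the final tree.
use dustdata::storage::lsm::{Lsm, LsmConfig};

fn main() {
    let config = LsmConfig {
        flush_threshold: 10240, // flush once the memtable holds ~10 KB
        sstable_path: PathBuf::from("./test_data"),
    };

    // `Lsm::new` wires up the memtable, dense index, bloom filter and the new WAL (`logging`).
    let mut dustdata = Lsm::new(config);

    // Fails with `ErrorKind::AlreadyExists` if the key is already present.
    dustdata
        .insert("user:1", Bson::String("Pedro".to_string()))
        .unwrap();

    // `get` returns Result<Option<Bson>>.
    let value = dustdata.get("user:1").unwrap();
    assert!(value.is_some());

    // `update` (like `delete`) now returns the previous value and records the
    // operation in the write-ahead log so it can be rolled back later.
    let previous = dustdata
        .update("user:1", Bson::String("Pedro S.".to_string()))
        .unwrap();
    println!("previous value: {:?}", previous);

    // Persists the memtable segments, dense index, bloom filter and log to disk.
    dustdata.flush().unwrap();

    // `rollback(offset)` replays reverse operations from the log; its exact
    // offset semantics live in `logging::Logging::rollback`, which this sketch
    // does not exercise.
}
```

If `Lsm` and `LsmConfig` end up re-exported at the crate root, the import shrinks to `use dustdata::{Lsm, LsmConfig};`.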