Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
marvin-j97 committed May 25, 2024
1 parent 1787fbe commit 064d618
Show file tree
Hide file tree
Showing 21 changed files with 127 additions and 81 deletions.
30 changes: 17 additions & 13 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,21 @@ name = "lsm_tree"
path = "src/lib.rs"

[features]
default = []
default = ["lz4"]
lz4 = ["dep:lz4_flex"]
miniz = ["dep:miniz_oxide"]
bloom = ["dep:seahash"]
segment_history = ["dep:serde", "dep:serde_json"]
miniz_oxide = ["dep:miniz_oxide"]

[dependencies]
byteorder = "1.5.0"
crc32fast = "1.4.0"
crossbeam-skiplist = "0.1.3"
double-ended-peekable = "0.1.0"
log = "0.4.21"
lz4_flex = "0.11.3"
lz4_flex = { version = "0.11.3", optional = true }
miniz_oxide = { version = "0.7.3", optional = true }
path-absolutize = "3.1.1"
quick_cache = { version = "0.5.1", default-features = false, features = [] }
seahash = { version = "4.1.0", optional = true }
Expand All @@ -42,20 +46,20 @@ fs_extra = "1.3.0"
nanoid = "0.4.0"
test-log = "0.2.16"

[[bench]]
name = "bloom"
harness = false
path = "benches/bloom.rs"
required-features = ["bloom"]
# [[bench]]
# name = "bloom"
# harness = false
# path = "benches/bloom.rs"
# required-features = ["bloom"]

[[bench]]
name = "tree"
harness = false
path = "benches/tree.rs"
required-features = []
required-features = ["lz4", "miniz"]

[[bench]]
name = "level_manifest"
harness = false
path = "benches/level_manifest.rs"
required-features = []
# [[bench]]
# name = "level_manifest"
# harness = false
# path = "benches/level_manifest.rs"
# required-features = []
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ All contributions are to be licensed as MIT OR Apache-2.0.
### Run benchmarks

```bash
cargo bench --features bloom
cargo bench --features bloom --features lz4 --features miniz
```

## Footnotes
Expand Down
55 changes: 30 additions & 25 deletions benches/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,12 @@ fn index_block_find_handle(c: &mut Criterion) {
fn load_block_from_disk(c: &mut Criterion) {
let mut group = c.benchmark_group("Load block from disk");

for block_size in [1, 4, 8, 16, 32, 64] {
group.bench_function(format!("{block_size} KiB"), |b| {
for comp_type in [
CompressionType::None,
CompressionType::Lz4,
CompressionType::Miniz,
] {
for block_size in [1, 4, 8, 16, 32, 64] {
let block_size = block_size * 1_024;

let mut size = 0;
Expand All @@ -388,32 +392,33 @@ fn load_block_from_disk(c: &mut Criterion) {
}
}

let mut block = ValueBlock {
items: items.clone().into_boxed_slice(),
header: BlockHeader {
compression: CompressionType::Lz4,
crc: 0,
data_length: 0,
previous_block_offset: 0,
},
};
group.bench_function(format!("{block_size} KiB [{comp_type}]"), |b| {
let mut block = ValueBlock {
items: items.clone().into_boxed_slice(),
header: BlockHeader {
compression: comp_type,
crc: 0,
data_length: 0,
previous_block_offset: 0,
},
};

// Serialize block
block.header.crc = ValueBlock::create_crc(&block.items).unwrap();
let (header, data) = ValueBlock::to_bytes_compressed(&items, 0, comp_type).unwrap();

let mut file = tempfile::tempfile().unwrap();
header.serialize(&mut file).unwrap();
file.write_all(&data).unwrap();

// Serialize block
block.header.crc = ValueBlock::create_crc(&block.items).unwrap();
let (header, data) =
ValueBlock::to_bytes_compressed(&items, 0, CompressionType::Lz4).unwrap();

let mut file = tempfile::tempfile().unwrap();
header.serialize(&mut file).unwrap();
file.write_all(&data).unwrap();

b.iter(|| {
let loaded_block = ValueBlock::from_file_compressed(&mut file, 0).unwrap();
b.iter(|| {
let loaded_block = ValueBlock::from_file_compressed(&mut file, 0).unwrap();

assert_eq!(loaded_block.items.len(), block.items.len());
assert_eq!(loaded_block.header.crc, block.header.crc);
assert_eq!(loaded_block.items.len(), block.items.len());
assert_eq!(loaded_block.header.crc, block.header.crc);
});
});
});
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/compaction/fifo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ mod tests {
created_at,
id,
file_size: 1,
compression: crate::segment::meta::CompressionType::Lz4,
compression: crate::segment::meta::CompressionType::None,
table_type: crate::segment::meta::TableType::Block,
item_count: 0,
key_count: 0,
Expand Down
2 changes: 1 addition & 1 deletion src/compaction/levelled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ mod tests {
created_at: unix_timestamp().as_nanos(),
id,
file_size: size,
compression: crate::segment::meta::CompressionType::Lz4,
compression: crate::segment::meta::CompressionType::None,
table_type: crate::segment::meta::TableType::Block,
item_count: 0,
key_count: 0,
Expand Down
2 changes: 1 addition & 1 deletion src/compaction/maintenance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ mod tests {
created_at,
id,
file_size: 1,
compression: crate::segment::meta::CompressionType::Lz4,
compression: crate::segment::meta::CompressionType::None,
table_type: crate::segment::meta::TableType::Block,
item_count: 0,
key_count: 0,
Expand Down
2 changes: 1 addition & 1 deletion src/compaction/tiered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ mod tests {
created_at: 0,
id,
file_size: size_mib * 1_024 * 1_024,
compression: crate::segment::meta::CompressionType::Lz4,
compression: crate::segment::meta::CompressionType::None,
table_type: crate::segment::meta::TableType::Block,
item_count: 0,
key_count: 0,
Expand Down
2 changes: 1 addition & 1 deletion src/compaction/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ fn merge_segments(
block_size: opts.config.inner.block_size,
evict_tombstones: should_evict_tombstones,
folder: segments_base_folder.clone(),
compression: CompressionType::Lz4,
compression: CompressionType::None,

#[cfg(feature = "bloom")]
bloom_fp_rate,
Expand Down
11 changes: 8 additions & 3 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,13 @@ impl Default for PersistedConfig {
block_size: 4_096,
level_count: 7,
r#type: TreeType::Standard,
compression: CompressionType::Lz4,
table_type: TableType::Block,

#[cfg(not(feature = "lz4"))]
compression: CompressionType::None,

#[cfg(feature = "lz4")]
compression: CompressionType::Lz4,
}
}
}
Expand Down Expand Up @@ -279,7 +284,7 @@ mod tests {
fn tree_config_raw() -> crate::Result<()> {
let config = PersistedConfig {
r#type: TreeType::Standard,
compression: CompressionType::Lz4,
compression: CompressionType::None,
table_type: TableType::Block,
block_size: 4_096,
level_count: 7,
Expand Down Expand Up @@ -318,7 +323,7 @@ mod tests {
fn tree_config_serde_round_trip() -> crate::Result<()> {
let config = PersistedConfig {
r#type: TreeType::Standard,
compression: CompressionType::Lz4,
compression: CompressionType::None,
table_type: TableType::Block,
block_size: 4_096,
level_count: 7,
Expand Down
10 changes: 2 additions & 8 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::{
serde::{DeserializeError, SerializeError},
version::Version,
CompressionType,
};
use lz4_flex::block::DecompressError;

/// Represents errors that can occur in the LSM-tree
#[derive(Debug)]
Expand All @@ -17,7 +17,7 @@ pub enum Error {
Deserialize(DeserializeError),

/// Decompression failed
Decompress(DecompressError),
Decompress(CompressionType),

/// Invalid or unparseable data format version
InvalidVersion(Option<Version>),
Expand Down Expand Up @@ -49,11 +49,5 @@ impl From<DeserializeError> for Error {
}
}

impl From<DecompressError> for Error {
fn from(value: DecompressError) -> Self {
Self::Decompress(value)
}
}

/// Tree result
pub type Result<T> = std::result::Result<T, Error>;
2 changes: 1 addition & 1 deletion src/levels/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ mod tests {
created_at: 0,
id,
file_size: 0,
compression: crate::segment::meta::CompressionType::Lz4,
compression: crate::segment::meta::CompressionType::None,
table_type: crate::segment::meta::TableType::Block,
item_count: 0,
key_count: 0,
Expand Down
2 changes: 1 addition & 1 deletion src/segment/block/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ mod tests {
#[test]
fn block_header_raw() -> crate::Result<()> {
let header = Header {
compression: CompressionType::Lz4,
compression: CompressionType::None,
crc: 4,
previous_block_offset: 2,
data_length: 15,
Expand Down
24 changes: 17 additions & 7 deletions src/segment/block/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
pub mod header;

use super::meta::CompressionType;
use crate::serde::{Deserializable, Serializable};
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use header::Header as BlockHeader;
use lz4_flex::{compress_prepend_size, decompress_size_prepended};
use std::io::{Cursor, Read};

use super::meta::CompressionType;

/// A disk-based block
///
/// A block is split into its header and a blob of data.
Expand All @@ -32,7 +30,14 @@ impl<T: Clone + Serializable + Deserializable> Block<T> {

let bytes = match header.compression {
super::meta::CompressionType::None => bytes,
super::meta::CompressionType::Lz4 => decompress_size_prepended(&bytes)?,

#[cfg(feature = "lz4")]
super::meta::CompressionType::Lz4 => lz4_flex::decompress_size_prepended(&bytes)
.map_err(|_| crate::Error::Decompress(header.compression))?,

#[cfg(feature = "miniz")]
super::meta::CompressionType::Miniz => miniz_oxide::inflate::decompress_to_vec(&bytes)
.map_err(|_| crate::Error::Decompress(header.compression))?,
};
let mut bytes = Cursor::new(bytes);

Expand Down Expand Up @@ -113,7 +118,12 @@ impl<T: Clone + Serializable + Deserializable> Block<T> {

Ok(match compression {
CompressionType::None => buf,
CompressionType::Lz4 => compress_prepend_size(&buf),

#[cfg(feature = "lz4")]
CompressionType::Lz4 => lz4_flex::compress_prepend_size(&buf),

#[cfg(feature = "miniz")]
CompressionType::Miniz => miniz_oxide::deflate::compress_to_vec(&buf, 10),
})
}
}
Expand All @@ -136,7 +146,7 @@ mod tests {
// Serialize to bytes
let mut serialized = Vec::new();

let (header, data) = ValueBlock::to_bytes_compressed(&items, 0, CompressionType::Lz4)?;
let (header, data) = ValueBlock::to_bytes_compressed(&items, 0, CompressionType::None)?;

header.serialize(&mut serialized)?;
serialized.write_all(&data)?;
Expand Down Expand Up @@ -165,7 +175,7 @@ mod tests {
// Serialize to bytes
let mut serialized = Vec::new();

let (header, data) = ValueBlock::to_bytes_compressed(&items, 0, CompressionType::Lz4)?;
let (header, data) = ValueBlock::to_bytes_compressed(&items, 0, CompressionType::None)?;

header.serialize(&mut serialized)?;
serialized.write_all(&data)?;
Expand Down
30 changes: 29 additions & 1 deletion src/segment/meta/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,24 @@
#[allow(clippy::module_name_repetitions)]
pub enum CompressionType {
None,

#[cfg(feature = "lz4")]
Lz4,

#[cfg(feature = "miniz")]
Miniz,
}

impl From<CompressionType> for u8 {
fn from(val: CompressionType) -> Self {
match val {
CompressionType::None => 0,

#[cfg(feature = "lz4")]
CompressionType::Lz4 => 1,

#[cfg(feature = "miniz")]
CompressionType::Miniz => 2,
}
}
}
Expand All @@ -24,14 +34,32 @@ impl TryFrom<u8> for CompressionType {
fn try_from(value: u8) -> Result<Self, Self::Error> {
match value {
0 => Ok(Self::None),

#[cfg(feature = "lz4")]
1 => Ok(Self::Lz4),

#[cfg(feature = "miniz")]
2 => Ok(Self::Miniz),

_ => Err(()),
}
}
}

impl std::fmt::Display for CompressionType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "lz4")
write!(
f,
"{}",
match self {
Self::None => "no compression",

#[cfg(feature = "lz4")]
Self::Lz4 => "lz4",

#[cfg(feature = "miniz")]
Self::Miniz => "miniz",
}
)
}
}
Loading

0 comments on commit 064d618

Please sign in to comment.