From f938f66464c95b92170a009fbac6ba0071cbf0b6 Mon Sep 17 00:00:00 2001 From: hillium Date: Fri, 4 Aug 2023 16:46:38 +0800 Subject: [PATCH 1/9] added ffi for next level segments Signed-off-by: hillium --- .gitmodules | 4 ++-- librocksdb_sys/crocksdb/c.cc | 17 +++++++++++++++++ librocksdb_sys/crocksdb/crocksdb/c.h | 9 +++++++++ librocksdb_sys/rocksdb | 2 +- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 1dce46cb8..d473beb99 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,7 @@ [submodule "rocksdb"] path = librocksdb_sys/rocksdb - url = https://github.com/tikv/rocksdb.git - branch = 6.29.tikv + url = https://github.com/yujuncen/rocksdb.git + branch = next-level-seg-on-compaction [submodule "titan"] path = librocksdb_sys/libtitan_sys/titan diff --git a/librocksdb_sys/crocksdb/c.cc b/librocksdb_sys/crocksdb/c.cc index 1debdf0ed..cec59e308 100644 --- a/librocksdb_sys/crocksdb/c.cc +++ b/librocksdb_sys/crocksdb/c.cc @@ -6572,6 +6572,23 @@ int crocksdb_sst_partitioner_context_output_level( return context->rep->output_level; } +int crocksdb_sst_partitioner_next_level_segment_count( + crocksdb_sst_partitioner_context_t* context) { + return context->rep->OutputNextLevelSegmentCount(); +} + +void crocksdb_sst_partitioner_next_level_segment( + crocksdb_sst_partitioner_context_t* context, int index, + const char** smallest_key, size_t* smallest_key_len, const char** largest_key, + size_t* largest_key_len, int* size) { + Slice small, large; + context->rep->OutputNextLevelSegment(index, &small, &large, size); + *smallest_key = small.data(); + *smallest_key_len = small.size(); + *largest_key = large.data(); + *largest_key_len = large.size(); +} + const char* crocksdb_sst_partitioner_context_smallest_key( crocksdb_sst_partitioner_context_t* context, size_t* key_len) { auto& smallest_key = context->rep->smallest_user_key; diff --git a/librocksdb_sys/crocksdb/crocksdb/c.h b/librocksdb_sys/crocksdb/crocksdb/c.h index 19a67aff1..84ffee0ac 100644 --- a/librocksdb_sys/crocksdb/crocksdb/c.h +++ b/librocksdb_sys/crocksdb/crocksdb/c.h @@ -2587,6 +2587,15 @@ crocksdb_sst_partitioner_context_smallest_key( extern C_ROCKSDB_LIBRARY_API const char* crocksdb_sst_partitioner_context_largest_key( crocksdb_sst_partitioner_context_t* context, size_t* key_len); +extern C_ROCKSDB_LIBRARY_API int +crocksdb_sst_partitioner_next_level_segment_count( + crocksdb_sst_partitioner_context_t* context); +extern C_ROCKSDB_LIBRARY_API void +crocksdb_sst_partitioner_next_level_segment( + crocksdb_sst_partitioner_context_t* context, int index, + const char** smallest_key, size_t* smallest_key_len, + const char** largest_key, size_t* largest_key_len, + int* size); extern C_ROCKSDB_LIBRARY_API void crocksdb_sst_partitioner_context_set_is_full_compaction( crocksdb_sst_partitioner_context_t* context, diff --git a/librocksdb_sys/rocksdb b/librocksdb_sys/rocksdb index 5b9cef986..f54a53545 160000 --- a/librocksdb_sys/rocksdb +++ b/librocksdb_sys/rocksdb @@ -1 +1 @@ -Subproject commit 5b9cef986931a129bc6009391df768ebf1a7af6b +Subproject commit f54a53545cca8c7bac53dcad8499d2d08f9ed3dd From 258cd08b56bd5689be254f69cf38506c60227148 Mon Sep 17 00:00:00 2001 From: hillium Date: Mon, 7 Aug 2023 17:49:59 +0800 Subject: [PATCH 2/9] added segments Signed-off-by: hillium --- librocksdb_sys/crocksdb/c.cc | 4 ++-- librocksdb_sys/crocksdb/crocksdb/c.h | 4 ++-- librocksdb_sys/src/lib.rs | 12 ++++++++++ src/sst_partitioner.rs | 33 ++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/librocksdb_sys/crocksdb/c.cc b/librocksdb_sys/crocksdb/c.cc index cec59e308..d5d0ccdb6 100644 --- a/librocksdb_sys/crocksdb/c.cc +++ b/librocksdb_sys/crocksdb/c.cc @@ -6572,12 +6572,12 @@ int crocksdb_sst_partitioner_context_output_level( return context->rep->output_level; } -int crocksdb_sst_partitioner_next_level_segment_count( +int crocksdb_sst_partitioner_context_next_level_segment_count( crocksdb_sst_partitioner_context_t* context) { return context->rep->OutputNextLevelSegmentCount(); } -void crocksdb_sst_partitioner_next_level_segment( +void crocksdb_sst_partitioner_context_next_level_segment( crocksdb_sst_partitioner_context_t* context, int index, const char** smallest_key, size_t* smallest_key_len, const char** largest_key, size_t* largest_key_len, int* size) { diff --git a/librocksdb_sys/crocksdb/crocksdb/c.h b/librocksdb_sys/crocksdb/crocksdb/c.h index 84ffee0ac..e94fe0f95 100644 --- a/librocksdb_sys/crocksdb/crocksdb/c.h +++ b/librocksdb_sys/crocksdb/crocksdb/c.h @@ -2588,10 +2588,10 @@ extern C_ROCKSDB_LIBRARY_API const char* crocksdb_sst_partitioner_context_largest_key( crocksdb_sst_partitioner_context_t* context, size_t* key_len); extern C_ROCKSDB_LIBRARY_API int -crocksdb_sst_partitioner_next_level_segment_count( +crocksdb_sst_partitioner_context_next_level_segment_count( crocksdb_sst_partitioner_context_t* context); extern C_ROCKSDB_LIBRARY_API void -crocksdb_sst_partitioner_next_level_segment( +crocksdb_sst_partitioner_context_next_level_segment( crocksdb_sst_partitioner_context_t* context, int index, const char** smallest_key, size_t* smallest_key_len, const char** largest_key, size_t* largest_key_len, diff --git a/librocksdb_sys/src/lib.rs b/librocksdb_sys/src/lib.rs index 39d553079..d1df5389e 100644 --- a/librocksdb_sys/src/lib.rs +++ b/librocksdb_sys/src/lib.rs @@ -2817,6 +2817,18 @@ extern "C" { largest_key: *const c_char, key_len: size_t, ); + pub fn crocksdb_sst_partitioner_context_next_level_segment( + context: *mut DBSstPartitionerContext, + index: c_int, + smallest_key: *mut *const c_char, + smallest_key_len: *mut size_t, + largest_key: *mut *const c_char, + largest_key_len: *mut size_t, + size: *mut size_t, + ); + pub fn crocksdb_sst_partitioner_context_next_level_segment_count( + context: *mut DBSstPartitionerContext, + ) -> c_int; pub fn crocksdb_sst_partitioner_factory_create( underlying: *mut c_void, diff --git a/src/sst_partitioner.rs b/src/sst_partitioner.rs index 7480e405a..7d0b009a4 100644 --- a/src/sst_partitioner.rs +++ b/src/sst_partitioner.rs @@ -22,6 +22,14 @@ pub struct SstPartitionerContext<'a> { pub output_level: i32, pub smallest_key: &'a [u8], pub largest_key: &'a [u8], + pub next_level_segments: Vec>, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct Segment<'a> { + pub smallest_key: &'a [u8], + pub largest_key: &'a [u8], + pub segment_size: usize, } pub trait SstPartitioner { @@ -103,6 +111,9 @@ extern "C" fn sst_partitioner_factory_create_partitioner *mut DBSstPartitioner { let factory = unsafe { &*(ctx as *mut F) }; + let segment_size = + unsafe { crocksdb_ffi::crocksdb_sst_partitioner_context_next_level_segment_count(context) }; + let mut segments = Vec::with_capacity(segment_size as usize); let context = unsafe { let mut smallest_key_len: usize = 0; let smallest_key = crocksdb_ffi::crocksdb_sst_partitioner_context_smallest_key( @@ -114,6 +125,27 @@ extern "C" fn sst_partitioner_factory_create_partitioner Date: Tue, 8 Aug 2023 18:07:38 +0800 Subject: [PATCH 3/9] make raw ffi Signed-off-by: hillium --- src/sst_partitioner.rs | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/sst_partitioner.rs b/src/sst_partitioner.rs index 7d0b009a4..cb6fcea1c 100644 --- a/src/sst_partitioner.rs +++ b/src/sst_partitioner.rs @@ -22,14 +22,8 @@ pub struct SstPartitionerContext<'a> { pub output_level: i32, pub smallest_key: &'a [u8], pub largest_key: &'a [u8], - pub next_level_segments: Vec>, -} - -#[derive(Clone, PartialEq, Eq, Debug)] -pub struct Segment<'a> { - pub smallest_key: &'a [u8], - pub largest_key: &'a [u8], - pub segment_size: usize, + pub next_level_boundaries: Vec<&'a [u8]>, + pub next_level_sizes: Vec, } pub trait SstPartitioner { @@ -113,7 +107,8 @@ extern "C" fn sst_partitioner_factory_create_partitioner Date: Thu, 17 Aug 2023 17:26:04 +0800 Subject: [PATCH 4/9] update rocksdb Signed-off-by: hillium --- librocksdb_sys/rocksdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/librocksdb_sys/rocksdb b/librocksdb_sys/rocksdb index f54a53545..0c20c20b0 160000 --- a/librocksdb_sys/rocksdb +++ b/librocksdb_sys/rocksdb @@ -1 +1 @@ -Subproject commit f54a53545cca8c7bac53dcad8499d2d08f9ed3dd +Subproject commit 0c20c20b08899276e4f751bb8f03be5d0c055496 From 80da5f388850a6857a805722ae4b4cb46703187b Mon Sep 17 00:00:00 2001 From: hillium Date: Thu, 17 Aug 2023 18:19:04 +0800 Subject: [PATCH 5/9] fix panic Signed-off-by: hillium --- src/sst_partitioner.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sst_partitioner.rs b/src/sst_partitioner.rs index cb6fcea1c..f02035a61 100644 --- a/src/sst_partitioner.rs +++ b/src/sst_partitioner.rs @@ -107,8 +107,13 @@ extern "C" fn sst_partitioner_factory_create_partitioner Date: Thu, 7 Sep 2023 13:55:14 +0800 Subject: [PATCH 6/9] go to master Signed-off-by: hillium --- .gitmodules | 4 ++-- librocksdb_sys/rocksdb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index d473beb99..1dce46cb8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,7 @@ [submodule "rocksdb"] path = librocksdb_sys/rocksdb - url = https://github.com/yujuncen/rocksdb.git - branch = next-level-seg-on-compaction + url = https://github.com/tikv/rocksdb.git + branch = 6.29.tikv [submodule "titan"] path = librocksdb_sys/libtitan_sys/titan diff --git a/librocksdb_sys/rocksdb b/librocksdb_sys/rocksdb index 76db5774c..e2f6ec7e2 160000 --- a/librocksdb_sys/rocksdb +++ b/librocksdb_sys/rocksdb @@ -1 +1 @@ -Subproject commit 76db5774c6daf4244ff11ab35ead5eb9f5d23d76 +Subproject commit e2f6ec7e27195082a336180e9f68448d1a8dbc5c From fe6d76b70d6a01b69eb887d1a7fd6461dfd66d0a Mon Sep 17 00:00:00 2001 From: hillium Date: Thu, 7 Sep 2023 15:03:11 +0800 Subject: [PATCH 7/9] run formatter Signed-off-by: hillium --- librocksdb_sys/crocksdb/c.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/librocksdb_sys/crocksdb/c.cc b/librocksdb_sys/crocksdb/c.cc index 854417e13..3da354b13 100644 --- a/librocksdb_sys/crocksdb/c.cc +++ b/librocksdb_sys/crocksdb/c.cc @@ -6580,8 +6580,8 @@ int crocksdb_sst_partitioner_context_next_level_segment_count( void crocksdb_sst_partitioner_context_next_level_segment( crocksdb_sst_partitioner_context_t* context, int index, - const char** smallest_key, size_t* smallest_key_len, const char** largest_key, - size_t* largest_key_len, int* size) { + const char** smallest_key, size_t* smallest_key_len, + const char** largest_key, size_t* largest_key_len, int* size) { Slice small, large; context->rep->OutputNextLevelSegment(index, &small, &large, size); *smallest_key = small.data(); From 4756ec3855e44cf5e9f0657ed3a88c61506e0176 Mon Sep 17 00:00:00 2001 From: hillium Date: Fri, 8 Sep 2023 12:38:15 +0800 Subject: [PATCH 8/9] added unit tests, fix format Signed-off-by: hillium --- librocksdb_sys/crocksdb/c.cc | 25 +++++++++ librocksdb_sys/crocksdb/crocksdb/c.h | 17 ++++-- librocksdb_sys/src/lib.rs | 19 +++++-- src/sst_partitioner.rs | 83 ++++++++++++++++++++++------ 4 files changed, 115 insertions(+), 29 deletions(-) diff --git a/librocksdb_sys/crocksdb/c.cc b/librocksdb_sys/crocksdb/c.cc index 3da354b13..7bc33e4a0 100644 --- a/librocksdb_sys/crocksdb/c.cc +++ b/librocksdb_sys/crocksdb/c.cc @@ -6578,6 +6578,31 @@ int crocksdb_sst_partitioner_context_next_level_segment_count( return context->rep->OutputNextLevelSegmentCount(); } +size_t crocksdb_sst_partitioner_context_get_next_level_size( + crocksdb_sst_partitioner_context_t* context, int index) { + return context->rep->output_next_level_size[index]; +} + +void crocksdb_sst_partitioner_context_get_next_level_boundary( + crocksdb_sst_partitioner_context_t* context, int index, const char** key, + size_t* key_len) { + const auto s = context->rep->output_next_level_boundaries[index]; + *key = s.data(); + *key_len = s.size(); +} + +void crocksdb_sst_partitioner_context_push_bounary_and_size( + crocksdb_sst_partitioner_context_t* context, const char* boundary_key, + size_t boundary_key_len, size_t size) { + if (!context->rep->output_next_level_boundaries.empty()) { + // The first boundary means the left-bondary, which isn't a segment. + // Its size should be ignored. + context->rep->output_next_level_size.push_back(size); + } + context->rep->output_next_level_boundaries.emplace_back(boundary_key, + boundary_key_len); +} + void crocksdb_sst_partitioner_context_next_level_segment( crocksdb_sst_partitioner_context_t* context, int index, const char** smallest_key, size_t* smallest_key_len, diff --git a/librocksdb_sys/crocksdb/crocksdb/c.h b/librocksdb_sys/crocksdb/crocksdb/c.h index e94fe0f95..c636627dd 100644 --- a/librocksdb_sys/crocksdb/crocksdb/c.h +++ b/librocksdb_sys/crocksdb/crocksdb/c.h @@ -2590,12 +2590,17 @@ crocksdb_sst_partitioner_context_largest_key( extern C_ROCKSDB_LIBRARY_API int crocksdb_sst_partitioner_context_next_level_segment_count( crocksdb_sst_partitioner_context_t* context); -extern C_ROCKSDB_LIBRARY_API void -crocksdb_sst_partitioner_context_next_level_segment( - crocksdb_sst_partitioner_context_t* context, int index, - const char** smallest_key, size_t* smallest_key_len, - const char** largest_key, size_t* largest_key_len, - int* size); +extern C_ROCKSDB_LIBRARY_API void +crocksdb_sst_partitioner_context_get_next_level_boundary( + crocksdb_sst_partitioner_context_t* context, int index, const char** key, + size_t* key_len); +extern C_ROCKSDB_LIBRARY_API size_t +crocksdb_sst_partitioner_context_get_next_level_size( + crocksdb_sst_partitioner_context_t* context, int index); +extern C_ROCKSDB_LIBRARY_API void +crocksdb_sst_partitioner_context_push_bounary_and_size( + crocksdb_sst_partitioner_context_t* context, const char* boundary_key, + size_t boundary_key_len, size_t size); extern C_ROCKSDB_LIBRARY_API void crocksdb_sst_partitioner_context_set_is_full_compaction( crocksdb_sst_partitioner_context_t* context, diff --git a/librocksdb_sys/src/lib.rs b/librocksdb_sys/src/lib.rs index 8be25109f..6df6e1d1b 100644 --- a/librocksdb_sys/src/lib.rs +++ b/librocksdb_sys/src/lib.rs @@ -2818,14 +2818,21 @@ extern "C" { largest_key: *const c_char, key_len: size_t, ); - pub fn crocksdb_sst_partitioner_context_next_level_segment( + pub fn crocksdb_sst_partitioner_context_get_next_level_boundary( context: *mut DBSstPartitionerContext, index: c_int, - smallest_key: *mut *const c_char, - smallest_key_len: *mut size_t, - largest_key: *mut *const c_char, - largest_key_len: *mut size_t, - size: *mut size_t, + key: *mut *const c_char, + key_len: *mut size_t, + ); + pub fn crocksdb_sst_partitioner_context_get_next_level_size( + context: *mut DBSstPartitionerContext, + index: c_int, + ) -> size_t; + pub fn crocksdb_sst_partitioner_context_push_bounary_and_size( + context: *mut DBSstPartitionerContext, + boundary_key: *const c_char, + boundary_key_len: size_t, + size: size_t, ); pub fn crocksdb_sst_partitioner_context_next_level_segment_count( context: *mut DBSstPartitionerContext, diff --git a/src/sst_partitioner.rs b/src/sst_partitioner.rs index f02035a61..be40587f2 100644 --- a/src/sst_partitioner.rs +++ b/src/sst_partitioner.rs @@ -125,26 +125,35 @@ extern "C" fn sst_partitioner_factory_create_partitioner 0 { + let mut first_boundary_key = std::ptr::null(); + let mut first_boundary_key_len = 0usize; + crocksdb_ffi::crocksdb_sst_partitioner_context_get_next_level_boundary( + context, + 0, + &mut first_boundary_key, + &mut first_boundary_key_len as _, + ); + next_level_boundaries.push(slice::from_raw_parts( + first_boundary_key as *const u8, + first_boundary_key_len, + )) + } for i in 0..segment_size { - let mut start_key_len = 0usize; - let mut end_key_len = 0usize; - let mut start_key = std::ptr::null(); - let mut end_key = std::ptr::null(); - let mut size = 0usize; - crocksdb_ffi::crocksdb_sst_partitioner_context_next_level_segment( + let mut boundary_key_len = 0usize; + let mut boundary_key = std::ptr::null(); + crocksdb_ffi::crocksdb_sst_partitioner_context_get_next_level_boundary( context, - i, - &mut start_key as _, - &mut start_key_len as _, - &mut end_key as _, - &mut end_key_len as _, - &mut size as _, + i + 1, + &mut boundary_key as _, + &mut boundary_key_len as _, ); - if i == 0 { - next_level_boundaries - .push(slice::from_raw_parts(start_key as *const u8, start_key_len)); - } - next_level_boundaries.push(slice::from_raw_parts(end_key as *const u8, end_key_len)); + let size = + crocksdb_ffi::crocksdb_sst_partitioner_context_get_next_level_size(context, i); + next_level_boundaries.push(slice::from_raw_parts( + boundary_key as *const u8, + boundary_key_len, + )); next_level_sizes.push(size); } SstPartitionerContext { @@ -223,6 +232,9 @@ mod test { pub output_level: Option, pub smallest_key: Option>, pub largest_key: Option>, + + pub next_level_boundaries: Vec>, + pub next_level_sizes: Vec, } impl Default for TestState { @@ -246,6 +258,8 @@ mod test { output_level: None, smallest_key: None, largest_key: None, + next_level_boundaries: vec![], + next_level_sizes: vec![], } } } @@ -308,6 +322,12 @@ mod test { s.output_level = Some(context.output_level); s.smallest_key = Some(context.smallest_key.to_vec()); s.largest_key = Some(context.largest_key.to_vec()); + s.next_level_boundaries = context + .next_level_boundaries + .iter() + .map(|v| v.to_vec()) + .collect(); + s.next_level_sizes = context.next_level_sizes.clone(); Some(TestSstPartitioner { state: self.state.clone(), @@ -340,6 +360,12 @@ mod test { const OUTPUT_LEVEL: i32 = 3; const SMALLEST_KEY: &[u8] = b"aaaa"; const LARGEST_KEY: &[u8] = b"bbbb"; + const BOUNDARIES_AND_SIZES: [(&[u8], usize); 4] = [ + (b"aaaa", 0usize), + (b"aaab", 42usize), + (b"aaba", 96usize), + (b"bbbb", 256usize), + ]; let s = Arc::new(Mutex::new(TestState::default())); let factory = new_sst_partitioner_factory(TestSstPartitionerFactory { state: s.clone() }); @@ -364,6 +390,14 @@ mod test { LARGEST_KEY.as_ptr() as *const c_char, LARGEST_KEY.len(), ); + for (boundary_key, size) in BOUNDARIES_AND_SIZES { + crocksdb_ffi::crocksdb_sst_partitioner_context_push_bounary_and_size( + context, + boundary_key.as_ptr() as *const i8, + boundary_key.len(), + size, + ); + } } let partitioner = unsafe { crocksdb_ffi::crocksdb_sst_partitioner_factory_create_partitioner(factory, context) @@ -376,6 +410,21 @@ mod test { assert_eq!(OUTPUT_LEVEL, sl.output_level.unwrap()); assert_eq!(SMALLEST_KEY, sl.smallest_key.as_ref().unwrap().as_slice()); assert_eq!(LARGEST_KEY, sl.largest_key.as_ref().unwrap().as_slice()); + assert_eq!( + BOUNDARIES_AND_SIZES + .iter() + .map(|x| x.0) + .collect::>(), + sl.next_level_boundaries + ); + assert_eq!( + BOUNDARIES_AND_SIZES + .iter() + .skip(1) + .map(|x| x.1) + .collect::>(), + sl.next_level_sizes + ); } unsafe { crocksdb_ffi::crocksdb_sst_partitioner_destroy(partitioner); From ad0a2d781019095d0ef24ac9b4130143ce849f02 Mon Sep 17 00:00:00 2001 From: hillium Date: Fri, 8 Sep 2023 14:06:11 +0800 Subject: [PATCH 9/9] remove unused function Signed-off-by: hillium --- librocksdb_sys/crocksdb/c.cc | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/librocksdb_sys/crocksdb/c.cc b/librocksdb_sys/crocksdb/c.cc index 0e4f8c5eb..1f4c77d1e 100644 --- a/librocksdb_sys/crocksdb/c.cc +++ b/librocksdb_sys/crocksdb/c.cc @@ -6631,18 +6631,6 @@ void crocksdb_sst_partitioner_context_push_bounary_and_size( boundary_key_len); } -void crocksdb_sst_partitioner_context_next_level_segment( - crocksdb_sst_partitioner_context_t* context, int index, - const char** smallest_key, size_t* smallest_key_len, - const char** largest_key, size_t* largest_key_len, int* size) { - Slice small, large; - context->rep->OutputNextLevelSegment(index, &small, &large, size); - *smallest_key = small.data(); - *smallest_key_len = small.size(); - *largest_key = large.data(); - *largest_key_len = large.size(); -} - const char* crocksdb_sst_partitioner_context_smallest_key( crocksdb_sst_partitioner_context_t* context, size_t* key_len) { auto& smallest_key = context->rep->smallest_user_key;