Skip to content

Commit 5acb884

Browse files
committed
Turbopack: remove value compression dictionary
The value compression dictionary provides no benefit: value blocks are already large, because small values are merged together into shared blocks and medium-sized values are large on their own. Removing it has the additional benefit that medium value blocks no longer need to be recompressed during compaction.
1 parent 670e14a commit 5acb884

File tree

10 files changed

+70
-160
lines changed

10 files changed

+70
-160
lines changed

turbopack/crates/turbo-persistence-tools/src/main.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ fn main() -> Result<()> {
3535
amqf_entries,
3636
sst_size,
3737
key_compression_dictionary_size,
38-
value_compression_dictionary_size,
3938
block_count,
4039
} in meta_file.entries
4140
{
@@ -45,15 +44,11 @@ fn main() -> Result<()> {
4544
);
4645
println!(" AMQF {amqf_entries} entries = {} KiB", amqf_size / 1024);
4746
println!(
48-
" {} KiB = {} kiB key compression dict + {} KiB value compression dict + \
49-
{block_count} blocks (avg {} bytes/block)",
47+
" {} KiB = {} kiB key compression dict + {block_count} blocks (avg {} \
48+
bytes/block)",
5049
sst_size / 1024,
5150
key_compression_dictionary_size / 1024,
52-
value_compression_dictionary_size / 1024,
53-
(sst_size
54-
- key_compression_dictionary_size as u64
55-
- value_compression_dictionary_size as u64)
56-
/ block_count as u64
51+
(sst_size - key_compression_dictionary_size as u64) / block_count as u64
5752
);
5853
}
5954
if !meta_file.obsolete_sst_files.is_empty() {

turbopack/crates/turbo-persistence/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ A meta file can contain metadata about multiple SST files. The metadata is store
4545
- foreach described SST file
4646
- 4 bytes sequence number of the SST file
4747
- 2 bytes key Compression Dictionary length
48-
- 2 bytes value Compression Dictionary length
4948
- 2 bytes block count
5049
- 8 bytes min hash
5150
- 8 bytes max hash
@@ -59,7 +58,6 @@ A meta file can contain metadata about multiple SST files. The metadata is store
5958
The SST file contains only data without any header.
6059

6160
- serialized key Compression Dictionary
62-
- serialized value Compression Dictionary
6361
- foreach block
6462
- 4 bytes uncompressed block length
6563
- compressed data

turbopack/crates/turbo-persistence/src/collector.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,11 @@ impl<K: StoreKey, const SIZE_SHIFT: usize> Collector<K, SIZE_SHIFT> {
9292
self.entries.push(entry);
9393
}
9494

95-
/// Sorts the entries and returns them along with the total key and value sizes. This doesn't
95+
/// Sorts the entries and returns them along with the total key size. This doesn't
9696
/// clear the entries.
97-
pub fn sorted(&mut self) -> (&[CollectorEntry<K>], usize, usize) {
97+
pub fn sorted(&mut self) -> (&[CollectorEntry<K>], usize) {
9898
self.entries.sort_unstable_by(|a, b| a.key.cmp(&b.key));
99-
(&self.entries, self.total_key_size, self.total_value_size)
99+
(&self.entries, self.total_key_size)
100100
}
101101

102102
/// Clears the collector.

turbopack/crates/turbo-persistence/src/db.rs

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -897,8 +897,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
897897
amqf,
898898
key_compression_dictionary_length: entry
899899
.key_compression_dictionary_length(),
900-
value_compression_dictionary_length: entry
901-
.value_compression_dictionary_length(),
902900
block_count: entry.block_count(),
903901
size: entry.size(),
904902
entries: 0,
@@ -913,7 +911,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
913911
parallel_scheduler: &S,
914912
entries: &[LookupEntry<'l>],
915913
total_key_size: usize,
916-
total_value_size: usize,
917914
path: &Path,
918915
seq: u32,
919916
) -> Result<(u32, File, StaticSortedFileBuilderMeta<'static>)>
@@ -923,7 +920,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
923920
write_static_stored_file(
924921
entries,
925922
total_key_size,
926-
total_value_size,
927923
&path.join(format!("{seq:08}.sst")),
928924
)
929925
})?;
@@ -958,7 +954,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
958954
let mut current: Option<LookupEntry<'_>> = None;
959955
let mut entries = Vec::new();
960956
let mut last_entries = Vec::new();
961-
let mut last_entries_total_sizes = (0, 0);
957+
let mut last_entries_total_key_size = 0;
962958
for entry in iter {
963959
let entry = entry?;
964960

@@ -974,15 +970,10 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
974970
> DATA_THRESHOLD_PER_COMPACTED_FILE
975971
|| entries.len() >= MAX_ENTRIES_PER_COMPACTED_FILE
976972
{
977-
let (
978-
selected_total_key_size,
979-
selected_total_value_size,
980-
) = last_entries_total_sizes;
973+
let selected_total_key_size =
974+
last_entries_total_key_size;
981975
swap(&mut entries, &mut last_entries);
982-
last_entries_total_sizes = (
983-
total_key_size - key_size,
984-
total_value_size - value_size,
985-
);
976+
last_entries_total_key_size = total_key_size - key_size;
986977
total_key_size = key_size;
987978
total_value_size = value_size;
988979

@@ -996,7 +987,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
996987
&self.parallel_scheduler,
997988
&entries,
998989
selected_total_key_size,
999-
selected_total_value_size,
1000990
path,
1001991
seq,
1002992
)?);
@@ -1014,7 +1004,8 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10141004
}
10151005
if let Some(entry) = current {
10161006
total_key_size += entry.key.len();
1017-
total_value_size += entry.value.uncompressed_size_in_sst();
1007+
// Obsolete as we no longer need total_value_size
1008+
// total_value_size += entry.value.uncompressed_size_in_sst();
10181009
entries.push(entry);
10191010
}
10201011

@@ -1027,7 +1018,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10271018
&self.parallel_scheduler,
10281019
&entries,
10291020
total_key_size,
1030-
total_value_size,
10311021
path,
10321022
seq,
10331023
)?);
@@ -1038,8 +1028,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10381028
if !last_entries.is_empty() {
10391029
last_entries.append(&mut entries);
10401030

1041-
last_entries_total_sizes.0 += total_key_size;
1042-
last_entries_total_sizes.1 += total_value_size;
1031+
last_entries_total_key_size += total_key_size;
10431032

10441033
let (part1, part2) = last_entries.split_at(last_entries.len() / 2);
10451034

@@ -1051,8 +1040,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10511040
&self.parallel_scheduler,
10521041
part1,
10531042
// We don't know the exact sizes so we estimate them
1054-
last_entries_total_sizes.0 / 2,
1055-
last_entries_total_sizes.1 / 2,
1043+
last_entries_total_key_size / 2,
10561044
path,
10571045
seq1,
10581046
)?);
@@ -1061,8 +1049,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10611049
new_sst_files.push(create_sst_file(
10621050
&self.parallel_scheduler,
10631051
part2,
1064-
last_entries_total_sizes.0 / 2,
1065-
last_entries_total_sizes.1 / 2,
1052+
last_entries_total_key_size / 2,
10661053
path,
10671054
seq2,
10681055
)?);
@@ -1262,8 +1249,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
12621249
amqf_entries: amqf.len(),
12631250
key_compression_dictionary_size: entry
12641251
.key_compression_dictionary_length(),
1265-
value_compression_dictionary_size: entry
1266-
.value_compression_dictionary_length(),
12671252
block_count: entry.block_count(),
12681253
}
12691254
})
@@ -1301,6 +1286,5 @@ pub struct MetaFileEntryInfo {
13011286
pub amqf_entries: usize,
13021287
pub sst_size: u64,
13031288
pub key_compression_dictionary_size: u16,
1304-
pub value_compression_dictionary_size: u16,
13051289
pub block_count: u16,
13061290
}

turbopack/crates/turbo-persistence/src/lookup_entry.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ pub enum LazyLookupValue<'l> {
2222
Medium {
2323
uncompressed_size: u32,
2424
block: &'l [u8],
25-
dictionary: &'l [u8],
2625
},
2726
}
2827

@@ -79,11 +78,9 @@ impl Entry for LookupEntry<'_> {
7978
LazyLookupValue::Medium {
8079
uncompressed_size,
8180
block,
82-
dictionary,
8381
} => EntryValue::MediumCompressed {
8482
uncompressed_size: *uncompressed_size,
8583
block,
86-
dictionary,
8784
},
8885
}
8986
}

turbopack/crates/turbo-persistence/src/meta_file.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,6 @@ impl MetaEntry {
144144
self.sst_data.key_compression_dictionary_length
145145
}
146146

147-
pub fn value_compression_dictionary_length(&self) -> u16 {
148-
self.sst_data.value_compression_dictionary_length
149-
}
150-
151147
pub fn block_count(&self) -> u16 {
152148
self.sst_data.block_count
153149
}
@@ -222,7 +218,6 @@ impl MetaFile {
222218
sst_data: StaticSortedFileMetaData {
223219
sequence_number: file.read_u32::<BE>()?,
224220
key_compression_dictionary_length: file.read_u16::<BE>()?,
225-
value_compression_dictionary_length: file.read_u16::<BE>()?,
226221
block_count: file.read_u16::<BE>()?,
227222
},
228223
family,

turbopack/crates/turbo-persistence/src/meta_file_builder.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ impl<'a> MetaFileBuilder<'a> {
5858
for (sequence_number, sst) in &self.entries {
5959
file.write_u32::<BE>(*sequence_number)?;
6060
file.write_u16::<BE>(sst.key_compression_dictionary_length)?;
61-
file.write_u16::<BE>(sst.value_compression_dictionary_length)?;
6261
file.write_u16::<BE>(sst.block_count)?;
6362
file.write_u64::<BE>(sst.min_hash)?;
6463
file.write_u64::<BE>(sst.max_hash)?;

turbopack/crates/turbo-persistence/src/static_sorted_file.rs

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ pub struct StaticSortedFileMetaData {
6565
pub sequence_number: u32,
6666
/// The length of the key compression dictionary.
6767
pub key_compression_dictionary_length: u16,
68-
/// The length of the value compression dictionary.
69-
pub value_compression_dictionary_length: u16,
7068
/// The number of blocks in the SST file.
7169
pub block_count: u16,
7270
}
@@ -79,21 +77,14 @@ impl StaticSortedFileMetaData {
7977

8078
pub fn blocks_start(&self) -> usize {
8179
let k: usize = self.key_compression_dictionary_length.into();
82-
let v: usize = self.value_compression_dictionary_length.into();
83-
k + v
80+
k
8481
}
8582

8683
pub fn key_compression_dictionary_range(&self) -> Range<usize> {
8784
let start = 0;
8885
let end: usize = self.key_compression_dictionary_length.into();
8986
start..end
9087
}
91-
92-
pub fn value_compression_dictionary_range(&self) -> Range<usize> {
93-
let start = self.key_compression_dictionary_length as usize;
94-
let end = start + self.value_compression_dictionary_length as usize;
95-
start..end
96-
}
9788
}
9889

9990
/// A memory mapped SST file.
@@ -310,7 +301,7 @@ impl StaticSortedFile {
310301
match value_block_cache.get_value_or_guard(&(self.meta.sequence_number, block), None) {
311302
GuardResult::Value(block) => block,
312303
GuardResult::Guard(guard) => {
313-
let block = self.read_value_block(block)?;
304+
let block = self.read_small_value_block(block)?;
314305
let _ = guard.insert(block.clone());
315306
block
316307
}
@@ -323,33 +314,34 @@ impl StaticSortedFile {
323314
fn read_key_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
324315
self.read_block(
325316
block_index,
326-
&self.mmap[self.meta.key_compression_dictionary_range()],
317+
Some(&self.mmap[self.meta.key_compression_dictionary_range()]),
327318
false,
328319
)
329320
}
330321

322+
/// Reads a value block from the file.
323+
fn read_small_value_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
324+
self.read_block(block_index, None, false)
325+
}
326+
331327
/// Reads a value block from the file.
332328
fn read_value_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
333-
self.read_block(
334-
block_index,
335-
&self.mmap[self.meta.value_compression_dictionary_range()],
336-
false,
337-
)
329+
self.read_block(block_index, None, true)
338330
}
339331

340332
/// Reads a block from the file.
341333
fn read_block(
342334
&self,
343335
block_index: u16,
344-
compression_dictionary: &[u8],
336+
compression_dictionary: Option<&[u8]>,
345337
long_term: bool,
346338
) -> Result<ArcSlice<u8>> {
347339
let (uncompressed_length, block) = self.get_compressed_block(block_index)?;
348340

349341
let buffer = decompress_into_arc(
350342
uncompressed_length,
351343
block,
352-
Some(compression_dictionary),
344+
compression_dictionary,
353345
long_term,
354346
)?;
355347
Ok(ArcSlice::from(buffer))
@@ -496,8 +488,6 @@ impl<'l> StaticSortedFileIter<'l> {
496488
LazyLookupValue::Medium {
497489
uncompressed_size,
498490
block,
499-
dictionary: &self.this.mmap
500-
[self.this.meta.value_compression_dictionary_range()],
501491
}
502492
} else {
503493
let value = self

0 commit comments

Comments
 (0)