Skip to content

Commit f45d2de

Browse files
committed
Turbopack: remove value compression dictionary
The value compression dictionary provides no benefit: value blocks are already large, because small values are merged together into shared blocks and medium-sized values are large on their own. Removing it also means that medium value blocks no longer need to be recompressed during compaction.
1 parent 818e070 commit f45d2de

File tree

10 files changed

+71
-160
lines changed

10 files changed

+71
-160
lines changed

turbopack/crates/turbo-persistence-tools/src/main.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ fn main() -> Result<()> {
3535
amqf_entries,
3636
sst_size,
3737
key_compression_dictionary_size,
38-
value_compression_dictionary_size,
3938
block_count,
4039
} in meta_file.entries
4140
{
@@ -45,15 +44,11 @@ fn main() -> Result<()> {
4544
);
4645
println!(" AMQF {amqf_entries} entries = {} KiB", amqf_size / 1024);
4746
println!(
48-
" {} KiB = {} kiB key compression dict + {} KiB value compression dict + \
49-
{block_count} blocks (avg {} bytes/block)",
47+
" {} KiB = {} kiB key compression dict + {block_count} blocks (avg {} \
48+
bytes/block)",
5049
sst_size / 1024,
5150
key_compression_dictionary_size / 1024,
52-
value_compression_dictionary_size / 1024,
53-
(sst_size
54-
- key_compression_dictionary_size as u64
55-
- value_compression_dictionary_size as u64)
56-
/ block_count as u64
51+
(sst_size - key_compression_dictionary_size as u64) / block_count as u64
5752
);
5853
}
5954
if !meta_file.obsolete_sst_files.is_empty() {

turbopack/crates/turbo-persistence/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ A meta file can contain metadata about multiple SST files. The metadata is store
4545
- foreach described SST file
4646
- 4 bytes sequence number of the SST file
4747
- 2 bytes key Compression Dictionary length
48-
- 2 bytes value Compression Dictionary length
4948
- 2 bytes block count
5049
- 8 bytes min hash
5150
- 8 bytes max hash
@@ -59,7 +58,6 @@ A meta file can contain metadata about multiple SST files. The metadata is store
5958
The SST file contains only data without any header.
6059

6160
- serialized key Compression Dictionary
62-
- serialized value Compression Dictionary
6361
- foreach block
6462
- 4 bytes uncompressed block length
6563
- compressed data

turbopack/crates/turbo-persistence/src/collector.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,11 @@ impl<K: StoreKey, const SIZE_SHIFT: usize> Collector<K, SIZE_SHIFT> {
9292
self.entries.push(entry);
9393
}
9494

95-
/// Sorts the entries and returns them along with the total key and value sizes. This doesn't
95+
/// Sorts the entries and returns them along with the total key size. This doesn't
9696
/// clear the entries.
97-
pub fn sorted(&mut self) -> (&[CollectorEntry<K>], usize, usize) {
97+
pub fn sorted(&mut self) -> (&[CollectorEntry<K>], usize) {
9898
self.entries.sort_unstable_by(|a, b| a.key.cmp(&b.key));
99-
(&self.entries, self.total_key_size, self.total_value_size)
99+
(&self.entries, self.total_key_size)
100100
}
101101

102102
/// Clears the collector.

turbopack/crates/turbo-persistence/src/db.rs

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -902,8 +902,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
902902
amqf,
903903
key_compression_dictionary_length: entry
904904
.key_compression_dictionary_length(),
905-
value_compression_dictionary_length: entry
906-
.value_compression_dictionary_length(),
907905
block_count: entry.block_count(),
908906
size: entry.size(),
909907
entries: 0,
@@ -918,7 +916,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
918916
parallel_scheduler: &S,
919917
entries: &[LookupEntry<'l>],
920918
total_key_size: usize,
921-
total_value_size: usize,
922919
path: &Path,
923920
seq: u32,
924921
) -> Result<(u32, File, StaticSortedFileBuilderMeta<'static>)>
@@ -929,7 +926,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
929926
write_static_stored_file(
930927
entries,
931928
total_key_size,
932-
total_value_size,
933929
&path.join(format!("{seq:08}.sst")),
934930
)
935931
})?;
@@ -964,7 +960,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
964960
let mut current: Option<LookupEntry<'_>> = None;
965961
let mut entries = Vec::new();
966962
let mut last_entries = Vec::new();
967-
let mut last_entries_total_sizes = (0, 0);
963+
let mut last_entries_total_key_size = 0;
968964
for entry in iter {
969965
let entry = entry?;
970966

@@ -981,15 +977,11 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
981977
> DATA_THRESHOLD_PER_COMPACTED_FILE
982978
|| entries.len() >= MAX_ENTRIES_PER_COMPACTED_FILE
983979
{
984-
let (
985-
selected_total_key_size,
986-
selected_total_value_size,
987-
) = last_entries_total_sizes;
980+
let selected_total_key_size =
981+
last_entries_total_key_size;
988982
swap(&mut entries, &mut last_entries);
989-
last_entries_total_sizes = (
990-
total_key_size - key_size,
991-
total_value_size - value_size,
992-
);
983+
last_entries_total_key_size =
984+
total_key_size - key_size;
993985
total_key_size = key_size;
994986
total_value_size = value_size;
995987

@@ -1003,7 +995,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
1003995
&self.parallel_scheduler,
1004996
&entries,
1005997
selected_total_key_size,
1006-
selected_total_value_size,
1007998
path,
1008999
seq,
10091000
)?);
@@ -1021,7 +1012,8 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10211012
}
10221013
if let Some(entry) = current {
10231014
total_key_size += entry.key.len();
1024-
total_value_size += entry.value.uncompressed_size_in_sst();
1015+
// Obsolete as we no longer need total_value_size
1016+
// total_value_size += entry.value.uncompressed_size_in_sst();
10251017
entries.push(entry);
10261018
}
10271019

@@ -1034,7 +1026,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10341026
&self.parallel_scheduler,
10351027
&entries,
10361028
total_key_size,
1037-
total_value_size,
10381029
path,
10391030
seq,
10401031
)?);
@@ -1045,8 +1036,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10451036
if !last_entries.is_empty() {
10461037
last_entries.append(&mut entries);
10471038

1048-
last_entries_total_sizes.0 += total_key_size;
1049-
last_entries_total_sizes.1 += total_value_size;
1039+
last_entries_total_key_size += total_key_size;
10501040

10511041
let (part1, part2) =
10521042
last_entries.split_at(last_entries.len() / 2);
@@ -1059,8 +1049,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10591049
&self.parallel_scheduler,
10601050
part1,
10611051
// We don't know the exact sizes so we estimate them
1062-
last_entries_total_sizes.0 / 2,
1063-
last_entries_total_sizes.1 / 2,
1052+
last_entries_total_key_size / 2,
10641053
path,
10651054
seq1,
10661055
)?);
@@ -1069,8 +1058,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10691058
new_sst_files.push(create_sst_file(
10701059
&self.parallel_scheduler,
10711060
part2,
1072-
last_entries_total_sizes.0 / 2,
1073-
last_entries_total_sizes.1 / 2,
1061+
last_entries_total_key_size / 2,
10741062
path,
10751063
seq2,
10761064
)?);
@@ -1271,8 +1259,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
12711259
amqf_entries: amqf.len(),
12721260
key_compression_dictionary_size: entry
12731261
.key_compression_dictionary_length(),
1274-
value_compression_dictionary_size: entry
1275-
.value_compression_dictionary_length(),
12761262
block_count: entry.block_count(),
12771263
}
12781264
})
@@ -1310,6 +1296,5 @@ pub struct MetaFileEntryInfo {
13101296
pub amqf_entries: usize,
13111297
pub sst_size: u64,
13121298
pub key_compression_dictionary_size: u16,
1313-
pub value_compression_dictionary_size: u16,
13141299
pub block_count: u16,
13151300
}

turbopack/crates/turbo-persistence/src/lookup_entry.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ pub enum LazyLookupValue<'l> {
2222
Medium {
2323
uncompressed_size: u32,
2424
block: &'l [u8],
25-
dictionary: &'l [u8],
2625
},
2726
}
2827

@@ -79,11 +78,9 @@ impl Entry for LookupEntry<'_> {
7978
LazyLookupValue::Medium {
8079
uncompressed_size,
8180
block,
82-
dictionary,
8381
} => EntryValue::MediumCompressed {
8482
uncompressed_size: *uncompressed_size,
8583
block,
86-
dictionary,
8784
},
8885
}
8986
}

turbopack/crates/turbo-persistence/src/meta_file.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,6 @@ impl MetaEntry {
144144
self.sst_data.key_compression_dictionary_length
145145
}
146146

147-
pub fn value_compression_dictionary_length(&self) -> u16 {
148-
self.sst_data.value_compression_dictionary_length
149-
}
150-
151147
pub fn block_count(&self) -> u16 {
152148
self.sst_data.block_count
153149
}
@@ -222,7 +218,6 @@ impl MetaFile {
222218
sst_data: StaticSortedFileMetaData {
223219
sequence_number: file.read_u32::<BE>()?,
224220
key_compression_dictionary_length: file.read_u16::<BE>()?,
225-
value_compression_dictionary_length: file.read_u16::<BE>()?,
226221
block_count: file.read_u16::<BE>()?,
227222
},
228223
family,

turbopack/crates/turbo-persistence/src/meta_file_builder.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ impl<'a> MetaFileBuilder<'a> {
5858
for (sequence_number, sst) in &self.entries {
5959
file.write_u32::<BE>(*sequence_number)?;
6060
file.write_u16::<BE>(sst.key_compression_dictionary_length)?;
61-
file.write_u16::<BE>(sst.value_compression_dictionary_length)?;
6261
file.write_u16::<BE>(sst.block_count)?;
6362
file.write_u64::<BE>(sst.min_hash)?;
6463
file.write_u64::<BE>(sst.max_hash)?;

turbopack/crates/turbo-persistence/src/static_sorted_file.rs

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ pub struct StaticSortedFileMetaData {
6565
pub sequence_number: u32,
6666
/// The length of the key compression dictionary.
6767
pub key_compression_dictionary_length: u16,
68-
/// The length of the value compression dictionary.
69-
pub value_compression_dictionary_length: u16,
7068
/// The number of blocks in the SST file.
7169
pub block_count: u16,
7270
}
@@ -79,21 +77,14 @@ impl StaticSortedFileMetaData {
7977

8078
pub fn blocks_start(&self) -> usize {
8179
let k: usize = self.key_compression_dictionary_length.into();
82-
let v: usize = self.value_compression_dictionary_length.into();
83-
k + v
80+
k
8481
}
8582

8683
pub fn key_compression_dictionary_range(&self) -> Range<usize> {
8784
let start = 0;
8885
let end: usize = self.key_compression_dictionary_length.into();
8986
start..end
9087
}
91-
92-
pub fn value_compression_dictionary_range(&self) -> Range<usize> {
93-
let start = self.key_compression_dictionary_length as usize;
94-
let end = start + self.value_compression_dictionary_length as usize;
95-
start..end
96-
}
9788
}
9889

9990
/// A memory mapped SST file.
@@ -310,7 +301,7 @@ impl StaticSortedFile {
310301
match value_block_cache.get_value_or_guard(&(self.meta.sequence_number, block), None) {
311302
GuardResult::Value(block) => block,
312303
GuardResult::Guard(guard) => {
313-
let block = self.read_value_block(block)?;
304+
let block = self.read_small_value_block(block)?;
314305
let _ = guard.insert(block.clone());
315306
block
316307
}
@@ -323,33 +314,34 @@ impl StaticSortedFile {
323314
fn read_key_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
324315
self.read_block(
325316
block_index,
326-
&self.mmap[self.meta.key_compression_dictionary_range()],
317+
Some(&self.mmap[self.meta.key_compression_dictionary_range()]),
327318
false,
328319
)
329320
}
330321

322+
/// Reads a small value block from the file.
323+
fn read_small_value_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
324+
self.read_block(block_index, None, false)
325+
}
326+
331327
/// Reads a value block from the file.
332328
fn read_value_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
333-
self.read_block(
334-
block_index,
335-
&self.mmap[self.meta.value_compression_dictionary_range()],
336-
false,
337-
)
329+
self.read_block(block_index, None, true)
338330
}
339331

340332
/// Reads a block from the file.
341333
fn read_block(
342334
&self,
343335
block_index: u16,
344-
compression_dictionary: &[u8],
336+
compression_dictionary: Option<&[u8]>,
345337
long_term: bool,
346338
) -> Result<ArcSlice<u8>> {
347339
let (uncompressed_length, block) = self.get_compressed_block(block_index)?;
348340

349341
let buffer = decompress_into_arc(
350342
uncompressed_length,
351343
block,
352-
Some(compression_dictionary),
344+
compression_dictionary,
353345
long_term,
354346
)?;
355347
Ok(ArcSlice::from(buffer))
@@ -496,8 +488,6 @@ impl<'l> StaticSortedFileIter<'l> {
496488
LazyLookupValue::Medium {
497489
uncompressed_size,
498490
block,
499-
dictionary: &self.this.mmap
500-
[self.this.meta.value_compression_dictionary_range()],
501491
}
502492
} else {
503493
let value = self

0 commit comments

Comments
 (0)