Skip to content

Commit f246833

Browse files
authored
Turbopack: remove value compression dictionary (#82338)
### What? Remove the value compression dictionary. It provides no benefit: small values are merged together into blocks that are already large, and medium-sized values are already large on their own. Removing it also means that medium value blocks no longer need to be recompressed during compaction.
1 parent 1400faa commit f246833

File tree

10 files changed

+73
-163
lines changed

10 files changed

+73
-163
lines changed

turbopack/crates/turbo-persistence-tools/src/main.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ fn main() -> Result<()> {
3535
amqf_entries,
3636
sst_size,
3737
key_compression_dictionary_size,
38-
value_compression_dictionary_size,
3938
block_count,
4039
} in meta_file.entries
4140
{
@@ -45,15 +44,11 @@ fn main() -> Result<()> {
4544
);
4645
println!(" AMQF {amqf_entries} entries = {} KiB", amqf_size / 1024);
4746
println!(
48-
" {} KiB = {} kiB key compression dict + {} KiB value compression dict + \
49-
{block_count} blocks (avg {} bytes/block)",
47+
" {} KiB = {} kiB key compression dict + {block_count} blocks (avg {} \
48+
bytes/block)",
5049
sst_size / 1024,
5150
key_compression_dictionary_size / 1024,
52-
value_compression_dictionary_size / 1024,
53-
(sst_size
54-
- key_compression_dictionary_size as u64
55-
- value_compression_dictionary_size as u64)
56-
/ block_count as u64
51+
(sst_size - key_compression_dictionary_size as u64) / block_count as u64
5752
);
5853
}
5954
if !meta_file.obsolete_sst_files.is_empty() {

turbopack/crates/turbo-persistence/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ A meta file can contain metadata about multiple SST files. The metadata is store
4545
- foreach described SST file
4646
- 4 bytes sequence number of the SST file
4747
- 2 bytes key Compression Dictionary length
48-
- 2 bytes value Compression Dictionary length
4948
- 2 bytes block count
5049
- 8 bytes min hash
5150
- 8 bytes max hash
@@ -59,7 +58,6 @@ A meta file can contain metadata about multiple SST files. The metadata is store
5958
The SST file contains only data without any header.
6059

6160
- serialized key Compression Dictionary
62-
- serialized value Compression Dictionary
6361
- foreach block
6462
- 4 bytes uncompressed block length
6563
- compressed data

turbopack/crates/turbo-persistence/src/collector.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,11 @@ impl<K: StoreKey, const SIZE_SHIFT: usize> Collector<K, SIZE_SHIFT> {
9292
self.entries.push(entry);
9393
}
9494

95-
/// Sorts the entries and returns them along with the total key and value sizes. This doesn't
95+
/// Sorts the entries and returns them along with the total key size. This doesn't
9696
/// clear the entries.
97-
pub fn sorted(&mut self) -> (&[CollectorEntry<K>], usize, usize) {
97+
pub fn sorted(&mut self) -> (&[CollectorEntry<K>], usize) {
9898
self.entries.sort_unstable_by(|a, b| a.key.cmp(&b.key));
99-
(&self.entries, self.total_key_size, self.total_value_size)
99+
(&self.entries, self.total_key_size)
100100
}
101101

102102
/// Clears the collector.

turbopack/crates/turbo-persistence/src/db.rs

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -898,8 +898,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
898898
amqf,
899899
key_compression_dictionary_length: entry
900900
.key_compression_dictionary_length(),
901-
value_compression_dictionary_length: entry
902-
.value_compression_dictionary_length(),
903901
block_count: entry.block_count(),
904902
size: entry.size(),
905903
entries: 0,
@@ -914,7 +912,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
914912
parallel_scheduler: &S,
915913
entries: &[LookupEntry<'l>],
916914
total_key_size: usize,
917-
total_value_size: usize,
918915
path: &Path,
919916
seq: u32,
920917
) -> Result<(u32, File, StaticSortedFileBuilderMeta<'static>)>
@@ -924,7 +921,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
924921
write_static_stored_file(
925922
entries,
926923
total_key_size,
927-
total_value_size,
928924
&path.join(format!("{seq:08}.sst")),
929925
)
930926
})?;
@@ -959,7 +955,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
959955
let mut current: Option<LookupEntry<'_>> = None;
960956
let mut entries = Vec::new();
961957
let mut last_entries = Vec::new();
962-
let mut last_entries_total_sizes = (0, 0);
958+
let mut last_entries_total_key_size = 0;
963959
for entry in iter {
964960
let entry = entry?;
965961

@@ -975,15 +971,10 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
975971
> DATA_THRESHOLD_PER_COMPACTED_FILE
976972
|| entries.len() >= MAX_ENTRIES_PER_COMPACTED_FILE
977973
{
978-
let (
979-
selected_total_key_size,
980-
selected_total_value_size,
981-
) = last_entries_total_sizes;
974+
let selected_total_key_size =
975+
last_entries_total_key_size;
982976
swap(&mut entries, &mut last_entries);
983-
last_entries_total_sizes = (
984-
total_key_size - key_size,
985-
total_value_size - value_size,
986-
);
977+
last_entries_total_key_size = total_key_size - key_size;
987978
total_key_size = key_size;
988979
total_value_size = value_size;
989980

@@ -997,7 +988,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
997988
&self.parallel_scheduler,
998989
&entries,
999990
selected_total_key_size,
1000-
selected_total_value_size,
1001991
path,
1002992
seq,
1003993
)?);
@@ -1015,7 +1005,8 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10151005
}
10161006
if let Some(entry) = current {
10171007
total_key_size += entry.key.len();
1018-
total_value_size += entry.value.uncompressed_size_in_sst();
1008+
// Obsolete as we no longer need total_value_size
1009+
// total_value_size += entry.value.uncompressed_size_in_sst();
10191010
entries.push(entry);
10201011
}
10211012

@@ -1028,7 +1019,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10281019
&self.parallel_scheduler,
10291020
&entries,
10301021
total_key_size,
1031-
total_value_size,
10321022
path,
10331023
seq,
10341024
)?);
@@ -1039,8 +1029,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10391029
if !last_entries.is_empty() {
10401030
last_entries.append(&mut entries);
10411031

1042-
last_entries_total_sizes.0 += total_key_size;
1043-
last_entries_total_sizes.1 += total_value_size;
1032+
last_entries_total_key_size += total_key_size;
10441033

10451034
let (part1, part2) = last_entries.split_at(last_entries.len() / 2);
10461035

@@ -1052,8 +1041,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10521041
&self.parallel_scheduler,
10531042
part1,
10541043
// We don't know the exact sizes so we estimate them
1055-
last_entries_total_sizes.0 / 2,
1056-
last_entries_total_sizes.1 / 2,
1044+
last_entries_total_key_size / 2,
10571045
path,
10581046
seq1,
10591047
)?);
@@ -1062,8 +1050,7 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
10621050
new_sst_files.push(create_sst_file(
10631051
&self.parallel_scheduler,
10641052
part2,
1065-
last_entries_total_sizes.0 / 2,
1066-
last_entries_total_sizes.1 / 2,
1053+
last_entries_total_key_size / 2,
10671054
path,
10681055
seq2,
10691056
)?);
@@ -1263,8 +1250,6 @@ impl<S: ParallelScheduler> TurboPersistence<S> {
12631250
amqf_entries: amqf.len(),
12641251
key_compression_dictionary_size: entry
12651252
.key_compression_dictionary_length(),
1266-
value_compression_dictionary_size: entry
1267-
.value_compression_dictionary_length(),
12681253
block_count: entry.block_count(),
12691254
}
12701255
})
@@ -1302,6 +1287,5 @@ pub struct MetaFileEntryInfo {
13021287
pub amqf_entries: usize,
13031288
pub sst_size: u64,
13041289
pub key_compression_dictionary_size: u16,
1305-
pub value_compression_dictionary_size: u16,
13061290
pub block_count: u16,
13071291
}

turbopack/crates/turbo-persistence/src/lookup_entry.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ pub enum LazyLookupValue<'l> {
2222
Medium {
2323
uncompressed_size: u32,
2424
block: &'l [u8],
25-
dictionary: &'l [u8],
2625
},
2726
}
2827

@@ -79,11 +78,9 @@ impl Entry for LookupEntry<'_> {
7978
LazyLookupValue::Medium {
8079
uncompressed_size,
8180
block,
82-
dictionary,
8381
} => EntryValue::MediumCompressed {
8482
uncompressed_size: *uncompressed_size,
8583
block,
86-
dictionary,
8784
},
8885
}
8986
}

turbopack/crates/turbo-persistence/src/meta_file.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,6 @@ impl MetaEntry {
144144
self.sst_data.key_compression_dictionary_length
145145
}
146146

147-
pub fn value_compression_dictionary_length(&self) -> u16 {
148-
self.sst_data.value_compression_dictionary_length
149-
}
150-
151147
pub fn block_count(&self) -> u16 {
152148
self.sst_data.block_count
153149
}
@@ -222,7 +218,6 @@ impl MetaFile {
222218
sst_data: StaticSortedFileMetaData {
223219
sequence_number: file.read_u32::<BE>()?,
224220
key_compression_dictionary_length: file.read_u16::<BE>()?,
225-
value_compression_dictionary_length: file.read_u16::<BE>()?,
226221
block_count: file.read_u16::<BE>()?,
227222
},
228223
family,

turbopack/crates/turbo-persistence/src/meta_file_builder.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ impl<'a> MetaFileBuilder<'a> {
5858
for (sequence_number, sst) in &self.entries {
5959
file.write_u32::<BE>(*sequence_number)?;
6060
file.write_u16::<BE>(sst.key_compression_dictionary_length)?;
61-
file.write_u16::<BE>(sst.value_compression_dictionary_length)?;
6261
file.write_u16::<BE>(sst.block_count)?;
6362
file.write_u64::<BE>(sst.min_hash)?;
6463
file.write_u64::<BE>(sst.max_hash)?;

turbopack/crates/turbo-persistence/src/static_sorted_file.rs

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ pub struct StaticSortedFileMetaData {
6565
pub sequence_number: u32,
6666
/// The length of the key compression dictionary.
6767
pub key_compression_dictionary_length: u16,
68-
/// The length of the value compression dictionary.
69-
pub value_compression_dictionary_length: u16,
7068
/// The number of blocks in the SST file.
7169
pub block_count: u16,
7270
}
@@ -79,21 +77,14 @@ impl StaticSortedFileMetaData {
7977

8078
pub fn blocks_start(&self) -> usize {
8179
let k: usize = self.key_compression_dictionary_length.into();
82-
let v: usize = self.value_compression_dictionary_length.into();
83-
k + v
80+
k
8481
}
8582

8683
pub fn key_compression_dictionary_range(&self) -> Range<usize> {
8784
let start = 0;
8885
let end: usize = self.key_compression_dictionary_length.into();
8986
start..end
9087
}
91-
92-
pub fn value_compression_dictionary_range(&self) -> Range<usize> {
93-
let start = self.key_compression_dictionary_length as usize;
94-
let end = start + self.value_compression_dictionary_length as usize;
95-
start..end
96-
}
9788
}
9889

9990
/// A memory mapped SST file.
@@ -310,7 +301,7 @@ impl StaticSortedFile {
310301
match value_block_cache.get_value_or_guard(&(self.meta.sequence_number, block), None) {
311302
GuardResult::Value(block) => block,
312303
GuardResult::Guard(guard) => {
313-
let block = self.read_value_block(block)?;
304+
let block = self.read_small_value_block(block)?;
314305
let _ = guard.insert(block.clone());
315306
block
316307
}
@@ -323,33 +314,34 @@ impl StaticSortedFile {
323314
fn read_key_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
324315
self.read_block(
325316
block_index,
326-
&self.mmap[self.meta.key_compression_dictionary_range()],
317+
Some(&self.mmap[self.meta.key_compression_dictionary_range()]),
327318
false,
328319
)
329320
}
330321

322+
/// Reads a value block from the file.
323+
fn read_small_value_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
324+
self.read_block(block_index, None, false)
325+
}
326+
331327
/// Reads a value block from the file.
332328
fn read_value_block(&self, block_index: u16) -> Result<ArcSlice<u8>> {
333-
self.read_block(
334-
block_index,
335-
&self.mmap[self.meta.value_compression_dictionary_range()],
336-
false,
337-
)
329+
self.read_block(block_index, None, true)
338330
}
339331

340332
/// Reads a block from the file.
341333
fn read_block(
342334
&self,
343335
block_index: u16,
344-
compression_dictionary: &[u8],
336+
compression_dictionary: Option<&[u8]>,
345337
long_term: bool,
346338
) -> Result<ArcSlice<u8>> {
347339
let (uncompressed_length, block) = self.get_compressed_block(block_index)?;
348340

349341
let buffer = decompress_into_arc(
350342
uncompressed_length,
351343
block,
352-
Some(compression_dictionary),
344+
compression_dictionary,
353345
long_term,
354346
)?;
355347
Ok(ArcSlice::from(buffer))
@@ -496,8 +488,6 @@ impl<'l> StaticSortedFileIter<'l> {
496488
LazyLookupValue::Medium {
497489
uncompressed_size,
498490
block,
499-
dictionary: &self.this.mmap
500-
[self.this.meta.value_compression_dictionary_range()],
501491
}
502492
} else {
503493
let value = self

0 commit comments

Comments
 (0)