Skip to content

Commit 9523751

Browse files
committed
fixup compute_dictionary
1 parent f7eb27d commit 9523751

File tree

1 file changed

+17
-8
lines changed

1 file changed

+17
-8
lines changed

turbopack/crates/turbo-persistence/src/static_sorted_file_builder.rs

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,11 @@ fn compute_dictionary<E>(
183183
return Ok(Vec::new());
184184
}
185185

186+
let max_sample_size = max(
187+
MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY,
188+
key_compression_samples_size / 1024,
189+
);
190+
186191
let mut sample_sizes = Vec::new();
187192

188193
for entry in entries {
@@ -192,7 +197,8 @@ fn compute_dictionary<E>(
192197
}
193198
let len = get_entry_size(entry);
194199
if len >= MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY {
195-
let optimal_len = max(MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY, len / 8);
200+
let optimal_len =
201+
(len / 8).clamp(MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY, max_sample_size);
196202
let used_len = min(key_remaining, optimal_len);
197203
if len <= used_len {
198204
sample_sizes.push(len);
@@ -204,12 +210,14 @@ fn compute_dictionary<E>(
204210
}
205211
}
206212
}
207-
debug_assert!(buffer.len() == sample_sizes.iter().sum::<usize>());
208-
let result = if buffer.len() > MIN_COMPRESSION_SAMPLES_SIZE && sample_sizes.len() > 5 {
209-
zstd::dict::from_continuous(buffer, &sample_sizes, dictionary_size)?
210-
} else {
211-
Vec::new()
212-
};
213+
/// The zstd dict builder requires at least 7 samples
214+
const MIN_SAMPLE_SIZE: usize = 7;
215+
let result =
216+
if buffer.len() > MIN_COMPRESSION_SAMPLES_SIZE && sample_sizes.len() > MIN_SAMPLE_SIZE {
217+
zstd::dict::from_continuous(buffer, &sample_sizes, dictionary_size)?
218+
} else {
219+
Vec::new()
220+
};
213221
buffer.clear();
214222
Ok(result)
215223
}
@@ -221,7 +229,7 @@ fn compute_key_compression_dictionary<E: Entry>(
221229
total_key_size: usize,
222230
buffer: &mut Vec<u8>,
223231
) -> Result<Vec<u8>> {
224-
compute_dictionary(
232+
Ok(compute_dictionary(
225233
entries,
226234
total_key_size,
227235
KEY_COMPRESSION_SAMPLES_SIZE,
@@ -242,6 +250,7 @@ fn compute_key_compression_dictionary<E: Entry>(
242250
buffer,
243251
)
244252
.context("Key dictionary creation failed")
253+
.unwrap())
245254
}
246255

247256
/// Computes compression dictionaries from values of all entries

0 commit comments

Comments
 (0)