@@ -183,6 +183,11 @@ fn compute_dictionary<E>(
183183 return Ok ( Vec :: new ( ) ) ;
184184 }
185185
186+ let max_sample_size = max (
187+ MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY ,
188+ key_compression_samples_size / 1024 ,
189+ ) ;
190+
186191 let mut sample_sizes = Vec :: new ( ) ;
187192
188193 for entry in entries {
@@ -192,7 +197,8 @@ fn compute_dictionary<E>(
192197 }
193198 let len = get_entry_size ( entry) ;
194199 if len >= MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY {
195- let optimal_len = max ( MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY , len / 8 ) ;
200+ let optimal_len =
201+ ( len / 8 ) . clamp ( MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY , max_sample_size) ;
196202 let used_len = min ( key_remaining, optimal_len) ;
197203 if len <= used_len {
198204 sample_sizes. push ( len) ;
@@ -204,12 +210,14 @@ fn compute_dictionary<E>(
204210 }
205211 }
206212 }
207- debug_assert ! ( buffer. len( ) == sample_sizes. iter( ) . sum:: <usize >( ) ) ;
208- let result = if buffer. len ( ) > MIN_COMPRESSION_SAMPLES_SIZE && sample_sizes. len ( ) > 5 {
209- zstd:: dict:: from_continuous ( buffer, & sample_sizes, dictionary_size) ?
210- } else {
211- Vec :: new ( )
212- } ;
213+ /// The zstd dict builder requires at least 7 samples
214+ const MIN_SAMPLE_SIZE : usize = 7 ;
215+ let result =
216+ if buffer. len ( ) > MIN_COMPRESSION_SAMPLES_SIZE && sample_sizes. len ( ) > MIN_SAMPLE_SIZE {
217+ zstd:: dict:: from_continuous ( buffer, & sample_sizes, dictionary_size) ?
218+ } else {
219+ Vec :: new ( )
220+ } ;
213221 buffer. clear ( ) ;
214222 Ok ( result)
215223}
@@ -221,7 +229,7 @@ fn compute_key_compression_dictionary<E: Entry>(
221229 total_key_size : usize ,
222230 buffer : & mut Vec < u8 > ,
223231) -> Result < Vec < u8 > > {
224- compute_dictionary (
232+ Ok ( compute_dictionary (
225233 entries,
226234 total_key_size,
227235 KEY_COMPRESSION_SAMPLES_SIZE ,
@@ -242,6 +250,7 @@ fn compute_key_compression_dictionary<E: Entry>(
242250 buffer,
243251 )
244252 . context ( "Key dictionary creation failed" )
253+ . unwrap ( ) )
245254}
246255
247256/// Computes compression dictionaries from values of all entries
0 commit comments