lib/deflate_compress: select min_size_to_compress based on level

The cutoff for outputting uncompressed data is currently < 16 bytes for all compression levels. That isn't ideal, since the higher the compression level, the more we should bother with very small inputs; and the lower the compression level, the less we should bother.

Use a formula that produces the following cutoffs:

    Level  Cutoff
    -----  ------
    0      56
    1      52
    2      48
    3      44
    4      40
    5      36
    6      32
    7      28
    8      24
    9      20
    10     16
    11     12
    12     8

Update #67
ebiggers · Oct 18, 2020 · d26000c · d26000c
1 parent a2d9266
commit d26000c
Showing 1 changed file with 17 additions and 8 deletions.
diff --git a/lib/deflate_compress.c b/lib/deflate_compress.c
@@ -366,6 +366,9 @@ struct libdeflate_compressor {
 	/* The compression level with which this compressor was created.  */
 	unsigned compression_level;
 
+	/* Anything smaller than this we won't bother trying to compress.  */
+	unsigned min_size_to_compress;
+
 	/* Temporary space for Huffman code output  */
 	u32 precode_freqs[DEFLATE_NUM_PRECODE_SYMS];
 	u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS];
@@ -2688,6 +2691,9 @@ libdeflate_alloc_compressor(int compression_level)
 	struct libdeflate_compressor *c;
 	size_t size = offsetof(struct libdeflate_compressor, p);
 
+	if (compression_level < 0 || compression_level > 12)
+		return NULL;
+
 #if SUPPORT_NEAR_OPTIMAL_PARSING
 	if (compression_level >= 8)
 		size += sizeof(c->p.n);
@@ -2702,6 +2708,14 @@ libdeflate_alloc_compressor(int compression_level)
 	if (!c)
 		return NULL;
 
+	c->compression_level = compression_level;
+
+	/*
+	 * The higher the compression level, the more we should bother trying to
+	 * compress very small inputs.
+	 */
+	c->min_size_to_compress = 56 - (compression_level * 4);
+
 	switch (compression_level) {
 	case 0:
 		c->impl = deflate_compress_none;
@@ -2766,7 +2780,7 @@ libdeflate_alloc_compressor(int compression_level)
 		c->nice_match_length = 80;
 		c->p.n.num_optim_passes = 3;
 		break;
-	case 12:
+	default:
 		c->impl = deflate_compress_near_optimal;
 		c->max_search_depth = 100;
 		c->nice_match_length = 133;
@@ -2778,19 +2792,14 @@ libdeflate_alloc_compressor(int compression_level)
 		c->max_search_depth = 150;
 		c->nice_match_length = 200;
 		break;
-	case 9:
+	default:
 		c->impl = deflate_compress_lazy;
 		c->max_search_depth = 200;
 		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
 		break;
 #endif
-	default:
-		libdeflate_aligned_free(c);
-		return NULL;
 	}
 
-	c->compression_level = compression_level;
-
 	deflate_init_offset_slot_fast(c);
 	deflate_init_static_codes(c);
 
@@ -2806,7 +2815,7 @@ libdeflate_deflate_compress(struct libdeflate_compressor *c,
 		return 0;
 
 	/* For extremely small inputs just use a single uncompressed block. */
-	if (unlikely(in_nbytes < 16)) {
+	if (unlikely(in_nbytes < c->min_size_to_compress)) {
 		struct deflate_output_bitstream os;
 		deflate_init_output(&os, out, out_nbytes_avail);
 		if (in_nbytes == 0)