From 194a8db5c664b20e5ad6f16defa22b9aae039162 Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Sun, 19 Mar 2023 10:02:39 +0100 Subject: [PATCH] zstd: Improve best encoder by extending backwards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SpeedBestCompression encoder now extends matches backwards before estimating their encoded size, rather than doing this after selecting the best match. This is a bit slower, but produces smaller output. Benchmarks on amd64: name old speed new speed delta Encoder_EncodeAllSimple/best-8 20.7MB/s ± 3% 19.0MB/s ± 1% -8.04% (p=0.000 n=19+18) Encoder_EncodeAllSimple4K/best-8 19.2MB/s ± 6% 17.9MB/s ± 1% -6.86% (p=0.000 n=20+20) Output sizes on Silesia and enwik9: dickens 3220994 3179697 (× 0.987179) enwik9 259846164 257481474 (× 0.990900) mozilla 16912437 16895142 (× 0.998977) mr 3502823 3473770 (× 0.991706) nci 2306320 2300580 (× 0.997511) ooffice 2896907 2888715 (× 0.997172) osdb 3390548 3368411 (× 0.993471) reymont 1657380 1639490 (× 0.989206) samba 4329898 4315020 (× 0.996564) sao 5416648 5383855 (× 0.993946) webster 9972808 9887560 (× 0.991452) xml 542277 541018 (× 0.997678) x-ray 5733121 5681186 (× 0.990941) total 319728325 317035918 (× 0.991579) Wall clock time for compressing enwik9 goes up a bit, but is still close to what it was before #776. 
--- zstd/enc_best.go | 53 ++++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/zstd/enc_best.go b/zstd/enc_best.go index 07f657d36e..30378104c9 100644 --- a/zstd/enc_best.go +++ b/zstd/enc_best.go @@ -205,7 +205,22 @@ encodeLoop: panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) } } - cand := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} + + l := 4 + e.matchlen(s+4, offset+4, src) + if rep < 0 { + // Extend candidate match backwards as far as possible. + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength { + s-- + offset-- + l++ + } + } + + cand := match{offset: offset, s: s, length: l, rep: rep} cand.estBits(bitsPerByte) if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 { *m = cand @@ -295,25 +310,10 @@ encodeLoop: s = best.s var seq seq seq.matchLen = uint32(best.length - zstdMinMatch) - - // We might be able to match backwards. - // Extend as long as we can. - start := best.s - // We end the search early, so we don't risk 0 literals - // and have to do special offset treatment. - startLimit := nextEmit + 1 - - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 + if debugAsserts && s <= nextEmit { + panic("s <= nextEmit") } - repIndex := best.offset - for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { - repIndex-- - start-- - seq.matchLen++ - } - addLiterals(&seq, start) + addLiterals(&seq, s) // rep 0 seq.offset = uint32(best.rep) @@ -369,22 +369,9 @@ encodeLoop: panic("invalid offset") } - // Extend the n-byte match as long as possible. 
- l := best.length - - // Extend backwards - tMin := s - e.maxMatchOff - if tMin < 0 { - tMin = 0 - } - for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { - s-- - t-- - l++ - } - // Write our sequence var seq seq + l := best.length seq.litLen = uint32(s - nextEmit) seq.matchLen = uint32(l - zstdMinMatch) if seq.litLen > 0 {