zstd: Improve best encoder by extending backwards
The SpeedBestCompression encoder now extends matches backwards before
estimating their encoded size, rather than doing this after selecting
the best match. This is a bit slower, but produces smaller output.
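
To illustrate the idea in isolation: backward extension grows a candidate match while the bytes just before it also match, so the cost estimate that follows already reflects the longer match and the correspondingly shorter literal run. The sketch below is a simplified stand-alone version, not the encoder's code; the function name and the omission of the nextEmit, maxMatchOff and maxMatchLength limits are assumptions made for brevity.

package main

import "fmt"

// extendBackwards grows the match between src[offset:] and src[s:] backwards
// while the preceding bytes are equal. Simplified sketch: the real encoder
// also stops at nextEmit, respects maxMatchOff and caps the length at
// maxMatchLength.
func extendBackwards(src []byte, offset, s, length int) (int, int, int) {
	for offset > 0 && s > 0 && src[offset-1] == src[s-1] {
		offset--
		s--
		length++
	}
	return offset, s, length
}

func main() {
	src := []byte("abcXYZ...abcXYZ")
	// A 4-byte match "cXYZ" at s=11 against offset=2 extends back over "ab".
	fmt.Println(extendBackwards(src, 2, 11, 4)) // prints: 0 9 6
}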

Benchmarks on amd64:

name                              old speed      new speed      delta
Encoder_EncodeAllSimple/best-8    20.7MB/s ± 3%  19.0MB/s ± 1%  -8.04%  (p=0.000 n=19+18)
Encoder_EncodeAllSimple4K/best-8  19.2MB/s ± 6%  17.9MB/s ± 1%  -6.86%  (p=0.000 n=20+20)
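
The benchmark names and the ±/p-value columns suggest go test benchmarks compared with benchstat. As a rough sketch of how EncodeAll throughput at this level can be measured (the input path and the benchmark body are assumptions, not the repository's own Encoder_EncodeAllSimple benchmarks):

package zstd_test

import (
	"os"
	"testing"

	"github.com/klauspost/compress/zstd"
)

// BenchmarkEncodeAllBest measures EncodeAll throughput with the
// SpeedBestCompression level, roughly mirroring the benchmarks above.
func BenchmarkEncodeAllBest(b *testing.B) {
	src, err := os.ReadFile("testdata/sample.bin") // hypothetical input file
	if err != nil {
		b.Skip(err)
	}
	enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
	if err != nil {
		b.Fatal(err)
	}
	defer enc.Close()
	b.SetBytes(int64(len(src)))
	b.ReportAllocs()
	dst := make([]byte, 0, len(src))
	for i := 0; i < b.N; i++ {
		dst = enc.EncodeAll(src, dst[:0])
	}
}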

Output sizes on Silesia and enwik9:

dickens    3220994    3179697 (× 0.987179)
enwik9   259846164  257481474 (× 0.990900)
mozilla   16912437   16895142 (× 0.998977)
mr         3502823    3473770 (× 0.991706)
nci        2306320    2300580 (× 0.997511)
ooffice    2896907    2888715 (× 0.997172)
osdb       3390548    3368411 (× 0.993471)
reymont    1657380    1639490 (× 0.989206)
samba      4329898    4315020 (× 0.996564)
sao        5416648    5383855 (× 0.993946)
webster    9972808    9887560 (× 0.991452)
xml         542277     541018 (× 0.997678)
x-ray      5733121    5681186 (× 0.990941)
total    319728325  317035918 (× 0.991579)

Wall-clock time for compressing enwik9 goes up a bit, but is still close
to what it was before klauspost#776.
greatroar committed Mar 19, 2023
1 parent 7633d62 commit 194a8db
53 changes: 20 additions & 33 deletions zstd/enc_best.go
@@ -205,7 +205,22 @@ encodeLoop:
 					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
 				}
 			}
-			cand := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
+
+			l := 4 + e.matchlen(s+4, offset+4, src)
+			if rep < 0 {
+				// Extend candidate match backwards as far as possible.
+				tMin := s - e.maxMatchOff
+				if tMin < 0 {
+					tMin = 0
+				}
+				for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
+					s--
+					offset--
+					l++
+				}
+			}
+
+			cand := match{offset: offset, s: s, length: l, rep: rep}
 			cand.estBits(bitsPerByte)
 			if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
 				*m = cand
@@ -295,25 +310,10 @@ encodeLoop:
 			s = best.s
 			var seq seq
 			seq.matchLen = uint32(best.length - zstdMinMatch)
-
-			// We might be able to match backwards.
-			// Extend as long as we can.
-			start := best.s
-			// We end the search early, so we don't risk 0 literals
-			// and have to do special offset treatment.
-			startLimit := nextEmit + 1
-
-			tMin := s - e.maxMatchOff
-			if tMin < 0 {
-				tMin = 0
+			if debugAsserts && s <= nextEmit {
+				panic("s <= nextEmit")
 			}
-			repIndex := best.offset
-			for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
-				repIndex--
-				start--
-				seq.matchLen++
-			}
-			addLiterals(&seq, start)
+			addLiterals(&seq, s)
 
 			// rep 0
 			seq.offset = uint32(best.rep)
@@ -369,22 +369,9 @@ encodeLoop:
 			panic("invalid offset")
 		}
 
-		// Extend the n-byte match as long as possible.
-		l := best.length
-
-		// Extend backwards
-		tMin := s - e.maxMatchOff
-		if tMin < 0 {
-			tMin = 0
-		}
-		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
-			s--
-			t--
-			l++
-		}
-
 		// Write our sequence
 		var seq seq
+		l := best.length
 		seq.litLen = uint32(s - nextEmit)
 		seq.matchLen = uint32(l - zstdMinMatch)
 		if seq.litLen > 0 {
