Skip to content

Commit

Permalink
Reduce deflate table sizes
Browse files Browse the repository at this point in the history
Fixes #223

After:
```
BenchmarkCompressAllocations/level(-2)/flate-12   	   15889	     75134 ns/op	  342272 B/op	      11 allocs/op
BenchmarkCompressAllocations/level(-2)/gzip-12    	   15424	     76300 ns/op	  342448 B/op	      12 allocs/op
BenchmarkCompressAllocations/level(-1)/flate-12   	    2673	    378711 ns/op	 2448774 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(-1)/gzip-12    	    3342	    377507 ns/op	 2448949 B/op	      15 allocs/op
BenchmarkCompressAllocations/level(0)/flate-12    	   17437	     76986 ns/op	  339968 B/op	       9 allocs/op
BenchmarkCompressAllocations/level(0)/gzip-12     	   15076	     82031 ns/op	  340144 B/op	      10 allocs/op
BenchmarkCompressAllocations/level(1)/flate-12    	    3382	    377466 ns/op	 1924486 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(1)/gzip-12     	    3436	    387788 ns/op	 1924662 B/op	      15 allocs/op
BenchmarkCompressAllocations/level(2)/flate-12    	    1971	    591518 ns/op	 2710923 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(2)/gzip-12     	    2073	    516709 ns/op	 2711102 B/op	      15 allocs/op
BenchmarkCompressAllocations/level(3)/flate-12    	    3250	    426246 ns/op	 2186626 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(3)/gzip-12     	    3084	    420084 ns/op	 2186802 B/op	      15 allocs/op
BenchmarkCompressAllocations/level(4)/flate-12    	    2733	    390467 ns/op	 2186626 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(4)/gzip-12     	    3165	    400509 ns/op	 2186802 B/op	      15 allocs/op
BenchmarkCompressAllocations/level(5)/flate-12    	    2797	    417904 ns/op	 2448774 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(5)/gzip-12     	    2455	    456214 ns/op	 2448948 B/op	      15 allocs/op
BenchmarkCompressAllocations/level(6)/flate-12    	    2733	    471116 ns/op	 2448773 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(6)/gzip-12     	    2673	    443633 ns/op	 2448949 B/op	      15 allocs/op
BenchmarkCompressAllocations/level(7)/flate-12    	    6015	    198306 ns/op	 1006979 B/op	      13 allocs/op
BenchmarkCompressAllocations/level(7)/gzip-12     	    5728	    188045 ns/op	 1007155 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(8)/flate-12    	    6684	    195617 ns/op	 1006979 B/op	      13 allocs/op
BenchmarkCompressAllocations/level(8)/gzip-12     	    6331	    193922 ns/op	 1007155 B/op	      14 allocs/op
BenchmarkCompressAllocations/level(9)/flate-12    	    6015	    193829 ns/op	 1006980 B/op	      13 allocs/op
BenchmarkCompressAllocations/level(9)/gzip-12     	    5728	    197447 ns/op	 1007155 B/op	      14 allocs/op
```
  • Loading branch information
klauspost committed Feb 19, 2020
1 parent 2a5ec94 commit 2d8a632
Show file tree
Hide file tree
Showing 10 changed files with 142 additions and 114 deletions.
29 changes: 29 additions & 0 deletions compressible_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,3 +271,32 @@ func BenchmarkCompressAllocations(b *testing.B) {
})
}
}

// BenchmarkCompressAllocationsSingle reports time and allocations for the
// full life cycle of a compressor handling one small payload: construct a
// writer at a fixed level, write the payload once, and close the writer.
//
// The writer is created inside the timed loop on purpose, so that the
// writer's own setup cost is part of every measured iteration.
func BenchmarkCompressAllocationsSingle(b *testing.B) {
	payload := []byte(strings.Repeat("Tiny payload", 20))
	const level = 2

	b.Run("flate", func(b *testing.B) {
		b.ReportAllocs()

		for i := 0; i < b.N; i++ {
			w, err := flate.NewWriter(ioutil.Discard, level)
			if err != nil {
				b.Fatal(err)
			}
			// A failed write or close would make the measurement meaningless,
			// so stop the benchmark instead of silently timing a broken run.
			if _, err := w.Write(payload); err != nil {
				b.Fatal(err)
			}
			if err := w.Close(); err != nil {
				b.Fatal(err)
			}
		}
	})

	b.Run("gzip", func(b *testing.B) {
		b.ReportAllocs()

		for i := 0; i < b.N; i++ {
			w, err := gzip.NewWriterLevel(ioutil.Discard, level)
			if err != nil {
				b.Fatal(err)
			}
			// Same reasoning as the flate case: surface writer errors.
			if _, err := w.Write(payload); err != nil {
				b.Fatal(err)
			}
			if err := w.Close(); err != nil {
				b.Fatal(err)
			}
		}
	})
}
4 changes: 3 additions & 1 deletion flate/deflate.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ const (
maxHashOffset = 1 << 24

skipNever = math.MaxInt32

debugDeflate = false
)

type compressionLevel struct {
Expand Down Expand Up @@ -365,7 +367,7 @@ func (d *compressor) deflateLazy() {
// Sanity enables additional runtime tests.
// It's intended to be used during development
// to supplement the currently ad-hoc unit tests.
const sanity = false
const sanity = debugDeflate

if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync {
return
Expand Down
1 change: 0 additions & 1 deletion flate/fast_encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ func hash(u uint32) uint32 {
}

// tableEntry is the element stored in the encoders' hash tables
// (written as e.table[hash] = tableEntry{...} by the Encode methods).
type tableEntry struct {
// val holds the uint32 the caller hashed when storing the entry (e.g. the
// result of load3232(src, s)); lookups compare it against the current value.
val uint32
// offset is the source position of the entry plus e.cur at store time;
// readers subtract e.cur again to get back an index into src.
offset int32
}

Expand Down
22 changes: 11 additions & 11 deletions flate/level1.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugDecode && e.cur < 0 {
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}

Expand Down Expand Up @@ -81,12 +81,12 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
}

now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash(uint32(now))

offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}

Expand All @@ -96,11 +96,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
e.table[nextHash] = tableEntry{offset: s + e.cur}

offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
cv = uint32(now)
Expand Down Expand Up @@ -139,7 +139,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash(cv)] = tableEntry{offset: s + e.cur, val: cv}
e.table[hash(cv)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
Expand All @@ -153,14 +153,14 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
x := load6432(src, s-2)
o := e.cur + s - 2
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
e.table[prevHash] = tableEntry{offset: o}
x >>= 16
currHash := hash(uint32(x))
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x)}
e.table[currHash] = tableEntry{offset: o + 2}

offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != candidate.val {
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 8)
s++
break
Expand Down
28 changes: 14 additions & 14 deletions flate/level2.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
minNonLiteralBlockSize = 1 + 1 + inputMargin
)

if debugDecode && e.cur < 0 {
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}

Expand Down Expand Up @@ -83,12 +83,12 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
}
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
e.table[nextHash] = tableEntry{offset: s + e.cur}
nextHash = hash4u(uint32(now), bTableBits)

offset := s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
e.table[nextHash] = tableEntry{offset: nextS + e.cur, val: uint32(now)}
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}

Expand All @@ -98,10 +98,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
nextS++
candidate = e.table[nextHash]
now >>= 8
e.table[nextHash] = tableEntry{offset: s + e.cur, val: cv}
e.table[nextHash] = tableEntry{offset: s + e.cur}

offset = s - (candidate.offset - e.cur)
if offset < maxMatchOffset && cv == candidate.val {
if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
break
}
cv = uint32(now)
Expand Down Expand Up @@ -148,7 +148,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(s+l+4) < len(src) {
cv := load3232(src, s)
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur, val: cv}
e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
Expand All @@ -157,15 +157,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, int32(i))
nextHash := hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i, val: uint32(x)}
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2, val: uint32(x)}
e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one
x >>= 16
nextHash = hash4u(uint32(x), bTableBits)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4, val: uint32(x)}
e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
}

// We could immediately start working at s now, but to improve
Expand All @@ -178,14 +178,14 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
o := e.cur + s - 2
prevHash := hash4u(uint32(x), bTableBits)
prevHash2 := hash4u(uint32(x>>8), bTableBits)
e.table[prevHash] = tableEntry{offset: o, val: uint32(x)}
e.table[prevHash2] = tableEntry{offset: o + 1, val: uint32(x >> 8)}
e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1}
currHash := hash4u(uint32(x>>16), bTableBits)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2, val: uint32(x >> 16)}
e.table[currHash] = tableEntry{offset: o + 2}

offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != candidate.val {
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
cv = uint32(x >> 24)
s++
break
Expand Down
54 changes: 26 additions & 28 deletions flate/level3.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
minNonLiteralBlockSize = 1 + 1 + inputMargin
)

if debugDecode && e.cur < 0 {
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}

Expand Down Expand Up @@ -81,22 +81,26 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
}
candidates := e.table[nextHash]
now := load3232(src, nextS)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur, val: cv}}

// Safe offset distance until s + 4...
minOffset := e.cur + s - (maxMatchOffset - 4)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}

// Check both candidates
candidate = candidates.Cur
offset := s - (candidate.offset - e.cur)
if cv == candidate.val {
if offset > maxMatchOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}
o2 := s - (candidates.Prev.offset - e.cur)
if cv != candidates.Prev.val || o2 > maxMatchOffset {
if candidate.offset < minOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}

if cv == load3232(src, candidate.offset-e.cur) {
if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
break
}
// Both match and are valid, pick longest.
offset := s - (candidate.offset - e.cur)
o2 := s - (candidates.Prev.offset - e.cur)
l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
if l2 > l1 {
candidate = candidates.Prev
Expand All @@ -106,11 +110,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
break
}
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
break
}
}
cv = now
Expand Down Expand Up @@ -158,7 +159,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
nextHash := hash(cv)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t, val: cv},
Cur: tableEntry{offset: e.cur + t},
}
}
goto emitRemainder
Expand All @@ -170,43 +171,40 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
prevHash := hash(uint32(x))
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 3, val: uint32(x)},
Cur: tableEntry{offset: e.cur + s - 3},
}
x >>= 8
prevHash = hash(uint32(x))

e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2, val: uint32(x)},
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
prevHash = hash(uint32(x))

e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1, val: uint32(x)},
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
currHash := hash(uint32(x))
candidates := e.table[currHash]
cv = uint32(x)
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur, val: cv},
Cur: tableEntry{offset: s + e.cur},
}

// Check both candidates
candidate = candidates.Cur
if cv == candidate.val {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue
}
} else {
minOffset := e.cur + s - (maxMatchOffset - 4)

if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if cv == candidate.val {
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
offset := s - (candidate.offset - e.cur)
if offset <= maxMatchOffset {
continue
Expand Down
Loading

0 comments on commit 2d8a632

Please sign in to comment.