Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zstd: Minor performance tweaks #420

Merged
merged 6 commits into from
Jan 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions zstd/blockenc.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (b *blockEnc) init() {
if cap(b.literals) < maxCompressedBlockSize {
b.literals = make([]byte, 0, maxCompressedBlockSize)
}
const defSeqs = 200
const defSeqs = 2000
if cap(b.sequences) < defSeqs {
b.sequences = make([]seq, 0, defSeqs)
}
Expand Down Expand Up @@ -426,7 +426,7 @@ func fuzzFseEncoder(data []byte) int {
return 0
}
enc := fseEncoder{}
hist := enc.Histogram()[:256]
hist := enc.Histogram()
maxSym := uint8(0)
for i, v := range data {
v = v & 63
Expand Down Expand Up @@ -802,14 +802,13 @@ func (b *blockEnc) genCodes() {
// nothing to do
return
}

if len(b.sequences) > math.MaxUint16 {
panic("can only encode up to 64K sequences")
}
// No bounds checks after here:
llH := b.coders.llEnc.Histogram()[:256]
ofH := b.coders.ofEnc.Histogram()[:256]
mlH := b.coders.mlEnc.Histogram()[:256]
llH := b.coders.llEnc.Histogram()
ofH := b.coders.ofEnc.Histogram()
mlH := b.coders.mlEnc.Histogram()
for i := range llH {
llH[i] = 0
}
Expand Down
24 changes: 17 additions & 7 deletions zstd/enc_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,6 @@ func (e *fastBase) UseBlock(enc *blockEnc) {
e.blk = enc
}

// matchlenNoHist returns the result of matchLen applied to src[s:] and src[t:],
// converted to int32. The receiver e is not used.
// NOTE(review): presumably this is the number of leading bytes the two
// positions have in common; confirm against matchLen.
// Slicing src[s:] / src[t:] panics if s or t is negative or beyond len(src).
func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}

func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
if debugAsserts {
if s < 0 {
Expand All @@ -131,9 +126,24 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
a := src[s:]
b := src[t:]
b = b[:len(a)]
end := int32((len(a) >> 3) << 3)
for i := int32(0); i < end; i += 8 {
if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
return i + int32(bits.TrailingZeros64(diff)>>3)
}
}

// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
a = a[end:]
b = b[end:]
for i := range a {
if a[i] != b[i] {
return int32(i) + end
}
}
return int32(len(a)) + end
}

// Reset the encoding table.
Expand Down
139 changes: 9 additions & 130 deletions zstd/enc_fast.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ package zstd

import (
"fmt"
"math"
"math/bits"
)

const (
Expand Down Expand Up @@ -136,20 +134,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}

length = 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)

// We might be able to match backwards.
Expand Down Expand Up @@ -236,20 +221,7 @@ encodeLoop:
}

// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4

// Extend backwards
tMin := s - e.maxMatchOff
Expand Down Expand Up @@ -286,20 +258,7 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)

// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)
Expand Down Expand Up @@ -418,21 +377,7 @@ encodeLoop:
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
// length := 4 + e.matchlen(s+6, repIndex+4, src)
// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
var length int32
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length := 4 + e.matchlen(s+6, repIndex+4, src)

seq.matchLen = uint32(length - zstdMinMatch)

Expand Down Expand Up @@ -522,21 +467,7 @@ encodeLoop:
panic(fmt.Sprintf("t (%d) < 0 ", t))
}
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4

// Extend backwards
tMin := s - e.maxMatchOff
Expand Down Expand Up @@ -573,21 +504,7 @@ encodeLoop:
if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)

// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)
Expand Down Expand Up @@ -731,19 +648,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length = 4 + e.matchlen(s+6, repIndex+4, src)

seq.matchLen = uint32(length - zstdMinMatch)

Expand Down Expand Up @@ -831,20 +736,7 @@ encodeLoop:
}

// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4

// Extend backwards
tMin := s - e.maxMatchOff
Expand Down Expand Up @@ -881,20 +773,7 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)

// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)
Expand Down
12 changes: 6 additions & 6 deletions zstd/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func getEncOpts(cMax int) []testEncOpt {
addOpt("nolit", WithNoEntropyCompression(true))
addOpt("pad1k", WithEncoderPadding(1024))
addOpt("zerof", WithZeroFrames(true))
addOpt("singleseg", WithSingleSegment(true))
addOpt("1seg", WithSingleSegment(true))
}
if testing.Short() && conc == 2 {
break
Expand Down Expand Up @@ -904,10 +904,10 @@ func BenchmarkEncoder_EncodeAllXML(b *testing.B) {
}
dec.Close()

enc := Encoder{}
enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
dst := enc.EncodeAll(in, nil)
wantSize := len(dst)
b.Log("Output size:", len(dst))
//b.Log("Output size:", len(dst))
b.ResetTimer()
b.ReportAllocs()
b.SetBytes(int64(len(in)))
Expand Down Expand Up @@ -994,7 +994,7 @@ func BenchmarkEncoder_EncodeAllHTML(b *testing.B) {
b.Fatal(err)
}

enc := Encoder{}
enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
dst := enc.EncodeAll(in, nil)
wantSize := len(dst)
b.ResetTimer()
Expand All @@ -1018,7 +1018,7 @@ func BenchmarkEncoder_EncodeAllTwain(b *testing.B) {
b.Fatal(err)
}

enc := Encoder{}
enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
dst := enc.EncodeAll(in, nil)
wantSize := len(dst)
b.ResetTimer()
Expand All @@ -1042,7 +1042,7 @@ func BenchmarkEncoder_EncodeAllPi(b *testing.B) {
b.Fatal(err)
}

enc := Encoder{}
enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
dst := enc.EncodeAll(in, nil)
wantSize := len(dst)
b.ResetTimer()
Expand Down
5 changes: 2 additions & 3 deletions zstd/fse_encoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,8 @@ func (s symbolTransform) String() string {
// To indicate that you have populated the histogram, call HistogramFinished
// with the value of the highest populated symbol, as well as the number of entries
// in the most populated entry. These are accepted at face value.
// The returned slice will always be length 256.
func (s *fseEncoder) Histogram() []uint32 {
return s.count[:]
func (s *fseEncoder) Histogram() *[256]uint32 {
return &s.count
}

// HistogramFinished can be called to indicate that the histogram has been populated.
Expand Down