Skip to content

Commit

Permalink
flate: Add limited window compression (#843)
Browse files Browse the repository at this point in the history
Adds a medium-level compressor that can operate with a limited window size.

Apart from deflate itself, this is only exposed in gzip for now.

Example sizes:

```
=== RUN   TestFileWindow/32
    gzip_test.go:349: size: 82504 bytes
=== RUN   TestFileWindow/64
    gzip_test.go:349: size: 75350 bytes
=== RUN   TestFileWindow/128
    gzip_test.go:349: size: 70668 bytes
=== RUN   TestFileWindow/256
    gzip_test.go:349: size: 69276 bytes
=== RUN   TestFileWindow/512
    gzip_test.go:349: size: 68327 bytes
=== RUN   TestFileWindow/1024
    gzip_test.go:349: size: 67876 bytes
=== RUN   TestFileWindow/2048
    gzip_test.go:349: size: 40900 bytes
=== RUN   TestFileWindow/4096
    gzip_test.go:349: size: 38684 bytes
=== RUN   TestFileWindow/8192
    gzip_test.go:349: size: 36263 bytes
=== RUN   TestFileWindow/16384
    gzip_test.go:349: size: 35434 bytes
=== RUN   TestFileWindow/32768
    gzip_test.go:349: size: 34654 bytes
--- PASS: TestFileWindow (0.03s)
```
  • Loading branch information
klauspost authored Aug 9, 2023
1 parent c1dcc38 commit b404607
Show file tree
Hide file tree
Showing 7 changed files with 631 additions and 72 deletions.
29 changes: 29 additions & 0 deletions flate/deflate.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package flate

import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
Expand Down Expand Up @@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.initDeflate()
d.fill = (*compressor).fillDeflate
d.step = (*compressor).deflateLazy
case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize:
d.w.logNewTablePenalty = 7
d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize}
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
Expand Down Expand Up @@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
return zw, err
}

// MinCustomWindowSize is the smallest window size accepted by NewWriterWindow.
const MinCustomWindowSize = 32

// MaxCustomWindowSize is the largest window size accepted by NewWriterWindow.
// It is equal to the package's windowSize constant.
const MaxCustomWindowSize = windowSize

// NewWriterWindow returns a new Writer compressing data with a custom window size.
// windowSize must be in the range [MinCustomWindowSize, MaxCustomWindowSize];
// for a value outside that range a nil Writer and a non-nil error are returned.
func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
	if windowSize < MinCustomWindowSize {
		return nil, errors.New("flate: requested window size less than MinCustomWindowSize")
	}
	if windowSize > MaxCustomWindowSize {
		return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize")
	}
	var dw Writer
	// The compressor's init selects the windowed encoder when it is handed
	// the negated window size as its level.
	if err := dw.d.init(w, -windowSize); err != nil {
		return nil, err
	}
	return &dw, nil
}

// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
Expand Down
189 changes: 122 additions & 67 deletions flate/fuzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ var fuzzStartF = flag.Int("start", HuffmanOnly, "Start fuzzing at this level")
var fuzzEndF = flag.Int("end", BestCompression, "End fuzzing at this level (inclusive)")
var fuzzMaxF = flag.Int("max", 1<<20, "Maximum input size")
var fuzzSLF = flag.Bool("sl", true, "Include stateless encodes")
var fuzzWindow = flag.Bool("windows", true, "Include windowed encodes")

func TestMain(m *testing.M) {
flag.Parse()
Expand All @@ -34,6 +35,7 @@ func FuzzEncoding(f *testing.F) {
endFuzz := *fuzzEndF
maxSize := *fuzzMaxF
stateless := *fuzzSLF
fuzzWindow := *fuzzWindow

decoder := NewReader(nil)
buf := new(bytes.Buffer)
Expand All @@ -52,77 +54,130 @@ func FuzzEncoding(f *testing.F) {
}
for level := startFuzz; level <= endFuzz; level++ {
msg := "level " + strconv.Itoa(level) + ":"
buf.Reset()
fw := encs[level-startFuzz]
fw.Reset(buf)
n, err := fw.Write(data)
if n != len(data) {
t.Fatal(msg + "short write")
}
if err != nil {
t.Fatal(msg + err.Error())
}
err = fw.Close()
if err != nil {
t.Fatal(msg + err.Error())
}
decoder.(Resetter).Reset(buf, nil)
data2, err := io.ReadAll(decoder)
if err != nil {
t.Fatal(msg + err.Error())
}
if !bytes.Equal(data, data2) {
t.Fatal(msg + "not equal")
}
// Do it again...
msg = "level " + strconv.Itoa(level) + " (reset):"
buf.Reset()
fw.Reset(buf)
n, err = fw.Write(data)
if n != len(data) {
t.Fatal(msg + "short write")
}
if err != nil {
t.Fatal(msg + err.Error())
}
err = fw.Close()
if err != nil {
t.Fatal(msg + err.Error())
}
decoder.(Resetter).Reset(buf, nil)
data2, err = io.ReadAll(decoder)
if err != nil {
t.Fatal(msg + err.Error())
}
if !bytes.Equal(data, data2) {
t.Fatal(msg + "not equal")
}
}
if !stateless {
return
}
// Split into two and use history...
buf.Reset()
err := StatelessDeflate(buf, data[:len(data)/2], false, nil)
if err != nil {
t.Error(err)
t.Run(msg, func(t *testing.T) {
buf.Reset()
fw := encs[level-startFuzz]
fw.Reset(buf)
n, err := fw.Write(data)
if n != len(data) {
t.Fatal(msg + "short write")
}
if err != nil {
t.Fatal(msg + err.Error())
}
err = fw.Close()
if err != nil {
t.Fatal(msg + err.Error())
}
decoder.(Resetter).Reset(buf, nil)
data2, err := io.ReadAll(decoder)
if err != nil {
t.Fatal(msg + err.Error())
}
if !bytes.Equal(data, data2) {
t.Fatal(msg + "not equal")
}
// Do it again...
msg = "level " + strconv.Itoa(level) + " (reset):"
buf.Reset()
fw.Reset(buf)
n, err = fw.Write(data)
if n != len(data) {
t.Fatal(msg + "short write")
}
if err != nil {
t.Fatal(msg + err.Error())
}
err = fw.Close()
if err != nil {
t.Fatal(msg + err.Error())
}
decoder.(Resetter).Reset(buf, nil)
data2, err = io.ReadAll(decoder)
if err != nil {
t.Fatal(msg + err.Error())
}
if !bytes.Equal(data, data2) {
t.Fatal(msg + "not equal")
}
})
}
if stateless {
t.Run("stateless", func(t *testing.T) {
// Split into two and use history...
buf.Reset()
err := StatelessDeflate(buf, data[:len(data)/2], false, nil)
if err != nil {
t.Error(err)
}

// Use top half as dictionary...
dict := data[:len(data)/2]
err = StatelessDeflate(buf, data[len(data)/2:], true, dict)
if err != nil {
t.Error(err)
}
// Use top half as dictionary...
dict := data[:len(data)/2]
err = StatelessDeflate(buf, data[len(data)/2:], true, dict)
if err != nil {
t.Error(err)
}

decoder.(Resetter).Reset(buf, nil)
data2, err := io.ReadAll(decoder)
if err != nil {
t.Error(err)
decoder.(Resetter).Reset(buf, nil)
data2, err := io.ReadAll(decoder)
if err != nil {
t.Error(err)
}
if !bytes.Equal(data, data2) {
//fmt.Printf("want:%x\ngot: %x\n", data1, data2)
t.Error("not equal")
}
})
}
if !bytes.Equal(data, data2) {
//fmt.Printf("want:%x\ngot: %x\n", data1, data2)
t.Error("not equal")
if fuzzWindow {
	t.Run("window", func(t *testing.T) {
		// Round-trip data through a writer created with a window size of 1000.
		msg := "windowed:"
		buf.Reset()
		fw, err := NewWriterWindow(buf, 1000)
		if err != nil {
			t.Fatal(msg + err.Error())
		}
		fw.Reset(buf)
		n, err := fw.Write(data)
		if n != len(data) {
			t.Fatal(msg + "short write")
		}
		if err != nil {
			t.Fatal(msg + err.Error())
		}
		err = fw.Close()
		if err != nil {
			t.Fatal(msg + err.Error())
		}
		decoder.(Resetter).Reset(buf, nil)
		data2, err := io.ReadAll(decoder)
		if err != nil {
			t.Fatal(msg + err.Error())
		}
		if !bytes.Equal(data, data2) {
			t.Fatal(msg + "not equal")
		}
		// Do it again, reusing the same writer after Reset.
		msg = "windowed (reset):"
		buf.Reset()
		fw.Reset(buf)
		n, err = fw.Write(data)
		if n != len(data) {
			t.Fatal(msg + "short write")
		}
		if err != nil {
			t.Fatal(msg + err.Error())
		}
		err = fw.Close()
		if err != nil {
			t.Fatal(msg + err.Error())
		}
		decoder.(Resetter).Reset(buf, nil)
		data2, err = io.ReadAll(decoder)
		if err != nil {
			t.Fatal(msg + err.Error())
		}
		// The output of the reused writer must round-trip as well.
		if !bytes.Equal(data, data2) {
			t.Fatal(msg + "not equal")
		}
	})
}
})
}
Loading

0 comments on commit b404607

Please sign in to comment.