From b7ccab840e50d2c3fbfdb00c8949cc32e31cd459 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 4 Feb 2020 09:44:41 -0800 Subject: [PATCH] Add stateless dictionary support (#216) * Add stateless dictionary support Enable up to 8K dictionary for stateless compression. --- README.md | 2 + flate/flate_test.go | 25 +++++++ flate/huffman_bit_writer.go | 9 ++- flate/stateless.go | 67 +++++++++++++----- flate/testdata/huffman-null-max.dyn.expect | Bin 100 -> 101 bytes .../huffman-null-max.dyn.expect-noinput | Bin 100 -> 101 bytes flate/testdata/huffman-null-max.golden | Bin 8218 -> 8219 bytes flate/testdata/huffman-pi.dyn.expect | Bin 1750 -> 1751 bytes flate/testdata/huffman-pi.dyn.expect-noinput | Bin 1750 -> 1751 bytes flate/testdata/huffman-pi.golden | Bin 1625 -> 1626 bytes .../huffman-rand-1k.dyn.expect-noinput | Bin 1068 -> 1069 bytes .../huffman-rand-limit.dyn.expect-noinput | Bin 225 -> 226 bytes flate/testdata/huffman-rand-limit.golden | Bin 238 -> 239 bytes flate/testdata/huffman-shifts.dyn.expect | Bin 58 -> 59 bytes .../huffman-shifts.dyn.expect-noinput | Bin 58 -> 59 bytes flate/testdata/huffman-shifts.golden | Bin 1825 -> 1826 bytes .../huffman-text-shift.dyn.expect-noinput | Bin 254 -> 255 bytes flate/testdata/huffman-text-shift.golden | Bin 251 -> 252 bytes .../testdata/huffman-text.dyn.expect-noinput | 2 +- flate/testdata/huffman-text.golden | Bin 269 -> 270 bytes .../testdata/huffman-zero.dyn.expect-noinput | 2 +- flate/testdata/huffman-zero.golden | Bin 65 -> 66 bytes .../null-long-match.dyn.expect-noinput | Bin 225 -> 227 bytes gzip/gzip.go | 4 +- 24 files changed, 87 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 266d613a21..d260c388f8 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ This package provides various compression algorithms. # changelog +* Feb 4, 2020: (v1.10.0) Add optional dictionary to [stateless deflate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc#StatelessDeflate). 
Breaking change, send `nil` for previous behaviour. [#216](https://github.com/klauspost/compress/pull/216) +* Feb 3, 2020: Fix buffer overflow on repeated small block deflate. [#218](https://github.com/klauspost/compress/pull/218) * Jan 31, 2020: Allow copying content from an existing ZIP file without decompressing+compressing. [#214](https://github.com/klauspost/compress/pull/214) * Jan 28, 2020: Added [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) AMD64 assembler and various optimizations. Stream speed >10GB/s. [#186](https://github.com/klauspost/compress/pull/186) * Jan 20,2020 (v1.9.8) Optimize gzip/deflate with better size estimates and faster table generation. [#207](https://github.com/klauspost/compress/pull/207) by [luyu6056](https://github.com/luyu6056), [#206](https://github.com/klauspost/compress/pull/206). diff --git a/flate/flate_test.go b/flate/flate_test.go index 2d83d678ff..52517d888c 100644 --- a/flate/flate_test.go +++ b/flate/flate_test.go @@ -138,6 +138,31 @@ func TestRegressions(t *testing.T) { } }) } + t.Run(tt.Name+"stateless", func(t *testing.T) { + // Split into two and use history... + buf := new(bytes.Buffer) + err = StatelessDeflate(buf, data1[:len(data1)/2], false, nil) + if err != nil { + t.Error(err) + } + + // Use top half as dictionary... 
+ dict := data1[:len(data1)/2] + err = StatelessDeflate(buf, data1[len(data1)/2:], true, dict) + if err != nil { + t.Error(err) + } + t.Log(buf.Len()) + fr1 := NewReader(buf) + data2, err := ioutil.ReadAll(fr1) + if err != nil { + t.Error(err) + } + if bytes.Compare(data1, data2) != 0 { + fmt.Printf("want:%x\ngot: %x\n", data1, data2) + t.Error("not equal") + } + }) } } diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index 9feea87a3d..56ee6dc8ba 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -177,6 +177,11 @@ func (w *huffmanBitWriter) flush() { w.nbits = 0 return } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } n := w.nbytes for w.nbits != 0 { w.bytes[n] = byte(w.bits) @@ -594,8 +599,8 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b tokens.AddEOB() } - // We cannot reuse pure huffman table. - if w.lastHuffMan && w.lastHeader > 0 { + // We cannot reuse pure huffman table, and must mark as EOF. + if (w.lastHuffMan || eof) && w.lastHeader > 0 { // We will not try to reuse. w.writeCode(w.literalEncoding.codes[endBlockMarker]) w.lastHeader = 0 diff --git a/flate/stateless.go b/flate/stateless.go index a470511975..53e8991246 100644 --- a/flate/stateless.go +++ b/flate/stateless.go @@ -8,6 +8,8 @@ import ( const ( maxStatelessBlock = math.MaxInt16 + // dictionary will be taken from maxStatelessBlock, so limit it. 
+ maxStatelessDict = 8 << 10 slTableBits = 13 slTableSize = 1 << slTableBits @@ -25,11 +27,11 @@ func (s *statelessWriter) Close() error { } s.closed = true // Emit EOF block - return StatelessDeflate(s.dst, nil, true) + return StatelessDeflate(s.dst, nil, true, nil) } func (s *statelessWriter) Write(p []byte) (n int, err error) { - err = StatelessDeflate(s.dst, p, false) + err = StatelessDeflate(s.dst, p, false, nil) if err != nil { return 0, err } @@ -59,7 +61,10 @@ var bitWriterPool = sync.Pool{ // StatelessDeflate allows to compress directly to a Writer without retaining state. // When returning everything will be flushed. -func StatelessDeflate(out io.Writer, in []byte, eof bool) error { +// Up to 8KB of an optional dictionary can be given which is presumed to precede the block. +// Longer dictionaries will be truncated and will still produce valid output. +// Sending nil dictionary is perfectly fine. +func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { var dst tokens bw := bitWriterPool.Get().(*huffmanBitWriter) bw.reset(out) @@ -76,35 +81,53 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool) error { return bw.err } + // Truncate dict + if len(dict) > maxStatelessDict { + dict = dict[len(dict)-maxStatelessDict:] + } + for len(in) > 0 { todo := in - if len(todo) > maxStatelessBlock { - todo = todo[:maxStatelessBlock] + if len(todo) > maxStatelessBlock-len(dict) { + todo = todo[:maxStatelessBlock-len(dict)] } in = in[len(todo):] + uncompressed := todo + if len(dict) > 0 { + // combine dict and source + bufLen := len(todo) + len(dict) + combined := make([]byte, bufLen) + copy(combined, dict) + copy(combined[len(dict):], todo) + todo = combined + } // Compress - statelessEnc(&dst, todo) + statelessEnc(&dst, todo, int16(len(dict))) isEof := eof && len(in) == 0 if dst.n == 0 { - bw.writeStoredHeader(len(todo), isEof) + bw.writeStoredHeader(len(uncompressed), isEof) if bw.err != nil { return bw.err } - 
bw.writeBytes(todo) - } else if int(dst.n) > len(todo)-len(todo)>>4 { + bw.writeBytes(uncompressed) + } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { // If we removed less than 1/16th, huffman compress the block. - bw.writeBlockHuff(isEof, todo, false) + bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) } else { - bw.writeBlockDynamic(&dst, isEof, todo, false) + bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0) + } + if len(in) > 0 { + // Retain a dict if we have more + dict = todo[len(todo)-maxStatelessDict:] + dst.Reset() } if bw.err != nil { return bw.err } - dst.Reset() } if !eof { - // Align. + // Align, only a stored block can do that. bw.writeStoredHeader(0, false) } bw.flush() @@ -130,7 +153,7 @@ func load6416(b []byte, i int16) uint64 { uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 } -func statelessEnc(dst *tokens, src []byte) { +func statelessEnc(dst *tokens, src []byte, startAt int16) { const ( inputMargin = 12 - 1 minNonLiteralBlockSize = 1 + 1 + inputMargin @@ -144,15 +167,23 @@ func statelessEnc(dst *tokens, src []byte) { // This check isn't in the Snappy implementation, but there, the caller // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { + if len(src)-int(startAt) < minNonLiteralBlockSize { // We do not fill the token table. // This will be picked up by caller. - dst.n = uint16(len(src)) + dst.n = 0 return } + // Index until startAt + if startAt > 0 { + cv := load3232(src, 0) + for i := int16(0); i < startAt; i++ { + table[hashSL(cv)] = tableEntry{offset: i} + cv = (cv >> 8) | (uint32(src[i+4]) << 24) + } + } - s := int16(1) - nextEmit := int16(0) + s := startAt + 1 + nextEmit := startAt // sLimit is when to stop looking for offset/length copies. The inputMargin // lets us use a fast path for emitLiteral in the main loop, while we are // looking for copies. 
diff --git a/flate/testdata/huffman-null-max.dyn.expect b/flate/testdata/huffman-null-max.dyn.expect index 493485bad7e3c9172fb618e951ac4fdcba1e8e52..0a3c71ceb3bee4671bb61c42561246285bd50ec1 100644 GIT binary patch delta 7 OcmYdEosi6A!36*b{Q?01 delta 6 NcmYdInUKuL000NW0lokL diff --git a/flate/testdata/huffman-null-max.dyn.expect-noinput b/flate/testdata/huffman-null-max.dyn.expect-noinput index 493485bad7e3c9172fb618e951ac4fdcba1e8e52..0a3c71ceb3bee4671bb61c42561246285bd50ec1 100644 GIT binary patch delta 7 OcmYdEosi6A!36*b{Q?01 delta 6 NcmYdInUKuL000NW0lokL diff --git a/flate/testdata/huffman-null-max.golden b/flate/testdata/huffman-null-max.golden index fa20e27eb4f0ef5db9a942ccac21f60ed3ce17dd..fe7b7f4fb70f74d20828ffd335ccb06696a34df5 100644 GIT binary patch delta 10 RcmbQ`Fxz2+qyp0edjJ*)1Hb?P delta 9 QcmbR3Fw0?sqyi%Y01;>cpa1{> diff --git a/flate/testdata/huffman-pi.dyn.expect b/flate/testdata/huffman-pi.dyn.expect index 722935ea21eb4005035e5645af546bd1f4db338c..11756feafbe613e8979b814493a5fd41d0483395 100644 GIT binary patch delta 10 Rcmcb{d!2W~RW_z?YycWZ1W^D0 delta 9 Qcmcc4dyRL)RW?R802KxUS^xk5 diff --git a/flate/testdata/huffman-pi.dyn.expect-noinput b/flate/testdata/huffman-pi.dyn.expect-noinput index 722935ea21eb4005035e5645af546bd1f4db338c..11756feafbe613e8979b814493a5fd41d0483395 100644 GIT binary patch delta 10 Rcmcb{d!2W~RW_z?YycWZ1W^D0 delta 9 Qcmcc4dyRL)RW?R802KxUS^xk5 diff --git a/flate/testdata/huffman-pi.golden b/flate/testdata/huffman-pi.golden index 610b64057391e10715a88715a85c21555aa6cd00..05fd911d094369c7e9213d233052e26ba312f31c 100644 GIT binary patch delta 10 Rcmcb~bBkv~1RK*=4geSY1K9uo delta 9 Qcmcb`bCYL71REnO024C;-~a#s diff --git a/flate/testdata/huffman-rand-1k.dyn.expect-noinput b/flate/testdata/huffman-rand-1k.dyn.expect-noinput index ea8d62cc0c6da3cdf5be4eae3b5b76fb34427966..5162399686d8b32a892409ff10092f29975928a3 100644 GIT binary patch delta 10 RcmZ3(v6f?lHVe~#0RR)l16lw8 delta 9 QcmZ3>v4&%VHVY#=01tBlQ2+n{ diff --git 
a/flate/testdata/huffman-rand-limit.dyn.expect-noinput b/flate/testdata/huffman-rand-limit.dyn.expect-noinput index e6e62989bb656a0ec47ab630435c64e4e5d46ff4..008b9afee933f30f6429fe5a1e2c426a195cafd4 100644 GIT binary patch delta 9 QcmaFJ_=s`B1Ewd;02WUK9{>OV delta 8 PcmaFF_>ghJ14ae_5ljO& diff --git a/flate/testdata/huffman-rand-limit.golden b/flate/testdata/huffman-rand-limit.golden index f3d803a84a7bb3ca4c7174a8009b3d1eab178c82..7ef674504988ad857ed31f4a47a1ea7f9f99dec6 100644 GIT binary patch delta 8 PcmaFI_?~gXJH|)=5`zP{ delta 6 NcmaFQ_>OVHI{*uA16}|C diff --git a/flate/testdata/huffman-shifts.dyn.expect b/flate/testdata/huffman-shifts.dyn.expect index 649ef3b6c16efc70c63fcc26f0e2a00a0f053178..2f4fd17add552d86b47dcb321a4f50988aac86cc 100644 GIT binary patch delta 7 OcmcDro?ywe%M1VrV*)V% delta 6 NcmcDvnqbLj1po!H0eS!c diff --git a/flate/testdata/huffman-shifts.dyn.expect-noinput b/flate/testdata/huffman-shifts.dyn.expect-noinput index 649ef3b6c16efc70c63fcc26f0e2a00a0f053178..2f4fd17add552d86b47dcb321a4f50988aac86cc 100644 GIT binary patch delta 7 OcmcDro?ywe%M1VrV*)V% delta 6 NcmcDvnqbLj1po!H0eS!c diff --git a/flate/testdata/huffman-shifts.golden b/flate/testdata/huffman-shifts.golden index bf47c1756c9df6f7788e079cb6a404b7991f0218..89c8addf0f8a4940ac23c7ac7330f5fcf22f7673 100644 GIT binary patch delta 10 RcmZ3;w}@|p0z1=d1ppHq14IA- delta 9 QcmZ3)w~%jx0y`r&01o&9LjV8( diff --git a/flate/testdata/huffman-text-shift.dyn.expect-noinput b/flate/testdata/huffman-text-shift.dyn.expect-noinput index 4d5104f0632ecd74eb415c24d6a93712a3f741c7..29788aa0a8ec587e76ecf8a55d49ab54b381bf79 100644 GIT binary patch delta 9 Qcmeyz_@8mYUnUzy02e_6%m4rY delta 8 Pcmey*_>XbIUq%K16FCEp diff --git a/flate/testdata/huffman-text-shift.golden b/flate/testdata/huffman-text-shift.golden index 92c11ad2536d8b661829b692ed463bf46d41a756..80531ad983afe811bf96c8b78d005237cf827d12 100644 GIT binary patch delta 9 Qcmey(_=j=AFQyn^AUm Œ2>T gO U+d5ʕd6_i2 \ No newline at end of file +*xqF_2>n^AUm Œ2>T gO 
U+d5ʕd6_i2 \ No newline at end of file diff --git a/flate/testdata/huffman-text.golden b/flate/testdata/huffman-text.golden index 7deac6dd1bebdb7ac03446dc6b5acd9c4e8155b5..b440e84d53a9d43aa0edb5c13d861d5a49f7831a 100644 GIT binary patch delta 10 RcmeBW>SNl#!^rfP9RLu60^Sfx%!^p@801Tu7-v9sr diff --git a/flate/testdata/huffman-zero.dyn.expect-noinput b/flate/testdata/huffman-zero.dyn.expect-noinput index 9d0aa15667..cefc1d3f66 100644 --- a/flate/testdata/huffman-zero.dyn.expect-noinput +++ b/flate/testdata/huffman-zero.dyn.expect-noinput @@ -1 +1 @@ -@hm۶m۶m۶m۶m۶6rk \ No newline at end of file +@hm۶m۶m۶m۶m۶6rk \ No newline at end of file diff --git a/flate/testdata/huffman-zero.golden b/flate/testdata/huffman-zero.golden index e9d8b968149741d9e03c9fee4c3caf50f7b4aa1b..f0dacf2b4947dc8d55fd159448a07ec89dd5c2da 100644 GIT binary patch delta 7 OcmZ>Cn&80nK^_1JF9Kx% delta 6 NcmZ>AoZ!I7000Hq0aXA1 diff --git a/flate/testdata/null-long-match.dyn.expect-noinput b/flate/testdata/null-long-match.dyn.expect-noinput index cce0fdbbcd05e45caebfa6dc175caad53e71c417..14167a3344bcdc219ee9fd904418ae61298e584d 100644 GIT binary patch delta 9 QcmaFJ_?U6RL#E>l02Unt0RR91 delta 6 NcmaFN_>ghJLjVfp12_Nx diff --git a/gzip/gzip.go b/gzip/gzip.go index ed0cc148f8..6794cf48f4 100644 --- a/gzip/gzip.go +++ b/gzip/gzip.go @@ -207,7 +207,7 @@ func (z *Writer) Write(p []byte) (int, error) { z.size += uint32(len(p)) z.digest = crc32.Update(z.digest, crc32.IEEETable, p) if z.level == StatelessCompression { - return len(p), flate.StatelessDeflate(z.w, p, false) + return len(p), flate.StatelessDeflate(z.w, p, false, nil) } n, z.err = z.compressor.Write(p) return n, z.err @@ -255,7 +255,7 @@ func (z *Writer) Close() error { } } if z.level == StatelessCompression { - z.err = flate.StatelessDeflate(z.w, nil, true) + z.err = flate.StatelessDeflate(z.w, nil, true, nil) } else { z.err = z.compressor.Close() }