From 8b78fa8c2c354cd9755f0a865a8c8354052faa02 Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Sat, 24 Dec 2022 13:50:30 +0100 Subject: [PATCH] zstd: Don't allocate dataStorage when using byteBuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Applications that don't read from files don't need the multi-KiB buffers allocated by blockDec.reset, so their peak memory usage can be reduced a bit. This slows down Decoder.Reset, though only by a few ns per call. The allocations part of benchstat's output shows only noise, because these allocations don't actually show up due to caching (verified by putting a print statement near the allocations in the old code). name old time/op new time/op delta Decoder_DecoderReset/kppkn.gtb.zst/stream-8 395µs ± 5% 393µs ± 2% ~ (p=0.912 n=10+10) Decoder_DecoderReset/kppkn.gtb.zst/stream-single-8 41.2ns ± 0% 43.2ns ± 1% +4.66% (p=0.000 n=10+10) Decoder_DecoderReset/kppkn.gtb.zst/buffer-8 391µs ± 0% 390µs ± 0% ~ (p=0.063 n=10+10) Decoder_DecoderReset/kppkn.gtb.zst/buffer-single-8 390µs ± 0% 390µs ± 0% ~ (p=0.280 n=10+10) Decoder_DecoderReset/geo.protodata.zst/stream-8 188µs ± 3% 190µs ± 2% ~ (p=0.105 n=10+10) Decoder_DecoderReset/geo.protodata.zst/stream-single-8 41.2ns ± 0% 43.2ns ± 1% +4.73% (p=0.000 n=10+10) Decoder_DecoderReset/geo.protodata.zst/buffer-8 96.1µs ± 0% 95.7µs ± 0% -0.40% (p=0.008 n=9+9) Decoder_DecoderReset/geo.protodata.zst/buffer-single-8 95.8µs ± 0% 95.6µs ± 1% ~ (p=0.068 n=9+10) Decoder_DecoderReset/plrabn12.txt.zst/stream-8 609µs ± 4% 609µs ± 4% ~ (p=0.905 n=10+9) Decoder_DecoderReset/plrabn12.txt.zst/stream-single-8 41.2ns ± 0% 43.2ns ± 1% +4.79% (p=0.000 n=10+9) Decoder_DecoderReset/plrabn12.txt.zst/buffer-8 1.30ms ± 0% 1.29ms ± 0% -0.21% (p=0.004 n=10+10) Decoder_DecoderReset/plrabn12.txt.zst/buffer-single-8 1.30ms ± 0% 1.30ms ± 0% ~ (p=0.549 n=10+9) Decoder_DecoderReset/lcet10.txt.zst/stream-8 523µs ± 2% 518µs ± 3% ~ (p=0.408
n=8+10) Decoder_DecoderReset/lcet10.txt.zst/stream-single-8 41.2ns ± 0% 43.2ns ± 1% +4.71% (p=0.000 n=10+10) Decoder_DecoderReset/lcet10.txt.zst/buffer-8 954µs ± 0% 954µs ± 0% ~ (p=0.796 n=10+10) Decoder_DecoderReset/lcet10.txt.zst/buffer-single-8 954µs ± 0% 953µs ± 0% ~ (p=0.604 n=9+10) Decoder_DecoderReset/asyoulik.txt.zst/stream-8 339µs ± 3% 327µs ± 4% -3.71% (p=0.002 n=10+10) Decoder_DecoderReset/asyoulik.txt.zst/stream-single-8 41.2ns ± 1% 43.2ns ± 1% +4.77% (p=0.000 n=10+9) Decoder_DecoderReset/asyoulik.txt.zst/buffer-8 338µs ± 0% 338µs ± 1% ~ (p=0.912 n=10+10) Decoder_DecoderReset/asyoulik.txt.zst/buffer-single-8 338µs ± 0% 338µs ± 0% ~ (p=1.000 n=9+9) Decoder_DecoderReset/alice29.txt.zst/stream-8 363µs ± 3% 359µs ± 2% ~ (p=0.278 n=10+9) Decoder_DecoderReset/alice29.txt.zst/stream-single-8 41.2ns ± 0% 43.1ns ± 1% +4.62% (p=0.000 n=10+10) Decoder_DecoderReset/alice29.txt.zst/buffer-8 411µs ± 0% 411µs ± 1% ~ (p=1.000 n=10+10) Decoder_DecoderReset/alice29.txt.zst/buffer-single-8 411µs ± 0% 411µs ± 0% ~ (p=0.321 n=8+9) Decoder_DecoderReset/html_x_4.zst/stream-8 239µs ± 3% 244µs ± 1% ~ (p=0.028 n=10+9) Decoder_DecoderReset/html_x_4.zst/stream-single-8 41.6ns ± 3% 43.2ns ± 0% +3.80% (p=0.000 n=9+10) Decoder_DecoderReset/html_x_4.zst/buffer-8 263µs ± 1% 263µs ± 1% ~ (p=0.971 n=10+10) Decoder_DecoderReset/html_x_4.zst/buffer-single-8 263µs ± 1% 263µs ± 0% ~ (p=0.529 n=10+10) Decoder_DecoderReset/paper-100k.pdf.zst/stream-8 80.1µs ± 3% 80.1µs ± 2% ~ (p=0.370 n=9+8) Decoder_DecoderReset/paper-100k.pdf.zst/stream-single-8 41.2ns ± 1% 43.1ns ± 0% +4.58% (p=0.000 n=10+10) Decoder_DecoderReset/paper-100k.pdf.zst/buffer-8 22.9µs ± 2% 22.7µs ± 0% ~ (p=0.015 n=10+8) Decoder_DecoderReset/paper-100k.pdf.zst/buffer-single-8 22.7µs ± 0% 22.6µs ± 0% ~ (p=0.021 n=8+10) Decoder_DecoderReset/fireworks.jpeg.zst/stream-8 59.4µs ± 1% 59.7µs ± 3% ~ (p=0.079 n=9+8) Decoder_DecoderReset/fireworks.jpeg.zst/stream-single-8 41.2ns ± 0% 43.2ns ± 0% +4.70% (p=0.000 n=6+10) 
Decoder_DecoderReset/fireworks.jpeg.zst/buffer-8 13.2µs ± 0% 13.1µs ± 0% -0.96% (p=0.000 n=10+9) Decoder_DecoderReset/fireworks.jpeg.zst/buffer-single-8 13.2µs ± 0% 13.0µs ± 0% -0.91% (p=0.000 n=10+10) Decoder_DecoderReset/urls.10K.zst/stream-8 497µs ± 2% 503µs ± 2% ~ (p=0.143 n=10+10) Decoder_DecoderReset/urls.10K.zst/stream-single-8 41.3ns ± 0% 43.2ns ± 0% +4.75% (p=0.000 n=9+9) Decoder_DecoderReset/urls.10K.zst/buffer-8 1.12ms ± 0% 1.12ms ± 0% ~ (p=0.123 n=10+10) Decoder_DecoderReset/urls.10K.zst/buffer-single-8 1.12ms ± 0% 1.12ms ± 0% +0.21% (p=0.002 n=9+10) Decoder_DecoderReset/html.zst/stream-8 176µs ± 4% 179µs ± 3% ~ (p=0.113 n=9+10) Decoder_DecoderReset/html.zst/stream-single-8 41.2ns ± 0% 43.1ns ± 0% +4.72% (p=0.000 n=10+10) Decoder_DecoderReset/html.zst/buffer-8 104µs ± 0% 104µs ± 0% ~ (p=0.024 n=9+9) Decoder_DecoderReset/html.zst/buffer-single-8 103µs ± 1% 104µs ± 0% +0.65% (p=0.000 n=10+9) Decoder_DecoderReset/comp-data.bin.zst/stream-8 41.1µs ± 3% 42.6µs ± 4% +3.62% (p=0.001 n=10+9) Decoder_DecoderReset/comp-data.bin.zst/stream-single-8 41.2ns ± 0% 43.2ns ± 1% +4.77% (p=0.000 n=10+9) Decoder_DecoderReset/comp-data.bin.zst/buffer-8 10.1µs ± 0% 10.1µs ± 1% ~ (p=0.288 n=10+10) Decoder_DecoderReset/comp-data.bin.zst/buffer-single-8 10.1µs ± 0% 10.1µs ± 0% ~ (p=0.033 n=10+9) Decoder_DecodeAll/kppkn.gtb.zst-8 390µs ± 0% 390µs ± 0% ~ (p=0.842 n=9+10) Decoder_DecodeAll/geo.protodata.zst-8 95.6µs ± 0% 95.5µs ± 1% ~ (p=0.739 n=10+10) Decoder_DecodeAll/plrabn12.txt.zst-8 1.30ms ± 0% 1.29ms ± 0% ~ (p=0.497 n=9+10) Decoder_DecodeAll/lcet10.txt.zst-8 954µs ± 0% 954µs ± 0% ~ (p=1.000 n=10+9) Decoder_DecodeAll/asyoulik.txt.zst-8 338µs ± 0% 338µs ± 0% ~ (p=0.912 n=10+10) Decoder_DecodeAll/alice29.txt.zst-8 409µs ± 0% 410µs ± 1% ~ (p=0.052 n=10+10) Decoder_DecodeAll/html_x_4.zst-8 263µs ± 0% 264µs ± 0% +0.36% (p=0.008 n=10+9) Decoder_DecodeAll/paper-100k.pdf.zst-8 22.5µs ± 1% 22.6µs ± 1% ~ (p=0.127 n=10+10) Decoder_DecodeAll/fireworks.jpeg.zst-8 13.0µs ± 0% 12.8µs ± 1% 
-1.31% (p=0.000 n=9+10) Decoder_DecodeAll/urls.10K.zst-8 1.12ms ± 0% 1.12ms ± 1% ~ (p=0.043 n=10+10) Decoder_DecodeAll/html.zst-8 103µs ± 0% 103µs ± 0% ~ (p=0.043 n=10+10) Decoder_DecodeAll/comp-data.bin.zst-8 10.0µs ± 0% 10.0µs ± 1% ~ (p=1.000 n=9+10) --- zstd/blockdec.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/zstd/blockdec.go b/zstd/blockdec.go index 6b9929ddf5..5af8056cd8 100644 --- a/zstd/blockdec.go +++ b/zstd/blockdec.go @@ -192,17 +192,18 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { } // Read block data. - if cap(b.dataStorage) < cSize { - if b.lowMem || cSize > maxCompressedBlockSize { - b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) - } else { - b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) + if bb, ok := br.(*byteBuf); ok { + b.data, err = bb.readBig(cSize, nil) + } else { + if cap(b.dataStorage) < cSize { + if b.lowMem || cSize > maxCompressedBlockSize { + b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) + } else { + b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) + } } + b.data, err = br.readBig(cSize, b.dataStorage) } - if cap(b.dst) <= maxSize { - b.dst = make([]byte, 0, maxSize+1) - } - b.data, err = br.readBig(cSize, b.dataStorage) if err != nil { if debugDecoder { println("Reading block:", err, "(", cSize, ")", len(b.data)) @@ -210,6 +211,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { } return err } + if cap(b.dst) <= maxSize { + b.dst = make([]byte, 0, maxSize+1) + } return nil }