From 6016eed2894713a69e145136ef27cdb13bf3fef2 Mon Sep 17 00:00:00 2001
From: "@RubenKelevra"
Date: Wed, 7 Jul 2021 03:56:44 +0200
Subject: [PATCH] buzhash: reduce target size and cutoff size

Fixes https://github.com/ipfs/go-ipfs/issues/7966
---
 buzhash.go      | 28 +++++++++++++++++++++-------
 buzhash_test.go |  8 +++++---
 parse.go        |  5 ++++-
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/buzhash.go b/buzhash.go
index 83ab019..f8f121a 100644
--- a/buzhash.go
+++ b/buzhash.go
@@ -8,9 +8,11 @@ import (
 )
 
 const (
-	buzMin  = 128 << 10
-	buzMax  = 512 << 10
-	buzMask = 1<<17 - 1
+	buzMinDefault = 16 << 10
+	buzMaxDefault = 64 << 10
+	buzMinLegacy  = 128 << 10
+	buzMaxLegacy  = 512 << 10
+	buzMask       = 1<<17 - 1
 )
 
 type Buzhash struct {
@@ -18,13 +20,20 @@ type Buzhash struct {
 	buf []byte
 	n   int
 
-	err error
+	err       error
+	buzLegacy bool
 }
 
-func NewBuzhash(r io.Reader) *Buzhash {
+func NewBuzhash(r io.Reader, buzLegacy bool) *Buzhash {
+	buzMax := buzMaxDefault
+	if buzLegacy {
+		buzMax = buzMaxLegacy
+	}
+
 	return &Buzhash{
-		r:   r,
-		buf: pool.Get(buzMax),
+		r:         r,
+		buf:       pool.Get(buzMax),
+		buzLegacy: buzLegacy,
 	}
 }
 
@@ -37,6 +46,11 @@ func (b *Buzhash) NextBytes() ([]byte, error) {
 		return nil, b.err
 	}
 
+	buzMin := buzMinDefault
+	if b.buzLegacy {
+		buzMin = buzMinLegacy
+	}
+
 	n, err := io.ReadFull(b.r, b.buf[b.n:])
 	if err != nil {
 		if err == io.ErrUnexpectedEOF || err == io.EOF {
diff --git a/buzhash_test.go b/buzhash_test.go
index 05ad7c3..abb78c4 100644
--- a/buzhash_test.go
+++ b/buzhash_test.go
@@ -17,10 +17,12 @@ func testBuzhashChunking(t *testing.T, buf []byte) (chunkCount int) {
 		t.Fatal(err)
 	}
 
-	r := NewBuzhash(bytes.NewReader(buf))
+	r := NewBuzhash(bytes.NewReader(buf), false)
 
 	var chunks [][]byte
 
+	buzMin := buzMinDefault
+
 	for {
 		chunk, err := r.NextBytes()
 		if err != nil {
@@ -62,14 +64,14 @@ func TestBuzhashChunking(t *testing.T) {
 
 func TestBuzhashChunkReuse(t *testing.T) {
 	newBuzhash := func(r io.Reader) Splitter {
-		return NewBuzhash(r)
+		return NewBuzhash(r, false)
 	}
 	testReuse(t, newBuzhash)
 }
 
 func BenchmarkBuzhash2(b *testing.B) {
 	benchmarkChunker(b, func(r io.Reader) Splitter {
-		return NewBuzhash(r)
+		return NewBuzhash(r, false)
 	})
 }
 
diff --git a/parse.go b/parse.go
index dee8304..32a9e81 100644
--- a/parse.go
+++ b/parse.go
@@ -48,7 +48,10 @@ func FromString(r io.Reader, chunker string) (Splitter, error) {
 		return parseRabinString(r, chunker)
 
 	case chunker == "buzhash":
-		return NewBuzhash(r), nil
+		return NewBuzhash(r, false), nil
+
+	case chunker == "buzhash-legacy":
+		return NewBuzhash(r, true), nil
 
 	default:
 		return nil, fmt.Errorf("unrecognized chunker option: %s", chunker)