From 4257e9c7d09f42cf99610110c61bc1fb384f187c Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 16 Jun 2022 23:41:32 +0200 Subject: [PATCH 01/37] fix: don't OOM if the header size is too big --- util/util.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/util/util.go b/util/util.go index 08048f33..320ecf89 100644 --- a/util/util.go +++ b/util/util.go @@ -4,6 +4,7 @@ import ( "bufio" "bytes" "encoding/binary" + "errors" "fmt" "io" @@ -11,6 +12,13 @@ import ( mh "github.com/multiformats/go-multihash" ) +// MaxAllowedHeaderSize is a hint about how big the header read is allowed to be. +// This value is a hint to avoid OOMs, a parser that cannot OOM because it is +// streaming for example, isn't forced to follow that value. +// Deprecated: You should use v2#NewReader instead since it allows for options +// to be passed in. +var MaxAllowedHeaderSize uint = 1024 + var cidv0Pref = []byte{0x12, 0x20} type BytesReader interface { @@ -112,6 +120,10 @@ func LdRead(r *bufio.Reader) ([]byte, error) { return nil, err } + if l > uint64(MaxAllowedHeaderSize) { // Don't OOM + return nil, errors.New("malformed car; header is bigger than util.MaxAllowedHeaderSize") + } + buf := make([]byte, l) if _, err := io.ReadFull(r, buf); err != nil { return nil, err From 6c256c2d1d8cafa6895714f596aed3faa8ad7b35 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 16 Jun 2022 23:42:38 +0200 Subject: [PATCH 02/37] fix: do bound check while checking for CIDv0 --- util/util.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/util/util.go b/util/util.go index 320ecf89..fbd6a86b 100644 --- a/util/util.go +++ b/util/util.go @@ -28,9 +28,13 @@ type BytesReader interface { // TODO: this belongs in the go-cid package func ReadCid(buf []byte) (cid.Cid, int, error) { - if bytes.Equal(buf[:2], cidv0Pref) { - c, err := cid.Cast(buf[:34]) - return c, 34, err + if len(buf) >= 2 && bytes.Equal(buf[:2], cidv0Pref) { + i := 34 + if len(buf) < i { + i = len(buf) + } + c, err := 
cid.Cast(buf[:i]) + return c, i, err } br := bytes.NewReader(buf) From 31439684f7b1613ee7498a5b9a71d73720d3abf8 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 16 Jun 2022 23:43:10 +0200 Subject: [PATCH 03/37] test: add fuzzing of NewCarReader --- car_test.go | 6 ++-- fuzz_test.go | 35 +++++++++++++++++++ ...133bae39a14164874ed8abdee1f6a6795311a0e546 | 2 ++ ...30cc13b5570849c89b3dbf5bc0152abc66c9642f3e | 2 ++ 4 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 fuzz_test.go create mode 100644 testdata/fuzz/FuzzCarReader/21a90a70853c333c6b9ddc133bae39a14164874ed8abdee1f6a6795311a0e546 create mode 100644 testdata/fuzz/FuzzCarReader/5857e57e4072c6b0d8684030cc13b5570849c89b3dbf5bc0152abc66c9642f3e diff --git a/car_test.go b/car_test.go index 9ae30909..3c6340be 100644 --- a/car_test.go +++ b/car_test.go @@ -75,9 +75,11 @@ func TestRoundtrip(t *testing.T) { } } +// fixture is a clean single-block, single-root CAR +const fixtureStr = "3aa265726f6f747381d82a58250001711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e012c01711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80ba165646f646779f5" + func TestEOFHandling(t *testing.T) { - // fixture is a clean single-block, single-root CAR - fixture, err := hex.DecodeString("3aa265726f6f747381d82a58250001711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e012c01711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80ba165646f646779f5") + fixture, err := hex.DecodeString(fixtureStr) if err != nil { t.Fatal(err) } diff --git a/fuzz_test.go b/fuzz_test.go new file mode 100644 index 00000000..8ac04bbd --- /dev/null +++ b/fuzz_test.go @@ -0,0 +1,35 @@ +//go:build go1.18 +// +build go1.18 + +package car_test + +import ( + "bytes" + "encoding/hex" + "io" + "testing" + + car "github.com/ipld/go-car" +) + +func FuzzCarReader(f *testing.F) { + fixture, err := hex.DecodeString(fixtureStr) + if err != nil { + f.Fatal(err) + } + 
f.Add(fixture) + + f.Fuzz(func(t *testing.T, data []byte) { + r, err := car.NewCarReader(bytes.NewReader(data)) + if err != nil { + return + } + + for { + _, err = r.Next() + if err == io.EOF { + return + } + } + }) +} diff --git a/testdata/fuzz/FuzzCarReader/21a90a70853c333c6b9ddc133bae39a14164874ed8abdee1f6a6795311a0e546 b/testdata/fuzz/FuzzCarReader/21a90a70853c333c6b9ddc133bae39a14164874ed8abdee1f6a6795311a0e546 new file mode 100644 index 00000000..a7ab1d51 --- /dev/null +++ b/testdata/fuzz/FuzzCarReader/21a90a70853c333c6b9ddc133bae39a14164874ed8abdee1f6a6795311a0e546 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("\xe0\xe0\xe0\xe0\xa7\x06\folLʔ<#oK\x19g#H\x96\b\xed\xb4*\x8b\x8f\xa8\vgversion\x19") diff --git a/testdata/fuzz/FuzzCarReader/5857e57e4072c6b0d8684030cc13b5570849c89b3dbf5bc0152abc66c9642f3e b/testdata/fuzz/FuzzCarReader/5857e57e4072c6b0d8684030cc13b5570849c89b3dbf5bc0152abc66c9642f3e new file mode 100644 index 00000000..3e680cf6 --- /dev/null +++ b/testdata/fuzz/FuzzCarReader/5857e57e4072c6b0d8684030cc13b5570849c89b3dbf5bc0152abc66c9642f3e @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte(":\xa2eroots\x81\xd80X%\x00\x0100 00000000000000000000000000000000gversion\x01\x010") From c1331057b48b536b5949c5fb8fd9ed09de77cc15 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 17 Jun 2022 01:04:46 +0200 Subject: [PATCH 04/37] fix: v2 don't OOM if the header size is too big --- v2/internal/carv1/util/util.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/v2/internal/carv1/util/util.go b/v2/internal/carv1/util/util.go index 6b949561..dd543ac5 100644 --- a/v2/internal/carv1/util/util.go +++ b/v2/internal/carv1/util/util.go @@ -1,6 +1,7 @@ package util import ( + "errors" "io" internalio "github.com/ipld/go-car/v2/internal/io" @@ -73,6 +74,11 @@ func LdRead(r io.Reader, zeroLenAsEOF bool) ([]byte, error) { return nil, io.EOF } + const maxAllowedHeaderSize = 1024 * 1024 + if l > maxAllowedHeaderSize { // Don't OOM + return nil, errors.New("invalid input, too big 
header") + } + buf := make([]byte, l) if _, err := io.ReadFull(r, buf); err != nil { return nil, err From e36135f6c29e3450efe98b429a129207d15ba6e1 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 17 Jun 2022 01:05:04 +0200 Subject: [PATCH 05/37] test: v2 add fuzzing to BlockReader --- v2/fuzz_test.go | 58 +++++++++++++++++++ ...1ba9f5138a38c882a7fa06456595998e740a9f5a14 | 2 + 2 files changed, 60 insertions(+) create mode 100644 v2/fuzz_test.go create mode 100644 v2/testdata/fuzz/FuzzBlockReader/c3c7eedeb4968a5b3131371ba9f5138a38c882a7fa06456595998e740a9f5a14 diff --git a/v2/fuzz_test.go b/v2/fuzz_test.go new file mode 100644 index 00000000..82155ae9 --- /dev/null +++ b/v2/fuzz_test.go @@ -0,0 +1,58 @@ +//go:build go1.18 +// +build go1.18 + +package car_test + +import ( + "bytes" + "encoding/hex" + "io" + "os" + "path/filepath" + "testing" + + car "github.com/ipld/go-car/v2" +) + +// v1FixtureStr is a clean carv1 single-block, single-root CAR +const v1FixtureStr = "3aa265726f6f747381d82a58250001711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e012c01711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80ba165646f646779f5" + +func FuzzBlockReader(f *testing.F) { + fixture, err := hex.DecodeString(v1FixtureStr) + if err != nil { + f.Fatal(err) + } + f.Add(fixture) + files, err := filepath.Glob("testdata/*.car") + if err != nil { + f.Fatal(err) + } + for _, fname := range files { + func() { + file, err := os.Open(fname) + if err != nil { + f.Fatal(err) + } + defer file.Close() + data, err := io.ReadAll(file) + if err != nil { + f.Fatal(err) + } + f.Add(data) + }() + } + + f.Fuzz(func(t *testing.T, data []byte) { + r, err := car.NewBlockReader(bytes.NewReader(data)) + if err != nil { + return + } + + for { + _, err = r.Next() + if err == io.EOF { + return + } + } + }) +} diff --git a/v2/testdata/fuzz/FuzzBlockReader/c3c7eedeb4968a5b3131371ba9f5138a38c882a7fa06456595998e740a9f5a14 
b/v2/testdata/fuzz/FuzzBlockReader/c3c7eedeb4968a5b3131371ba9f5138a38c882a7fa06456595998e740a9f5a14 new file mode 100644 index 00000000..ae9262d4 --- /dev/null +++ b/v2/testdata/fuzz/FuzzBlockReader/c3c7eedeb4968a5b3131371ba9f5138a38c882a7fa06456595998e740a9f5a14 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("\xff\x80\xaa\x95\xa6sion\x01,\x01q\x12 \x15\x1f\xe9\xe7 Date: Fri, 17 Jun 2022 01:50:16 +0200 Subject: [PATCH 06/37] fix: v2 don't accept overflowing offsets while reading v2 headers --- v2/car.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/v2/car.go b/v2/car.go index f2885d9d..19473136 100644 --- a/v2/car.go +++ b/v2/car.go @@ -2,6 +2,7 @@ package car import ( "encoding/binary" + "errors" "io" ) @@ -166,8 +167,16 @@ func (h *Header) ReadFrom(r io.Reader) (int64, error) { if err != nil { return n, err } - h.DataOffset = binary.LittleEndian.Uint64(buf[:8]) - h.DataSize = binary.LittleEndian.Uint64(buf[8:16]) - h.IndexOffset = binary.LittleEndian.Uint64(buf[16:]) + dataOffset := binary.LittleEndian.Uint64(buf[:8]) + dataSize := binary.LittleEndian.Uint64(buf[8:16]) + indexOffset := binary.LittleEndian.Uint64(buf[16:]) + if int64(dataOffset) < 0 || + int64(dataSize) < 0 || + int64(indexOffset) < 0 { + return n, errors.New("malformed car, overflowing offsets") + } + h.DataOffset = dataOffset + h.DataSize = dataSize + h.IndexOffset = indexOffset return n, nil } From f5b91b9feaa5046effed05085f005cc2e5c1b749 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 17 Jun 2022 01:23:10 +0200 Subject: [PATCH 07/37] test: v2 add fuzzing to Reader --- v2/fuzz_test.go | 25 ++++++++++++++++++- ...9ac8fb6717ecba6e7e591a1ab111514f30e4b3594e | 2 ++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 v2/testdata/fuzz/FuzzReader/e1d7f87ee37f48386642fa9ac8fb6717ecba6e7e591a1ab111514f30e4b3594e diff --git a/v2/fuzz_test.go b/v2/fuzz_test.go index 82155ae9..a11dafce 100644 --- a/v2/fuzz_test.go +++ b/v2/fuzz_test.go @@ -12,12 +12,13 @@ 
import ( "testing" car "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/index" ) // v1FixtureStr is a clean carv1 single-block, single-root CAR const v1FixtureStr = "3aa265726f6f747381d82a58250001711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e012c01711220151fe9e73c6267a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80ba165646f646779f5" -func FuzzBlockReader(f *testing.F) { +func seedWithCarFiles(f *testing.F) { fixture, err := hex.DecodeString(v1FixtureStr) if err != nil { f.Fatal(err) @@ -41,6 +42,10 @@ func FuzzBlockReader(f *testing.F) { f.Add(data) }() } +} + +func FuzzBlockReader(f *testing.F) { + seedWithCarFiles(f) f.Fuzz(func(t *testing.T, data []byte) { r, err := car.NewBlockReader(bytes.NewReader(data)) @@ -56,3 +61,21 @@ func FuzzBlockReader(f *testing.F) { } }) } + +func FuzzReader(f *testing.F) { + seedWithCarFiles(f) + + f.Fuzz(func(t *testing.T, data []byte) { + subject, err := car.NewReader(bytes.NewReader(data)) + if err != nil { + return + } + + subject.Roots() + ir := subject.IndexReader() + if ir != nil { + index.ReadFrom(ir) + } + car.GenerateIndex(subject.DataReader()) + }) +} diff --git a/v2/testdata/fuzz/FuzzReader/e1d7f87ee37f48386642fa9ac8fb6717ecba6e7e591a1ab111514f30e4b3594e b/v2/testdata/fuzz/FuzzReader/e1d7f87ee37f48386642fa9ac8fb6717ecba6e7e591a1ab111514f30e4b3594e new file mode 100644 index 00000000..36168e1c --- /dev/null +++ b/v2/testdata/fuzz/FuzzReader/e1d7f87ee37f48386642fa9ac8fb6717ecba6e7e591a1ab111514f30e4b3594e @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("0\xa1gversion\x0200000000000000000000000\xb70000000\x8100000000") From 6dc2ea12011575212f2e9188d98038919b97a9de Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 17 Jun 2022 01:56:12 +0200 Subject: [PATCH 08/37] fix: v2 don't allocate indexes too big --- v2/index/indexsorted.go | 5 +++++ ...1356ad2fe2217292374cd93b65a482fec3a43e6021fe87f5607bd8857 | 2 ++ 2 files changed, 7 insertions(+) create mode 100644 
v2/testdata/fuzz/FuzzReader/fe93cec1356ad2fe2217292374cd93b65a482fec3a43e6021fe87f5607bd8857 diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 2c05a922..8eb0651e 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -3,6 +3,7 @@ package index import ( "bytes" "encoding/binary" + "errors" "fmt" "io" "sort" @@ -67,6 +68,10 @@ func (s *singleWidthIndex) Unmarshal(r io.Reader) error { if err := binary.Read(r, binary.LittleEndian, &s.len); err != nil { return err } + const maxSingleWidthIndexSize = 1024 * 1024 + if s.len > maxSingleWidthIndexSize { + return errors.New("single width index is too big") + } s.index = make([]byte, s.len) s.len /= uint64(s.width) _, err := io.ReadFull(r, s.index) diff --git a/v2/testdata/fuzz/FuzzReader/fe93cec1356ad2fe2217292374cd93b65a482fec3a43e6021fe87f5607bd8857 b/v2/testdata/fuzz/FuzzReader/fe93cec1356ad2fe2217292374cd93b65a482fec3a43e6021fe87f5607bd8857 new file mode 100644 index 00000000..1552def1 --- /dev/null +++ b/v2/testdata/fuzz/FuzzReader/fe93cec1356ad2fe2217292374cd93b65a482fec3a43e6021fe87f5607bd8857 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("\n\xa1gversion\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x003\x00\x00\x00\x00\x00\x00\x00\x1c\x01\x00\x00\x00\x00\x00\x00O\x01\x00\x00\x00\x00\x00\x00:\xa2eroots\x81\xd8*X%\x00\x01p\x12 \n4Hq\xde\xe6\xc7T \x7fYl\xfc\x95\xae)Ԡ\xa2\x1ep#\x92Z]a% mԪ\xb4gversion\x010\x01U\x12 \xa9H\x90O/\x0fG\x9b\x8f\x81\x97iK0\x18K\r.\xd1\xc1\xcd*\x1e\xc0\xfb\x85ҙ\xa1\x92\xa4Ghello \x80\xffrld\nY\x01p\x12 \xcb\xce\x128i\xf8U\xaf\x03\xccIu,\x1b\x05\x83 \xb7\xb8\xac\x1c\xeb|\x8eJ\xcbg\x1d\xf6y\x81\x12/\n$\x01U\x12 \xa9H\x90O/\x0fG\x9b\x8f\x81\x97iK0\x18K\r.\xd1\xc1\xcd*\x1e\xc0\xfb\x85ҙ\xa1\x92\xa4G\x12\x05b.txt\x18\f\n\x02\b\x01U\x01p\x12 \n4Hq\xde\xe6\xc7T \x7fYl\xfc\x95\xae)Ԡ\xa2\x1ep#\x92Z]a% mԪ\xb4\x12+\n$\x01p\x12 \xcb\xce\x128i\xf8U\xaf\x03\xccIu,\x1b\x05\x83 
\xb7\xb8\xac\x1c\xeb|\x8eJ\xcbg\x1d\xf6y\x81\x12\x01a\x18\x00\n\x02\b\x01\x81\b\x12\x00\x00\n4Hq\xde\xe6\xc7T \x7fYl\xfc\x95\xae)Ԡ\xa2\x1ep#\x92Z]a% \x86\x86\x86\x86\x86\x86\x86\x00\x00\x00\x00\x00\xa9H\x90O/\x0fG\x9b\x8f\x81\x97iK0\x18K\r.\xd1\xc1\xcd*\x1e\xc0\xfb\x85ҙ\xa1\x92\xa4G;\x00\x00\x00\x00\x00\x00\x00\xcb\xce\x128i\xf8U\xaf\x03\xccIu,\x1b\x05\x83 \xb7\xb8\xac\x1c\xeb|\x8eJ\xcbg\x1d\xf6y\x81l\x00\x00\x00\x00\x00\x00\x00") From 67ff54f2ae111708fbcf7089bd1eab7056343957 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 17 Jun 2022 02:01:37 +0200 Subject: [PATCH 09/37] fix: v2 don't divide by zero in width indexes --- v2/index/indexsorted.go | 3 +++ ...d07c02728da32cdaa17e13646f587ea278d888a655f683d6e42f19c0316 | 2 ++ 2 files changed, 5 insertions(+) create mode 100644 v2/testdata/fuzz/FuzzReader/c640dd07c02728da32cdaa17e13646f587ea278d888a655f683d6e42f19c0316 diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 8eb0651e..9e58a7cb 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -65,6 +65,9 @@ func (s *singleWidthIndex) Unmarshal(r io.Reader) error { if err := binary.Read(r, binary.LittleEndian, &s.width); err != nil { return err } + if s.width == 0 { + return errors.New("malformed car width index cannot be 0") + } if err := binary.Read(r, binary.LittleEndian, &s.len); err != nil { return err } diff --git a/v2/testdata/fuzz/FuzzReader/c640dd07c02728da32cdaa17e13646f587ea278d888a655f683d6e42f19c0316 b/v2/testdata/fuzz/FuzzReader/c640dd07c02728da32cdaa17e13646f587ea278d888a655f683d6e42f19c0316 new file mode 100644 index 00000000..5af80cf1 --- /dev/null +++ b/v2/testdata/fuzz/FuzzReader/c640dd07c02728da32cdaa17e13646f587ea278d888a655f683d6e42f19c0316 @@ -0,0 +1,2 @@ +go test fuzz v1 
+[]byte("0\xa1gversion\x0200000000000000000000000000000000O\x01\x00\x00\x00\x00\x00\x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\x81\b0000000000000000\x00\x00\x00\x0000\x00\x00\x00\x00\x00\x00") From 4e24d908fd7e5b8582058550abd674a84fcb79fa Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 17 Jun 2022 03:27:53 +0200 Subject: [PATCH 10/37] test: v2 add fuzzing of the index --- v2/fuzz_test.go | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/v2/fuzz_test.go b/v2/fuzz_test.go index a11dafce..2b74fb6b 100644 --- a/v2/fuzz_test.go +++ b/v2/fuzz_test.go @@ -79,3 +79,36 @@ func FuzzReader(f *testing.F) { car.GenerateIndex(subject.DataReader()) }) } + +func FuzzIndex(f *testing.F) { + files, err := filepath.Glob("testdata/*.car") + if err != nil { + f.Fatal(err) + } + for _, fname := range files { + func() { + file, err := os.Open(fname) + if err != nil { + f.Fatal(err) + } + defer file.Close() + subject, err := car.NewReader(file) + if err != nil { + return + } + index := subject.IndexReader() + if index == nil { + return + } + data, err := io.ReadAll(index) + if err != nil { + f.Fatal(err) + } + f.Add(data) + }() + } + + f.Fuzz(func(t *testing.T, data []byte) { + index.ReadFrom(bytes.NewReader(data)) + }) +} From 2eea2889659dc4cc432795f2ef1b4b6d8c729ab8 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 17 Jun 2022 03:36:50 +0200 Subject: [PATCH 11/37] ci: add fuzzing on CI --- .github/workflows/go-fuzz.yml | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .github/workflows/go-fuzz.yml diff --git a/.github/workflows/go-fuzz.yml b/.github/workflows/go-fuzz.yml new file mode 100644 index 00000000..4b51f69c --- /dev/null +++ b/.github/workflows/go-fuzz.yml @@ 
-0,0 +1,50 @@ +on: [push, pull_request] +name: Go Fuzz + +jobs: + v1: + strategy: + fail-fast: true + matrix: + target: [ "CarReader" ] + runs-on: ubuntu-latest + name: Fuzz V1 ${{ matrix.target }} + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions/setup-go@v2 + with: + go-version: 1.18.x + - name: Go information + run: | + go version + go env + - name: Run Fuzzing for 1m + uses: protocol/multiple-go-modules@v1.2 + with: + run: go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m . + v2: + strategy: + fail-fast: true + matrix: + target: [ "BlockReader", "Reader", "Index" ] + runs-on: ubuntu-latest + name: Fuzz V2 ${{ matrix.target }} + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions/setup-go@v2 + with: + go-version: 1.18.x + - name: Go information + run: | + go version + go env + - name: Run Fuzzing for 1m + uses: protocol/multiple-go-modules@v1.2 + with: + run: | + cd v2 + go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m . From f8735e6e9071076b3d0811ef506edb666b138e41 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 17 Jun 2022 19:57:56 +0200 Subject: [PATCH 12/37] feat: Refactor indexes to put storage considerations on consumers There is no way I can make a safe implementation of the parser by slurping thing into memory, indexes people use are just too big. So I made a new API which force consumers to manage that. They can choose to use a bytes.Reader, *os.File, mmaped thing, ... 
--- v2/blockstore/insertionindex.go | 24 ++++ v2/blockstore/readonly.go | 13 ++- v2/blockstore/readwrite.go | 23 ++-- v2/blockstore/readwrite_test.go | 23 ++-- v2/fuzz_test.go | 10 +- v2/index/index.go | 50 +++++--- v2/index/indexsorted.go | 165 ++++++++++++++++++++++----- v2/index/indexsorted_test.go | 3 +- v2/index/mhindexsorted.go | 59 +++++++++- v2/index/mhindexsorted_test.go | 2 +- v2/index/testutil/equal_index.go | 65 +++++++++++ v2/index_gen_test.go | 7 +- v2/internal/errsort/search.go | 54 +++++++++ v2/internal/io/fullReaderAt.go | 20 ++++ v2/internal/io/offset_read_seeker.go | 124 ++++++++++++++++---- v2/reader.go | 4 +- v2/reader_test.go | 11 +- v2/writer_test.go | 3 +- 18 files changed, 549 insertions(+), 111 deletions(-) create mode 100644 v2/index/testutil/equal_index.go create mode 100644 v2/internal/errsort/search.go create mode 100644 v2/internal/io/fullReaderAt.go diff --git a/v2/blockstore/insertionindex.go b/v2/blockstore/insertionindex.go index e8575ee1..95971aa2 100644 --- a/v2/blockstore/insertionindex.go +++ b/v2/blockstore/insertionindex.go @@ -9,6 +9,7 @@ import ( "github.com/ipfs/go-cid" "github.com/ipld/go-car/v2/index" + internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" "github.com/petar/GoLLRB/llrb" @@ -121,6 +122,20 @@ func (ii *insertionIndex) Marshal(w io.Writer) (uint64, error) { return l, err } +func (ii *insertionIndex) ForEach(f func(multihash.Multihash, uint64) error) error { + var errr error + ii.items.AscendGreaterOrEqual(ii.items.Min(), func(i llrb.Item) bool { + r := i.(recordDigest).Record + err := f(r.Cid.Hash(), r.Offset) + if err != nil { + errr = err + return false + } + return true + }) + return errr +} + func (ii *insertionIndex) Unmarshal(r io.Reader) error { var length int64 if err := binary.Read(r, binary.LittleEndian, &length); err != nil { @@ -137,6 +152,15 @@ func (ii *insertionIndex) Unmarshal(r io.Reader) error { return nil } +func 
(ii *insertionIndex) UnmarshalLazyRead(r io.ReaderAt) (int64, error) { + rdr := internalio.NewOffsetReadSeeker(r, 0) + err := ii.Unmarshal(rdr) + if err != nil { + return 0, err + } + return rdr.Seek(0, io.SeekCurrent) +} + func (ii *insertionIndex) Codec() multicodec.Code { return insertionIndexCodec } diff --git a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index f0a15e78..141088b7 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -182,8 +182,11 @@ func OpenReadOnly(path string, opts ...carv2.Option) (*ReadOnly, error) { } func (b *ReadOnly) readBlock(idx int64) (cid.Cid, []byte, error) { - bcid, data, err := util.ReadNode(internalio.NewOffsetReadSeeker(b.backing, idx), b.opts.ZeroLengthSectionAsEOF) - return bcid, data, err + r, err := internalio.NewOffsetReadSeekerWithError(b.backing, idx) + if err != nil { + return cid.Cid{}, nil, err + } + return util.ReadNode(r, b.opts.ZeroLengthSectionAsEOF) } // DeleteBlock is unsupported and always errors. @@ -441,7 +444,11 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { } } - thisItemForNxt := rdr.Offset() + thisItemForNxt, err := rdr.Seek(0, io.SeekCurrent) + if err != nil { + maybeReportError(ctx, err) + return + } _, c, err := cid.CidFromReader(rdr) if err != nil { maybeReportError(ctx, err) diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index 090633c0..de43999f 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -86,12 +86,12 @@ func AllowDuplicatePuts(allow bool) carv2.Option { // successfully. On resumption the roots argument and WithDataPadding option must match the // previous instantiation of ReadWrite blockstore that created the file. More explicitly, the file // resuming from must: -// 1. start with a complete CARv2 car.Pragma. -// 2. 
contain a complete CARv1 data header with root CIDs matching the CIDs passed to the -// constructor, starting at offset optionally padded by WithDataPadding, followed by zero or -// more complete data sections. If any corrupt data sections are present the resumption will fail. -// Note, if set previously, the blockstore must use the same WithDataPadding option as before, -// since this option is used to locate the CARv1 data payload. +// 1. start with a complete CARv2 car.Pragma. +// 2. contain a complete CARv1 data header with root CIDs matching the CIDs passed to the +// constructor, starting at offset optionally padded by WithDataPadding, followed by zero or +// more complete data sections. If any corrupt data sections are present the resumption will fail. +// Note, if set previously, the blockstore must use the same WithDataPadding option as before, +// since this option is used to locate the CARv1 data payload. // // Note, resumption should be used with WithCidDeduplication, so that blocks that are successfully // written into the file are not re-written. Unless, the user explicitly wants duplicate blocks. @@ -139,7 +139,10 @@ func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.Option) (*ReadWri offset = 0 } rwbs.dataWriter = internalio.NewOffsetWriter(rwbs.f, offset) - v1r := internalio.NewOffsetReadSeeker(rwbs.f, offset) + v1r, err := internalio.NewOffsetReadSeekerWithError(rwbs.f, offset) + if err != nil { + return nil, err + } rwbs.ronly.backing = v1r rwbs.ronly.idx = rwbs.idx rwbs.ronly.carv2Closer = rwbs.f @@ -190,7 +193,11 @@ func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid) error { // Check if file was finalized by trying to read the CARv2 header. // We check because if finalized the CARv1 reader behaviour needs to be adjusted since // EOF will not signify end of CARv1 payload. i.e. index is most likely present. 
- _, err = headerInFile.ReadFrom(internalio.NewOffsetReadSeeker(b.f, carv2.PragmaSize)) + r, err := internalio.NewOffsetReadSeekerWithError(b.f, carv2.PragmaSize) + if err != nil { + return err + } + _, err = headerInFile.ReadFrom(r) // If reading CARv2 header succeeded, and CARv1 offset in header is not zero then the file is // most-likely finalized. Check padding and truncate the file to remove index. diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 86ab06ea..bc78083a 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -842,20 +842,15 @@ func TestOpenReadWrite_WritesIdentityCIDsWhenOptionIsEnabled(t *testing.T) { expectedOffset := len(object) + 1 // Assert index is iterable and has exactly one record with expected multihash and offset. - switch idx := gotIdx.(type) { - case index.IterableIndex: - var i int - err := idx.ForEach(func(mh multihash.Multihash, offset uint64) error { - i++ - require.Equal(t, idmh, mh) - require.Equal(t, uint64(expectedOffset), offset) - return nil - }) - require.NoError(t, err) - require.Equal(t, 1, i) - default: - require.Failf(t, "unexpected index type", "wanted %v but got %v", multicodec.CarMultihashIndexSorted, idx.Codec()) - } + var count int + err = gotIdx.ForEach(func(mh multihash.Multihash, offset uint64) error { + count++ + require.Equal(t, idmh, mh) + require.Equal(t, uint64(expectedOffset), offset) + return nil + }) + require.NoError(t, err) + require.Equal(t, 1, count) } func TestOpenReadWrite_ErrorsWhenWritingTooLargeOfACid(t *testing.T) { diff --git a/v2/fuzz_test.go b/v2/fuzz_test.go index 2b74fb6b..8187457b 100644 --- a/v2/fuzz_test.go +++ b/v2/fuzz_test.go @@ -96,11 +96,15 @@ func FuzzIndex(f *testing.F) { if err != nil { return } - index := subject.IndexReader() - if index == nil { + indexRdr := subject.IndexReader() + if indexRdr == nil { return } - data, err := io.ReadAll(index) + _, n, err := index.ReadFromWithSize(indexRdr) + if err != nil { + 
return + } + data, err := io.ReadAll(io.NewSectionReader(indexRdr, 0, n)) if err != nil { f.Fatal(err) } diff --git a/v2/index/index.go b/v2/index/index.go index 10195b43..204bc122 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -45,8 +45,14 @@ type ( // Marshal encodes the index in serial form. Marshal(w io.Writer) (uint64, error) // Unmarshal decodes the index from its serial form. + // Deprecated: This function is slurpy and will copy everything into memory. Unmarshal(r io.Reader) error + // UnmarshalLazyRead is the safe alternative to Unmarshal. + // Instead of slurping it will keep a reference to the io.ReaderAt passed in + // and ask for data as needed. + UnmarshalLazyRead(r io.ReaderAt) (indexSize int64, err error) + // Load inserts a number of records into the index. // Note that Index will load all given records. Any filtering of the records such as // exclusion of CIDs with multihash.IDENTITY code must occur prior to calling this function. @@ -66,18 +72,6 @@ type ( // meaning that no callbacks happen, // ErrNotFound is returned. GetAll(cid.Cid, func(uint64) bool) error - } - - // IterableIndex extends Index in cases where the Index is able to - // provide an iterator for getting the list of all multihashes in the - // index. - // - // Note that it is possible for an index to contain multiple offsets for - // a given multihash. - // - // See: IterableIndex.ForEach, Index.GetAll. - IterableIndex interface { - Index // ForEach takes a callback function that will be called // on each entry in the index. The arguments to the callback are @@ -93,6 +87,12 @@ type ( // The order of calls to the given function is deterministic, but entirely index-specific. ForEach(func(multihash.Multihash, uint64) error) error } + + // IterableIndex is an index which supports iterating over its elements. + // Deprecated: IterableIndex has been moved into Index. Just use Index now. 
+ IterableIndex interface { + Index + } ) // GetFirst is a wrapper over Index.GetAll, returning the offset for the first @@ -136,18 +136,30 @@ func WriteTo(idx Index, w io.Writer) (uint64, error) { // ReadFrom reads index from r. // The reader decodes the index by reading the first byte to interpret the encoding. // Returns error if the encoding is not known. -func ReadFrom(r io.Reader) (Index, error) { - code, err := varint.ReadUvarint(internalio.ToByteReader(r)) +func ReadFrom(r io.ReaderAt) (Index, error) { + idx, _, err := ReadFromWithSize(r) + return idx, err +} + +// ReadFromWithSize is just like ReadFrom but also returns the size of the Index. +// The size is only valid when err == nil. +func ReadFromWithSize(r io.ReaderAt) (Index, int64, error) { + code, err := varint.ReadUvarint(internalio.NewOffsetReadSeeker(r, 0)) if err != nil { - return nil, err + return nil, 0, err } codec := multicodec.Code(code) idx, err := New(codec) if err != nil { - return nil, err + return nil, 0, err + } + rdr, err := internalio.NewOffsetReadSeekerWithError(r, int64(varint.UvarintSize(code))) + if err != nil { + return nil, 0, err } - if err := idx.Unmarshal(r); err != nil { - return nil, err + n, err := idx.UnmarshalLazyRead(rdr) + if err != nil { + return nil, 0, err } - return idx, nil + return idx, n, nil } diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 9e58a7cb..16367ed5 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -8,12 +8,19 @@ import ( "io" "sort" + "github.com/ipld/go-car/v2/internal/errsort" + internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" ) +type sizedReaderAt interface { + io.ReaderAt + Size() int64 +} + var _ Index = (*multiWidthIndex)(nil) type ( @@ -25,7 +32,7 @@ type ( singleWidthIndex struct { width uint32 len uint64 // in struct, len is #items. when marshaled, it's saved as #bytes. 
- index []byte + index sizedReaderAt } multiWidthIndex map[uint32]singleWidthIndex ) @@ -53,36 +60,72 @@ func (s *singleWidthIndex) Marshal(w io.Writer) (uint64, error) { return 0, err } l += 4 - if err := binary.Write(w, binary.LittleEndian, int64(len(s.index))); err != nil { + sz := s.index.Size() + if err := binary.Write(w, binary.LittleEndian, sz); err != nil { return l, err } l += 8 - n, err := w.Write(s.index) + n, err := io.Copy(w, io.NewSectionReader(s.index, 0, sz)) return l + uint64(n), err } +// Unmarshal decodes the index from its serial form. +// Deprecated: This function is slurpy and will copy the index in memory. func (s *singleWidthIndex) Unmarshal(r io.Reader) error { - if err := binary.Read(r, binary.LittleEndian, &s.width); err != nil { + var width uint32 + if err := binary.Read(r, binary.LittleEndian, &width); err != nil { return err } - if s.width == 0 { - return errors.New("malformed car width index cannot be 0") + var dataLen uint64 + if err := binary.Read(r, binary.LittleEndian, &dataLen); err != nil { + return err } - if err := binary.Read(r, binary.LittleEndian, &s.len); err != nil { + + if err := s.checkUnmarshalLengths(width, dataLen, 0); err != nil { + return err + } + + buf := make([]byte, dataLen) + if _, err := io.ReadFull(r, buf); err != nil { return err } - const maxSingleWidthIndexSize = 1024 * 1024 - if s.len > maxSingleWidthIndexSize { - return errors.New("single width index is too big") + s.index = bytes.NewReader(buf) + return nil +} + +func (s *singleWidthIndex) UnmarshalLazyRead(r io.ReaderAt) (indexSize int64, err error) { + var b [12]byte + _, err = internalio.FullReadAt(r, b[:], 0) + if err != nil { + return 0, err + } + + width := binary.LittleEndian.Uint32(b[:4]) + dataLen := binary.LittleEndian.Uint64(b[4:12]) + if err := s.checkUnmarshalLengths(width, dataLen, uint64(len(b))); err != nil { + return 0, err } - s.index = make([]byte, s.len) - s.len /= uint64(s.width) - _, err := io.ReadFull(r, s.index) - return err + 
s.index = io.NewSectionReader(r, int64(len(b)), int64(dataLen)) + return int64(dataLen) + int64(len(b)), nil } -func (s *singleWidthIndex) Less(i int, digest []byte) bool { - return bytes.Compare(digest[:], s.index[i*int(s.width):((i+1)*int(s.width)-8)]) <= 0 +func (s *singleWidthIndex) checkUnmarshalLengths(width uint32, dataLen, extra uint64) error { + if width <= 8 { + return errors.New("malformed index; width must be bigger than 8") + } + if int32(width) < 0 { + return errors.New("index too big; singleWidthIndex width is overflowing int32") + } + oldDataLen, dataLen := dataLen, dataLen+extra + if oldDataLen > dataLen { + return errors.New("index too big; singleWidthIndex len is overflowing") + } + if int64(dataLen) < 0 { + return errors.New("index too big; singleWidthIndex len is overflowing int64") + } + s.width = width + s.len = dataLen / uint64(width) + return nil } func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { @@ -94,18 +137,35 @@ func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { } func (s *singleWidthIndex) getAll(d []byte, fn func(uint64) bool) error { - idx := sort.Search(int(s.len), func(i int) bool { - return s.Less(i, d) + digestLen := int64(s.width) - 8 + b := make([]byte, digestLen) + idxI, err := errsort.Search(int(s.len), func(i int) (bool, error) { + digestStart := int64(i) * int64(s.width) + _, err := internalio.FullReadAt(s.index, b, digestStart) + if err != nil { + return false, err + } + return bytes.Compare(d, b) <= 0, nil }) + if err != nil { + return err + } + idx := int64(idxI) var any bool for ; uint64(idx) < s.len; idx++ { - digestStart := idx * int(s.width) - offsetEnd := (idx + 1) * int(s.width) + digestStart := idx * int64(s.width) + offsetEnd := digestStart + int64(s.width) digestEnd := offsetEnd - 8 - if bytes.Equal(d[:], s.index[digestStart:digestEnd]) { + digestLen := digestEnd - digestStart + b := make([]byte, offsetEnd-digestStart) + _, err := internalio.FullReadAt(s.index, 
b, digestStart) + if err != nil { + return err + } + if bytes.Equal(d, b[:digestLen]) { any = true - offset := binary.LittleEndian.Uint64(s.index[digestEnd:offsetEnd]) + offset := binary.LittleEndian.Uint64(b[digestLen:]) if !fn(offset) { // User signalled to stop searching; therefore, break. break @@ -139,13 +199,19 @@ func (s *singleWidthIndex) Load(items []Record) error { } func (s *singleWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error { - segmentCount := len(s.index) / int(s.width) - for i := 0; i < segmentCount; i++ { - digestStart := i * int(s.width) - offsetEnd := (i + 1) * int(s.width) + segmentCount := s.index.Size() / int64(s.width) + for i := int64(0); i < segmentCount; i++ { + digestStart := i * int64(s.width) + offsetEnd := digestStart + int64(s.width) digestEnd := offsetEnd - 8 - digest := s.index[digestStart:digestEnd] - offset := binary.LittleEndian.Uint64(s.index[digestEnd:offsetEnd]) + digestLen := digestEnd - digestStart + b := make([]byte, offsetEnd-digestStart) + _, err := internalio.FullReadAt(s.index, b, digestStart) + if err != nil { + return err + } + digest := b[:digestLen] + offset := binary.LittleEndian.Uint64(b[digestLen:]) if err := f(digest, offset); err != nil { return err } @@ -212,6 +278,37 @@ func (m *multiWidthIndex) Unmarshal(r io.Reader) error { return nil } +func (m *multiWidthIndex) UnmarshalLazyRead(r io.ReaderAt) (sum int64, err error) { + var b [4]byte + _, err = internalio.FullReadAt(r, b[:], 0) + if err != nil { + return 0, err + } + count := binary.LittleEndian.Uint32(b[:4]) + if int32(count) < 0 { + return 0, errors.New("index too big; multiWidthIndex count is overflowing int32") + } + sum += int64(len(b)) + for ; count > 0; count-- { + s := singleWidthIndex{} + or, err := internalio.NewOffsetReadSeekerWithError(r, sum) + if err != nil { + return 0, err + } + n, err := s.UnmarshalLazyRead(or) + if err != nil { + return 0, err + } + oldSum := sum + sum += n + if sum < oldSum { + return 0, 
errors.New("index too big; multiWidthIndex len is overflowing int64") + } + (*m)[s.width] = s + } + return sum, nil +} + func (m *multiWidthIndex) Load(items []Record) error { // Split cids on their digest length idxs := make(map[int][]digestRecord) @@ -241,13 +338,23 @@ func (m *multiWidthIndex) Load(items []Record) error { s := singleWidthIndex{ width: uint32(rcrdWdth), len: uint64(len(lst)), - index: compact, + index: bytes.NewReader(compact), } (*m)[uint32(width)+8] = s } return nil } +func (m *multiWidthIndex) ForEach(f func(multihash.Multihash, uint64) error) error { + return m.forEachDigest(func(digest []byte, offset uint64) error { + mh, err := multihash.Cast(digest) + if err != nil { + return err + } + return f(mh, offset) + }) +} + func (m *multiWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error { sizes := make([]uint32, 0, len(*m)) for k := range *m { diff --git a/v2/index/indexsorted_test.go b/v2/index/indexsorted_test.go index 5c1ee449..f7e038a0 100644 --- a/v2/index/indexsorted_test.go +++ b/v2/index/indexsorted_test.go @@ -1,6 +1,7 @@ package index import ( + "bytes" "encoding/binary" "testing" @@ -51,7 +52,7 @@ func TestSingleWidthIndex_GetAll(t *testing.T) { subject := &singleWidthIndex{ width: 9, len: uint64(l), - index: buf, + index: bytes.NewReader(buf), } var foundCount int diff --git a/v2/index/mhindexsorted.go b/v2/index/mhindexsorted.go index 55975b8e..0200f700 100644 --- a/v2/index/mhindexsorted.go +++ b/v2/index/mhindexsorted.go @@ -2,17 +2,18 @@ package index import ( "encoding/binary" + "errors" "io" "sort" "github.com/ipfs/go-cid" + internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" ) var ( - _ Index = (*MultihashIndexSorted)(nil) - _ IterableIndex = (*MultihashIndexSorted)(nil) + _ Index = (*MultihashIndexSorted)(nil) ) type ( @@ -46,6 +47,29 @@ func (m *multiWidthCodedIndex) Unmarshal(r io.Reader) error { return 
m.multiWidthIndex.Unmarshal(r) } +func (m *multiWidthCodedIndex) UnmarshalLazyRead(r io.ReaderAt) (int64, error) { + var b [8]byte + _, err := internalio.FullReadAt(r, b[:], 0) + if err != nil { + return 0, err + } + m.code = binary.LittleEndian.Uint64(b[:8]) + rdr, err := internalio.NewOffsetReadSeekerWithError(r, int64(len(b))) + if err != nil { + return 0, err + } + sum, err := m.multiWidthIndex.UnmarshalLazyRead(rdr) + if err != nil { + return 0, err + } + oldSum := sum + sum += int64(len(b)) + if sum < oldSum { + return 0, errors.New("index too big; multiWidthCodedIndex len is overflowing") + } + return sum, nil +} + func (m *multiWidthCodedIndex) forEach(f func(mh multihash.Multihash, offset uint64) error) error { return m.multiWidthIndex.forEachDigest(func(digest []byte, offset uint64) error { mh, err := multihash.Encode(digest, m.code) @@ -107,6 +131,37 @@ func (m *MultihashIndexSorted) Unmarshal(r io.Reader) error { return nil } +func (m *MultihashIndexSorted) UnmarshalLazyRead(r io.ReaderAt) (sum int64, err error) { + var b [4]byte + _, err = internalio.FullReadAt(r, b[:], 0) + if err != nil { + return 0, err + } + sum += int64(len(b)) + count := binary.LittleEndian.Uint32(b[:4]) + if int32(count) < 0 { + return 0, errors.New("index too big; MultihashIndexSorted count is overflowing int32") + } + for ; count > 0; count-- { + mwci := newMultiWidthCodedIndex() + or, err := internalio.NewOffsetReadSeekerWithError(r, sum) + if err != nil { + return 0, err + } + n, err := mwci.UnmarshalLazyRead(or) + if err != nil { + return 0, err + } + oldSum := sum + sum += n + if sum < oldSum { + return 0, errors.New("index too big; MultihashIndexSorted sum is overflowing int64") + } + m.put(mwci) + } + return sum, nil +} + func (m *MultihashIndexSorted) put(mwci *multiWidthCodedIndex) { (*m)[mwci.code] = mwci } diff --git a/v2/index/mhindexsorted_test.go b/v2/index/mhindexsorted_test.go index 79fc9c5f..d97d1e6f 100644 --- a/v2/index/mhindexsorted_test.go +++ 
b/v2/index/mhindexsorted_test.go @@ -58,7 +58,7 @@ func TestMultiWidthCodedIndex_StableIterate(t *testing.T) { err = subject.Load(records) require.NoError(t, err) - iterable := subject.(index.IterableIndex) + iterable := subject.(index.Index) mh := make([]multihash.Multihash, 0, len(records)) require.NoError(t, iterable.ForEach(func(m multihash.Multihash, _ uint64) error { mh = append(mh, m) diff --git a/v2/index/testutil/equal_index.go b/v2/index/testutil/equal_index.go new file mode 100644 index 00000000..1314d25e --- /dev/null +++ b/v2/index/testutil/equal_index.go @@ -0,0 +1,65 @@ +package testutil + +import ( + "sync" + "testing" + + "github.com/ipld/go-car/v2/index" + + "github.com/multiformats/go-multihash" + "github.com/stretchr/testify/require" +) + +// insertUint64 perform one round of insertion sort on the last element +func insertUint64(s []uint64) { + switch len(s) { + case 0, 1: + return + default: + cur := s[len(s)-1] + for j := len(s) - 1; j > 0; { + j-- + if cur >= s[j] { + s[j+1] = cur + break + } + s[j+1] = s[j] + } + } +} + +func AssertIndenticalIndexes(t *testing.T, a, b index.Index) { + var wg sync.Mutex + wg.Lock() + // key is multihash.Multihash.HexString + var aCount uint + aMap := make(map[string][]uint64) + go func() { + defer wg.Unlock() + a.ForEach(func(mh multihash.Multihash, off uint64) error { + aCount++ + str := mh.HexString() + slice, _ := aMap[str] + slice = append(slice, off) + insertUint64(slice) + aMap[str] = slice + return nil + }) + }() + + var bCount uint + bMap := make(map[string][]uint64) + a.ForEach(func(mh multihash.Multihash, off uint64) error { + bCount++ + str := mh.HexString() + slice, _ := bMap[str] + slice = append(slice, off) + insertUint64(slice) + bMap[str] = slice + return nil + }) + wg.Lock() + + require.Equal(t, aCount, bCount) + require.Equal(t, aMap, bMap) +} diff --git a/v2/index_gen_test.go b/v2/index_gen_test.go index 11f0530f..64a73adc 100644 --- a/v2/index_gen_test.go +++ b/v2/index_gen_test.go @@ -8,6 
+8,7 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" + "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" @@ -103,7 +104,11 @@ func TestGenerateIndex(t *testing.T) { if tt.wantIndexer != nil { want = tt.wantIndexer(t) } - require.Equal(t, want, got) + if want == nil { + require.Nil(t, got) + } else { + testutil.AssertIndenticalIndexes(t, want, got) + } } }) t.Run("GenerateIndexFromFile_"+tt.name, func(t *testing.T) { diff --git a/v2/internal/errsort/search.go b/v2/internal/errsort/search.go new file mode 100644 index 00000000..fb886179 --- /dev/null +++ b/v2/internal/errsort/search.go @@ -0,0 +1,54 @@ +/* +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +package errsort + +// Search is like sort.Search but accepts an erroring closure. +// If it errors the search is terminated immediately +func Search(n int, f func(int) (bool, error)) (int, error) { + // Define f(-1) == false and f(n) == true. + // Invariant: f(i-1) == false, f(j) == true. + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + less, err := f(h) + if err != nil { + return 0, err + } + if !less { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + // i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i. 
+ return i, nil +} diff --git a/v2/internal/io/fullReaderAt.go b/v2/internal/io/fullReaderAt.go new file mode 100644 index 00000000..57f26685 --- /dev/null +++ b/v2/internal/io/fullReaderAt.go @@ -0,0 +1,20 @@ +package io + +import "io" + +func FullReadAt(r io.ReaderAt, b []byte, off int64) (sum int64, err error) { + for int64(len(b)) > sum { + n, err := r.ReadAt(b[sum:], off+sum) + sum += int64(n) + if err != nil { + if err == io.EOF { + if sum < int64(len(b)) { + return sum, io.ErrUnexpectedEOF + } + return sum, nil + } + return sum, err + } + } + return sum, nil +} diff --git a/v2/internal/io/offset_read_seeker.go b/v2/internal/io/offset_read_seeker.go index 4b701351..bbdcf4c6 100644 --- a/v2/internal/io/offset_read_seeker.go +++ b/v2/internal/io/offset_read_seeker.go @@ -1,63 +1,123 @@ package io -import "io" +import ( + "errors" + "io" +) var ( - _ io.ReaderAt = (*OffsetReadSeeker)(nil) - _ io.ReadSeeker = (*OffsetReadSeeker)(nil) + _ io.ReaderAt = (*offsetReadSeeker)(nil) + _ io.ReadSeeker = (*offsetReadSeeker)(nil) ) -// OffsetReadSeeker implements Read, and ReadAt on a section +// offsetReadSeeker implements Read, and ReadAt on a section // of an underlying io.ReaderAt. -// The main difference between io.SectionReader and OffsetReadSeeker is that +// The main difference between io.SectionReader and offsetReadSeeker is that // NewOffsetReadSeeker does not require the user to know the number of readable bytes. // // It also partially implements Seek, where the implementation panics if io.SeekEnd is passed. -// This is because, OffsetReadSeeker does not know the end of the file therefore cannot seek relative +// This is because, offsetReadSeeker does not know the end of the file therefore cannot seek relative // to it. 
-type OffsetReadSeeker struct { +type offsetReadSeeker struct { r io.ReaderAt base int64 off int64 + b [1]byte // avoid alloc in ReadByte +} + +type ReadSeekerAt interface { + io.Reader + io.ReaderAt + io.Seeker + io.ByteReader } -// NewOffsetReadSeeker returns an OffsetReadSeeker that reads from r +// NewOffsetReadSeeker returns an ReadSeekerAt that reads from r // starting offset offset off and stops with io.EOF when r reaches its end. // The Seek function will panic if whence io.SeekEnd is passed. -func NewOffsetReadSeeker(r io.ReaderAt, off int64) *OffsetReadSeeker { - return &OffsetReadSeeker{r, off, off} +func NewOffsetReadSeeker(r io.ReaderAt, off int64) ReadSeekerAt { + nr, err := NewOffsetReadSeekerWithError(r, off) + if err != nil { + return erroringReader{err} + } + return nr +} + +func NewOffsetReadSeekerWithError(r io.ReaderAt, off int64) (ReadSeekerAt, error) { + if or, ok := r.(*offsetReadSeeker); ok { + oldBase := or.base + newBase := or.base + off + if newBase < oldBase { + return nil, errors.New("NewOffsetReadSeeker overflow int64") + } + return &offsetReadSeeker{ + r: or.r, + base: newBase, + off: newBase, + }, nil + } + return &offsetReadSeeker{ + r: r, + base: off, + off: off, + }, nil } -func (o *OffsetReadSeeker) Read(p []byte) (n int, err error) { +func (o *offsetReadSeeker) Read(p []byte) (n int, err error) { n, err = o.r.ReadAt(p, o.off) - o.off += int64(n) + oldOffset := o.off + off := oldOffset + int64(n) + if off < oldOffset { + return 0, errors.New("ReadAt offset overflow") + } + o.off = off return } -func (o *OffsetReadSeeker) ReadAt(p []byte, off int64) (n int, err error) { +func (o *offsetReadSeeker) ReadAt(p []byte, off int64) (n int, err error) { if off < 0 { return 0, io.EOF } + oldOffset := off off += o.base + if off < oldOffset { + return 0, errors.New("ReadAt offset overflow") + } return o.r.ReadAt(p, off) } -func (o *OffsetReadSeeker) ReadByte() (byte, error) { - b := []byte{0} - _, err := o.Read(b) - return b[0], err +func 
(o *offsetReadSeeker) ReadByte() (byte, error) { + _, err := o.Read(o.b[:]) + return o.b[0], err } -func (o *OffsetReadSeeker) Offset() int64 { +func (o *offsetReadSeeker) Offset() int64 { return o.off } -func (o *OffsetReadSeeker) Seek(offset int64, whence int) (int64, error) { +func (o *offsetReadSeeker) Seek(offset int64, whence int) (int64, error) { switch whence { case io.SeekStart: - o.off = offset + o.base + oldOffset := offset + off := offset + o.base + if off < oldOffset { + return 0, errors.New("Seek offset overflow") + } + o.off = off case io.SeekCurrent: - o.off += offset + oldOffset := o.off + if offset < 0 { + if -offset > oldOffset { + return 0, errors.New("Seek offset underflow") + } + o.off = oldOffset + offset + } else { + off := oldOffset + offset + if off < oldOffset { + return 0, errors.New("Seek offset overflow") + } + o.off = off + } case io.SeekEnd: panic("unsupported whence: SeekEnd") } @@ -65,6 +125,26 @@ func (o *OffsetReadSeeker) Seek(offset int64, whence int) (int64, error) { } // Position returns the current position of this reader relative to the initial offset. -func (o *OffsetReadSeeker) Position() int64 { +func (o *offsetReadSeeker) Position() int64 { return o.off - o.base } + +type erroringReader struct { + err error +} + +func (e erroringReader) Read(_ []byte) (int, error) { + return 0, e.err +} + +func (e erroringReader) ReadAt(_ []byte, n int64) (int, error) { + return 0, e.err +} + +func (e erroringReader) ReadByte() (byte, error) { + return 0, e.err +} + +func (e erroringReader) Seek(_ int64, _ int) (int64, error) { + return 0, e.err +} diff --git a/v2/reader.go b/v2/reader.go index 9394a736..6651b290 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -109,11 +109,11 @@ func (r *Reader) DataReader() SectionReader { // IndexReader provides an io.Reader containing the index for the data payload if the index is // present. Otherwise, returns nil. 
// Note, this function will always return nil if the backing payload represents a CARv1. -func (r *Reader) IndexReader() io.Reader { +func (r *Reader) IndexReader() io.ReaderAt { if r.Version == 1 || !r.Header.HasIndex() { return nil } - return internalio.NewOffsetReadSeeker(r.r, int64(r.Header.IndexOffset)) + return io.NewSectionReader(r.r, int64(r.Header.IndexOffset), int64(r.Header.DataSize)-int64(r.Header.IndexOffset)) } // Close closes the underlying reader if it was opened by OpenReader. diff --git a/v2/reader_test.go b/v2/reader_test.go index a0c6e3cd..f653b60b 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -7,6 +7,7 @@ import ( carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" + "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/stretchr/testify/require" ) @@ -173,7 +174,7 @@ func TestReader_WithCarV2Consistency(t *testing.T) { require.NoError(t, err) wantIndex, err := carv2.GenerateIndex(subject.DataReader()) require.NoError(t, err) - require.Equal(t, wantIndex, gotIndex) + testutil.AssertIndenticalIndexes(t, wantIndex, gotIndex) }) } } @@ -186,8 +187,8 @@ func TestOpenReader_DoesNotPanicForReadersCreatedBeforeClosure(t *testing.T) { require.NoError(t, subject.Close()) buf := make([]byte, 1) - panicTest := func(r io.Reader) { - _, err := r.Read(buf) + panicTest := func(r io.ReaderAt) { + _, err := r.ReadAt(buf, 0) require.EqualError(t, err, "mmap: closed") } @@ -203,8 +204,8 @@ func TestOpenReader_DoesNotPanicForReadersCreatedAfterClosure(t *testing.T) { iReaderAfterClosure := subject.IndexReader() buf := make([]byte, 1) - panicTest := func(r io.Reader) { - _, err := r.Read(buf) + panicTest := func(r io.ReaderAt) { + _, err := r.ReadAt(buf, 0) require.EqualError(t, err, "mmap: closed") } diff --git a/v2/writer_test.go b/v2/writer_test.go index 11b5ff12..2044e17b 100644 --- a/v2/writer_test.go +++ b/v2/writer_test.go @@ -9,6 +9,7 @@ import ( "testing" 
"github.com/ipld/go-car/v2/index" + "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/stretchr/testify/require" @@ -56,7 +57,7 @@ func TestWrapV1(t *testing.T) { require.NoError(t, err) gotIdx, err := index.ReadFrom(subject.IndexReader()) require.NoError(t, err) - require.Equal(t, wantIdx, gotIdx) + testutil.AssertIndenticalIndexes(t, wantIdx, gotIdx) } func TestExtractV1(t *testing.T) { From 6e4e20879d01500ee9a8ca9b3a8633305d6eb43d Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 21 Jun 2022 17:30:11 +1000 Subject: [PATCH 13/37] fix index comparisons --- v2/blockstore/readwrite_test.go | 3 ++- v2/index/example_test.go | 24 +++++++++++++++++++----- v2/index/index_test.go | 3 ++- v2/index/testutil/equal_index.go | 8 +++++--- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index bc78083a..0ccd53e0 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -21,6 +21,7 @@ import ( carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/blockstore" "github.com/ipld/go-car/v2/index" + "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" @@ -519,7 +520,7 @@ func TestBlockstoreResumption(t *testing.T) { require.NoError(t, err) wantIdx, err := carv2.GenerateIndex(v2r.DataReader()) require.NoError(t, err) - require.Equal(t, wantIdx, gotIdx) + testutil.AssertIndenticalIndexes(t, wantIdx, gotIdx) } func TestBlockstoreResumptionIsSupportedOnFinalizedFile(t *testing.T) { diff --git a/v2/index/example_test.go b/v2/index/example_test.go index 3e484afb..c6f83ea2 100644 --- a/v2/index/example_test.go +++ b/v2/index/example_test.go @@ -5,10 +5,10 @@ import ( "io" "io/ioutil" "os" - "reflect" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" + "github.com/multiformats/go-multihash" ) // 
ExampleReadFrom unmarshalls an index from an indexed CARv2 file, and for each root CID prints the @@ -94,13 +94,27 @@ func ExampleWriteTo() { panic(err) } - // Expect indices to be equal. - if reflect.DeepEqual(idx, reReadIdx) { - fmt.Printf("Saved index file matches the index embedded in CARv2 at %v.\n", src) - } else { + // Expect indices to be equal - collect all of the multihashes and their + // offsets from the first and compare to the second + mha := make(map[string]uint64, 0) + _ = idx.ForEach(func(mh multihash.Multihash, off uint64) error { + mha[mh.HexString()] = off + return nil + }) + var count int + _ = reReadIdx.ForEach(func(mh multihash.Multihash, off uint64) error { + count++ + if expectedOffset, ok := mha[mh.HexString()]; !ok || expectedOffset != off { + panic("expected to get the same index as the CARv2 file") + } + return nil + }) + if count != len(mha) { panic("expected to get the same index as the CARv2 file") } + fmt.Printf("Saved index file matches the index embedded in CARv2 at %v.\n", src) + // Output: // Saved index file matches the index embedded in CARv2 at ../testdata/sample-wrapped-v2.car. 
} diff --git a/v2/index/index_test.go b/v2/index/index_test.go index 267beb03..883bd5d6 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -8,6 +8,7 @@ import ( "testing" blocks "github.com/ipfs/go-block-format" + "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/ipld/go-car/v2/internal/carv1/util" "github.com/multiformats/go-multicodec" @@ -112,7 +113,7 @@ func TestWriteTo(t *testing.T) { require.NoError(t, err) // Assert they are equal - require.Equal(t, wantIdx, gotIdx) + testutil.AssertIndenticalIndexes(t, wantIdx, gotIdx) } func TestMarshalledIndexStartsWithCodec(t *testing.T) { diff --git a/v2/index/testutil/equal_index.go b/v2/index/testutil/equal_index.go index 1314d25e..798985b9 100644 --- a/v2/index/testutil/equal_index.go +++ b/v2/index/testutil/equal_index.go @@ -4,12 +4,14 @@ import ( "sync" "testing" - "github.com/ipld/go-car/v2/index" - "github.com/multiformats/go-multihash" "github.com/stretchr/testify/require" ) +type Index interface { + ForEach(func(multihash.Multihash, uint64) error) error +} + // insertUint64 perform one round of insertion sort on the last element func insertUint64(s []uint64) { switch len(s) { @@ -28,7 +30,7 @@ func insertUint64(s []uint64) { } } -func AssertIndenticalIndexes(t *testing.T, a, b index.Index) { +func AssertIndenticalIndexes(t *testing.T, a, b Index) { var wg sync.Mutex wg.Lock() // key is multihash.Multihash.HexString From 3eabc2d8f794da6aa88b4a5020413fe5c6a6ce58 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 21 Jun 2022 19:24:31 +1000 Subject: [PATCH 14/37] fix: revert to internalio.NewOffsetReadSeeker in Reader#IndexReader --- v2/reader.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2/reader.go b/v2/reader.go index 6651b290..fa94e672 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -113,7 +113,7 @@ func (r *Reader) IndexReader() io.ReaderAt { if r.Version == 1 || !r.Header.HasIndex() { return nil } - return 
io.NewSectionReader(r.r, int64(r.Header.IndexOffset), int64(r.Header.DataSize)-int64(r.Header.IndexOffset)) + return internalio.NewOffsetReadSeeker(r.r, int64(r.Header.IndexOffset)) } // Close closes the underlying reader if it was opened by OpenReader. From 04a85e75b8cf7214c3ab74e834f80f956a4760b6 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 21 Jun 2022 19:29:52 +1000 Subject: [PATCH 15/37] fix: staticcheck catches --- v2/index/mhindexsorted_test.go | 5 ++--- v2/index/testutil/equal_index.go | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/v2/index/mhindexsorted_test.go b/v2/index/mhindexsorted_test.go index d97d1e6f..7704d3a2 100644 --- a/v2/index/mhindexsorted_test.go +++ b/v2/index/mhindexsorted_test.go @@ -58,16 +58,15 @@ func TestMultiWidthCodedIndex_StableIterate(t *testing.T) { err = subject.Load(records) require.NoError(t, err) - iterable := subject.(index.Index) mh := make([]multihash.Multihash, 0, len(records)) - require.NoError(t, iterable.ForEach(func(m multihash.Multihash, _ uint64) error { + require.NoError(t, subject.ForEach(func(m multihash.Multihash, _ uint64) error { mh = append(mh, m) return nil })) for i := 0; i < 10; i++ { candidate := make([]multihash.Multihash, 0, len(records)) - require.NoError(t, iterable.ForEach(func(m multihash.Multihash, _ uint64) error { + require.NoError(t, subject.ForEach(func(m multihash.Multihash, _ uint64) error { candidate = append(candidate, m) return nil })) diff --git a/v2/index/testutil/equal_index.go b/v2/index/testutil/equal_index.go index 798985b9..c5da756a 100644 --- a/v2/index/testutil/equal_index.go +++ b/v2/index/testutil/equal_index.go @@ -41,7 +41,7 @@ func AssertIndenticalIndexes(t *testing.T, a, b Index) { a.ForEach(func(mh multihash.Multihash, off uint64) error { aCount++ str := mh.HexString() - slice, _ := aMap[str] + slice := aMap[str] slice = append(slice, off) insertUint64(slice) aMap[str] = slice @@ -54,7 +54,7 @@ func AssertIndenticalIndexes(t *testing.T, a, b 
Index) { a.ForEach(func(mh multihash.Multihash, off uint64) error { bCount++ str := mh.HexString() - slice, _ := bMap[str] + slice := bMap[str] slice = append(slice, off) insertUint64(slice) bMap[str] = slice From 77de9fe624840fecfb2a93526a9f9821eb086a6d Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 21 Jun 2022 19:34:06 +1000 Subject: [PATCH 16/37] fix: don't use multiple-go-modules for fuzzing --- .github/workflows/go-fuzz.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/go-fuzz.yml b/.github/workflows/go-fuzz.yml index 4b51f69c..3aa7f853 100644 --- a/.github/workflows/go-fuzz.yml +++ b/.github/workflows/go-fuzz.yml @@ -21,7 +21,6 @@ jobs: go version go env - name: Run Fuzzing for 1m - uses: protocol/multiple-go-modules@v1.2 with: run: go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m . v2: @@ -43,7 +42,6 @@ jobs: go version go env - name: Run Fuzzing for 1m - uses: protocol/multiple-go-modules@v1.2 with: run: | cd v2 From dfbe3f7a2c80d24c56d5eb3fe7d8b171fa5219bd Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 21 Jun 2022 14:28:43 +1000 Subject: [PATCH 17/37] fix: use CidFromReader() which has overread and OOM protection --- util/util.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/util.go b/util/util.go index fbd6a86b..59aa5e28 100644 --- a/util/util.go +++ b/util/util.go @@ -26,7 +26,7 @@ type BytesReader interface { io.ByteReader } -// TODO: this belongs in the go-cid package +// Deprecated: ReadCid shouldn't be used directly, use CidFromReader from go-cid func ReadCid(buf []byte) (cid.Cid, int, error) { if len(buf) >= 2 && bytes.Equal(buf[:2], cidv0Pref) { i := 34 @@ -70,7 +70,7 @@ func ReadNode(br *bufio.Reader) (cid.Cid, []byte, error) { return cid.Cid{}, nil, err } - c, n, err := ReadCid(data) + n, c, err := cid.CidFromReader(bytes.NewReader(data)) if err != nil { return cid.Cid{}, nil, err } From 3940bf53f99d1e3e6988e62cf97adbd899f6a11c Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 21 
Jun 2022 14:45:58 +1000 Subject: [PATCH 18/37] feat: MaxAllowedSectionSize default to 32M --- util/util.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/util/util.go b/util/util.go index 59aa5e28..af2f38e4 100644 --- a/util/util.go +++ b/util/util.go @@ -12,12 +12,11 @@ import ( mh "github.com/multiformats/go-multihash" ) -// MaxAllowedHeaderSize hint about how big the header red are allowed to be. -// This value is a hint to avoid OOMs, a parser that cannot OOM because it is -// streaming for example, isn't forced to follow that value. -// Deprecated: You should use v2#NewReader instead since it allows for options -// to be passed in. -var MaxAllowedHeaderSize uint = 1024 +// MaxAllowedSectionSize dictates the maximum number of bytes that a CARv1 header +// or section is allowed to occupy without causing a decode to error. +// This cannot be supplied as an option, only adjusted as a global. You should +// use v2#NewReader instead since it allows for options to be passed in. 
+var MaxAllowedSectionSize uint = 32 << 20 // 32MiB var cidv0Pref = []byte{0x12, 0x20} @@ -124,8 +123,8 @@ func LdRead(r *bufio.Reader) ([]byte, error) { return nil, err } - if l > uint64(MaxAllowedHeaderSize) { // Don't OOM - return nil, errors.New("malformed car; header is bigger than util.MaxAllowedHeaderSize") + if l > uint64(MaxAllowedSectionSize) { // Don't OOM + return nil, errors.New("malformed car; header is bigger than util.MaxAllowedSectionSize") } buf := make([]byte, l) From ec34902e6ef595c9c8e7c0dc999103aa53ae7553 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 21 Jun 2022 16:56:14 +1000 Subject: [PATCH 19/37] feat: MaxAllowed{Header,Section}Size option --- v2/block_reader.go | 10 +++--- v2/block_reader_test.go | 55 +++++++++++++++++++++++++++++++++ v2/blockstore/readonly.go | 12 +++---- v2/blockstore/readwrite.go | 6 ++-- v2/blockstore/readwrite_test.go | 2 +- v2/car_test.go | 2 +- v2/index/index_test.go | 2 +- v2/index_gen.go | 12 +++---- v2/index_gen_test.go | 2 +- v2/internal/carv1/car.go | 34 ++++++++++++-------- v2/internal/carv1/util/util.go | 14 +++++---- v2/options.go | 48 ++++++++++++++++++++++++++-- v2/options_test.go | 12 +++++-- v2/reader.go | 9 +++--- v2/writer.go | 12 ++++--- 15 files changed, 176 insertions(+), 56 deletions(-) diff --git a/v2/block_reader.go b/v2/block_reader.go index 456d8d31..a74e3996 100644 --- a/v2/block_reader.go +++ b/v2/block_reader.go @@ -30,9 +30,11 @@ type BlockReader struct { // // See BlockReader.Next func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) { + options := ApplyOptions(opts...) + // Read CARv1 header or CARv2 pragma. // Both are a valid CARv1 header, therefore are read as such. - pragmaOrV1Header, err := carv1.ReadHeader(r) + pragmaOrV1Header, err := carv1.ReadHeader(r, options.MaxAllowedHeaderSize) if err != nil { return nil, err } @@ -40,7 +42,7 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) { // Populate the block reader version and options. 
br := &BlockReader{ Version: pragmaOrV1Header.Version, - opts: ApplyOptions(opts...), + opts: options, } // Expect either version 1 or 2. @@ -92,7 +94,7 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) { br.r = io.LimitReader(r, dataSize) // Populate br.Roots by reading the inner CARv1 data payload header. - header, err := carv1.ReadHeader(br.r) + header, err := carv1.ReadHeader(br.r, options.MaxAllowedHeaderSize) if err != nil { return nil, err } @@ -120,7 +122,7 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) { // immediately upon encountering a zero-length section without reading any further bytes from the // underlying io.Reader. func (br *BlockReader) Next() (blocks.Block, error) { - c, data, err := util.ReadNode(br.r, br.opts.ZeroLengthSectionAsEOF) + c, data, err := util.ReadNode(br.r, br.opts.ZeroLengthSectionAsEOF, br.opts.MaxAllowedSectionSize) if err != nil { return nil, err } diff --git a/v2/block_reader_test.go b/v2/block_reader_test.go index afffc806..384a6ed8 100644 --- a/v2/block_reader_test.go +++ b/v2/block_reader_test.go @@ -1,12 +1,17 @@ package car_test import ( + "bytes" + "encoding/hex" "io" "os" "testing" + "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/internal/carv1" + mh "github.com/multiformats/go-multihash" + "github.com/multiformats/go-varint" "github.com/stretchr/testify/require" ) @@ -104,6 +109,56 @@ func TestBlockReader_WithCarV1Consistency(t *testing.T) { } } +func TestMaxSectionLength(t *testing.T) { + // headerHex is the zero-roots CARv1 header + const headerHex = "11a265726f6f7473806776657273696f6e01" + headerBytes, _ := hex.DecodeString(headerHex) + // 8 MiB block of zeros + block := make([]byte, 8<<20) + // CID for that block + pfx := cid.NewPrefixV1(cid.Raw, mh.SHA2_256) + cid, err := pfx.Sum(block) + require.NoError(t, err) + + // construct CAR + var buf bytes.Buffer + buf.Write(headerBytes) + 
buf.Write(varint.ToUvarint(uint64(len(cid.Bytes()) + len(block)))) + buf.Write(cid.Bytes()) + buf.Write(block) + + // try to read it + car, err := carv2.NewBlockReader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + // error should occur on first section read + _, err = car.Next() + require.EqualError(t, err, "invalid section data, length of read beyond allowable maximum") + + // successful read by expanding the max section size + car, err = carv2.NewBlockReader(bytes.NewReader(buf.Bytes()), carv2.MaxAllowedSectionSize((8<<20)+40)) + require.NoError(t, err) + // can now read block and get our 8 MiB zeroed byte array + readBlock, err := car.Next() + require.NoError(t, err) + require.True(t, bytes.Equal(block, readBlock.RawData())) +} + +func TestMaxHeaderLength(t *testing.T) { + // headerHex is the is a 5 root CARv1 header + const headerHex = "de01a265726f6f747385d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b501d82a58250001711220785197229dc8bb1152945da58e2348f7e279eeded06cc2ca736d0e879858b5016776657273696f6e01" + headerBytes, _ := hex.DecodeString(headerHex) + c, _ := cid.Decode("bafyreidykglsfhoixmivffc5uwhcgshx4j465xwqntbmu43nb2dzqwfvae") + + // successful read + car, err := carv2.NewBlockReader(bytes.NewReader(headerBytes)) + require.NoError(t, err) + require.ElementsMatch(t, []cid.Cid{c, c, c, c, c}, car.Roots) + + // unsuccessful read, low allowable max header length (length - 3 because there are 2 bytes in the length varint prefix) + _, err = carv2.NewBlockReader(bytes.NewReader(headerBytes), carv2.MaxAllowedHeaderSize(uint64(len(headerBytes)-3))) + require.EqualError(t, err, "invalid header data, length of read beyond allowable maximum") +} + func requireReaderFromPath(t 
*testing.T, path string) io.Reader { f, err := os.Open(path) require.NoError(t, err) diff --git a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index 141088b7..c7c9127b 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -98,7 +98,7 @@ func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.Option) (*R opts: carv2.ApplyOptions(opts...), } - version, err := readVersion(backing) + version, err := readVersion(backing, opts...) if err != nil { return nil, err } @@ -135,7 +135,7 @@ func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.Option) (*R } } -func readVersion(at io.ReaderAt) (uint64, error) { +func readVersion(at io.ReaderAt, opts ...carv2.Option) (uint64, error) { var rr io.Reader switch r := at.(type) { case io.Reader: @@ -143,7 +143,7 @@ func readVersion(at io.ReaderAt) (uint64, error) { default: rr = internalio.NewOffsetReadSeeker(r, 0) } - return carv2.ReadVersion(rr) + return carv2.ReadVersion(rr, opts...) } func generateIndex(at io.ReaderAt, opts ...carv2.Option) (index.Index, error) { @@ -186,7 +186,7 @@ func (b *ReadOnly) readBlock(idx int64) (cid.Cid, []byte, error) { if err != nil { return cid.Cid{}, nil, err } - return util.ReadNode(r, b.opts.ZeroLengthSectionAsEOF) + return util.ReadNode(r, b.opts.ZeroLengthSectionAsEOF, b.opts.MaxAllowedSectionSize) } // DeleteBlock is unsupported and always errors. @@ -401,7 +401,7 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { // TODO we may use this walk for populating the index, and we need to be able to iterate keys in this way somewhere for index generation. In general though, when it's asked for all keys from a blockstore with an index, we should iterate through the index when possible rather than linear reads through the full car. 
rdr := internalio.NewOffsetReadSeeker(b.backing, 0) - header, err := carv1.ReadHeader(rdr) + header, err := carv1.ReadHeader(rdr, b.opts.MaxAllowedHeaderSize) if err != nil { b.mu.RUnlock() // don't hold the mutex forever return nil, fmt.Errorf("error reading car header: %w", err) @@ -491,7 +491,7 @@ func (b *ReadOnly) HashOnRead(bool) { // Roots returns the root CIDs of the backing CAR. func (b *ReadOnly) Roots() ([]cid.Cid, error) { - header, err := carv1.ReadHeader(internalio.NewOffsetReadSeeker(b.backing, 0)) + header, err := carv1.ReadHeader(internalio.NewOffsetReadSeeker(b.backing, 0), b.opts.MaxAllowedHeaderSize) if err != nil { return nil, fmt.Errorf("error reading car header: %w", err) } diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index de43999f..0fad9893 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -169,11 +169,11 @@ func (b *ReadWrite) initWithRoots(v2 bool, roots []cid.Cid) error { return carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.dataWriter) } -func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid) error { +func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Option) error { // On resumption it is expected that the CARv2 Pragma, and the CARv1 header is successfully written. // Otherwise we cannot resume from the file. // Read pragma to assert if b.f is indeed a CARv2. - version, err := carv2.ReadVersion(b.f) + version, err := carv2.ReadVersion(b.f, opts...) if err != nil { // The file is not a valid CAR file and cannot resume from it. // Or the write must have failed before pragma was written. @@ -224,7 +224,7 @@ func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid) error { // Use the given CARv1 padding to instantiate the CARv1 reader on file. 
v1r := internalio.NewOffsetReadSeeker(b.ronly.backing, 0) - header, err := carv1.ReadHeader(v1r) + header, err := carv1.ReadHeader(v1r, b.opts.MaxAllowedHeaderSize) if err != nil { // Cannot read the CARv1 header; the file is most likely corrupt. return fmt.Errorf("error reading car header: %w", err) diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 0ccd53e0..3dc52866 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -836,7 +836,7 @@ func TestOpenReadWrite_WritesIdentityCIDsWhenOptionIsEnabled(t *testing.T) { require.NoError(t, err) // Determine expected offset as the length of header plus one - header, err := carv1.ReadHeader(r.DataReader()) + header, err := carv1.ReadHeader(r.DataReader(), carv1.DefaultMaxAllowedHeaderSize) require.NoError(t, err) object, err := cbor.DumpObject(header) require.NoError(t, err) diff --git a/v2/car_test.go b/v2/car_test.go index 64519b29..9e113259 100644 --- a/v2/car_test.go +++ b/v2/car_test.go @@ -37,7 +37,7 @@ func TestCarV2PragmaLength(t *testing.T) { } func TestCarV2PragmaIsValidCarV1Header(t *testing.T) { - v1h, err := carv1.ReadHeader(bytes.NewReader(carv2.Pragma)) + v1h, err := carv1.ReadHeader(bytes.NewReader(carv2.Pragma), carv1.DefaultMaxAllowedHeaderSize) assert.NoError(t, err, "cannot decode pragma as CBOR with CARv1 header structure") assert.Equal(t, &carv1.CarHeader{ Roots: nil, diff --git a/v2/index/index_test.go b/v2/index/index_test.go index 883bd5d6..f895bc2f 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -78,7 +78,7 @@ func TestReadFrom(t *testing.T) { require.NoError(t, err) // Read the fame at offset and assert the frame corresponds to the expected block. 
- gotCid, gotData, err := util.ReadNode(crf, false) + gotCid, gotData, err := util.ReadNode(crf, false, carv1.DefaultMaxAllowedSectionSize) require.NoError(t, err) gotBlock, err := blocks.NewBlockWithCid(gotData, gotCid) require.NoError(t, err) diff --git a/v2/index_gen.go b/v2/index_gen.go index 10c86dc3..33ba7800 100644 --- a/v2/index_gen.go +++ b/v2/index_gen.go @@ -37,8 +37,11 @@ func GenerateIndex(v1r io.Reader, opts ...Option) (index.Index, error) { // Note, the index is re-generated every time even if r is in CARv2 format and already has an index. // To read existing index when available see ReadOrGenerateIndex. func LoadIndex(idx index.Index, r io.Reader, opts ...Option) error { + // Parse Options. + o := ApplyOptions(opts...) + reader := internalio.ToByteReadSeeker(r) - pragma, err := carv1.ReadHeader(r) + pragma, err := carv1.ReadHeader(r, o.MaxAllowedHeaderSize) if err != nil { return fmt.Errorf("error reading car header: %w", err) } @@ -78,7 +81,7 @@ func LoadIndex(idx index.Index, r io.Reader, opts ...Option) error { dataOffset = int64(v2h.DataOffset) // Read the inner CARv1 header to skip it and sanity check it. - v1h, err := carv1.ReadHeader(reader) + v1h, err := carv1.ReadHeader(reader, o.MaxAllowedHeaderSize) if err != nil { return err } @@ -104,9 +107,6 @@ func LoadIndex(idx index.Index, r io.Reader, opts ...Option) error { // CARv2 header. sectionOffset -= dataOffset - // Parse Options. - o := ApplyOptions(opts...) - records := make([]index.Record, 0) for { // Read the section's length. @@ -188,7 +188,7 @@ func GenerateIndexFromFile(path string, opts ...Option) (index.Index, error) { // given reader to fulfill index lookup. func ReadOrGenerateIndex(rs io.ReadSeeker, opts ...Option) (index.Index, error) { // Read version. - version, err := ReadVersion(rs) + version, err := ReadVersion(rs, opts...) 
if err != nil { return nil, err } diff --git a/v2/index_gen_test.go b/v2/index_gen_test.go index 64a73adc..cae76ddf 100644 --- a/v2/index_gen_test.go +++ b/v2/index_gen_test.go @@ -256,7 +256,7 @@ func generateMultihashSortedIndex(t *testing.T, path string) *index.MultihashInd require.NoError(t, err) t.Cleanup(func() { require.NoError(t, f.Close()) }) reader := internalio.ToByteReadSeeker(f) - header, err := carv1.ReadHeader(reader) + header, err := carv1.ReadHeader(reader, carv1.DefaultMaxAllowedHeaderSize) require.NoError(t, err) require.Equal(t, uint64(1), header.Version) diff --git a/v2/internal/carv1/car.go b/v2/internal/carv1/car.go index 48b7c86b..f62899b7 100644 --- a/v2/internal/carv1/car.go +++ b/v2/internal/carv1/car.go @@ -14,6 +14,9 @@ import ( "github.com/ipfs/go-merkledag" ) +const DefaultMaxAllowedHeaderSize uint64 = 32 << 20 // 32MiB +const DefaultMaxAllowedSectionSize uint64 = 8 << 20 // 8MiB + func init() { cbor.RegisterCborType(CarHeader{}) } @@ -56,9 +59,12 @@ func WriteCar(ctx context.Context, ds format.NodeGetter, roots []cid.Cid, w io.W return nil } -func ReadHeader(r io.Reader) (*CarHeader, error) { - hb, err := util.LdRead(r, false) +func ReadHeader(r io.Reader, maxReadBytes uint64) (*CarHeader, error) { + hb, err := util.LdRead(r, false, maxReadBytes) if err != nil { + if err == util.ErrSectionTooLarge { + err = util.ErrHeaderTooLarge + } return nil, err } @@ -106,21 +112,22 @@ func (cw *carWriter) writeNode(ctx context.Context, nd format.Node) error { } type CarReader struct { - r io.Reader - Header *CarHeader - zeroLenAsEOF bool + r io.Reader + Header *CarHeader + zeroLenAsEOF bool + maxAllowedSectionSize uint64 } func NewCarReaderWithZeroLengthSectionAsEOF(r io.Reader) (*CarReader, error) { - return newCarReader(r, true) + return NewCarReaderWithoutDefaults(r, true, DefaultMaxAllowedHeaderSize, DefaultMaxAllowedSectionSize) } func NewCarReader(r io.Reader) (*CarReader, error) { - return newCarReader(r, false) + return 
NewCarReaderWithoutDefaults(r, false, DefaultMaxAllowedHeaderSize, DefaultMaxAllowedSectionSize) } -func newCarReader(r io.Reader, zeroLenAsEOF bool) (*CarReader, error) { - ch, err := ReadHeader(r) +func NewCarReaderWithoutDefaults(r io.Reader, zeroLenAsEOF bool, maxAllowedHeaderSize uint64, maxAllowedSectionSize uint64) (*CarReader, error) { + ch, err := ReadHeader(r, maxAllowedHeaderSize) if err != nil { return nil, err } @@ -134,14 +141,15 @@ func newCarReader(r io.Reader, zeroLenAsEOF bool) (*CarReader, error) { } return &CarReader{ - r: r, - Header: ch, - zeroLenAsEOF: zeroLenAsEOF, + r: r, + Header: ch, + zeroLenAsEOF: zeroLenAsEOF, + maxAllowedSectionSize: maxAllowedSectionSize, }, nil } func (cr *CarReader) Next() (blocks.Block, error) { - c, data, err := util.ReadNode(cr.r, cr.zeroLenAsEOF) + c, data, err := util.ReadNode(cr.r, cr.zeroLenAsEOF, cr.maxAllowedSectionSize) if err != nil { return nil, err } diff --git a/v2/internal/carv1/util/util.go b/v2/internal/carv1/util/util.go index dd543ac5..7963812e 100644 --- a/v2/internal/carv1/util/util.go +++ b/v2/internal/carv1/util/util.go @@ -11,13 +11,16 @@ import ( cid "github.com/ipfs/go-cid" ) +var ErrSectionTooLarge = errors.New("invalid section data, length of read beyond allowable maximum") +var ErrHeaderTooLarge = errors.New("invalid header data, length of read beyond allowable maximum") + type BytesReader interface { io.Reader io.ByteReader } -func ReadNode(r io.Reader, zeroLenAsEOF bool) (cid.Cid, []byte, error) { - data, err := LdRead(r, zeroLenAsEOF) +func ReadNode(r io.Reader, zeroLenAsEOF bool, maxReadBytes uint64) (cid.Cid, []byte, error) { + data, err := LdRead(r, zeroLenAsEOF, maxReadBytes) if err != nil { return cid.Cid{}, nil, err } @@ -62,7 +65,7 @@ func LdSize(d ...[]byte) uint64 { return sum + uint64(s) } -func LdRead(r io.Reader, zeroLenAsEOF bool) ([]byte, error) { +func LdRead(r io.Reader, zeroLenAsEOF bool, maxReadBytes uint64) ([]byte, error) { l, err := 
varint.ReadUvarint(internalio.ToByteReader(r)) if err != nil { // If the length of bytes read is non-zero when the error is EOF then signal an unclean EOF. @@ -74,9 +77,8 @@ func LdRead(r io.Reader, zeroLenAsEOF bool) ([]byte, error) { return nil, io.EOF } - const maxAllowedHeaderSize = 1024 * 1024 - if l > maxAllowedHeaderSize { // Don't OOM - return nil, errors.New("invalid input, too big header") + if l > maxReadBytes { // Don't OOM + return nil, ErrSectionTooLarge } buf := make([]byte, l) diff --git a/v2/options.go b/v2/options.go index d2923b21..d2e526c4 100644 --- a/v2/options.go +++ b/v2/options.go @@ -6,11 +6,29 @@ import ( "github.com/ipld/go-car/v2/index" "github.com/ipld/go-ipld-prime/traversal" "github.com/multiformats/go-multicodec" + + "github.com/ipld/go-car/v2/internal/carv1" ) // DefaultMaxIndexCidSize specifies the maximum size in byptes accepted as a section CID by CARv2 index. const DefaultMaxIndexCidSize = 2 << 10 // 2 KiB +// DefaultMaxAllowedHeaderSize specifies the default maximum size that a CARv1 +// decode (including within a CARv2 container) will allow a header to be without +// erroring. This is to prevent OOM errors where a header prefix includes a +// too-large size specifier. +// Currently set to 32 MiB. +const DefaultMaxAllowedHeaderSize = carv1.DefaultMaxAllowedHeaderSize + +// DefaultMaxAllowedSectionSize specifies the default maximum size that a CARv1 +// decode (including within a CARv2 container) will allow a section to be +// without erroring. This is to prevent OOM errors where a section prefix +// includes a too-large size specifier. +// Typically IPLD blocks should be under 2 MiB (ideally under 1 MiB), so unless +// atypical data is expected, this should not be a large value. +// Currently set to 8 MiB. +const DefaultMaxAllowedSectionSize = carv1.DefaultMaxAllowedSectionSize + // Option describes an option which affects behavior when interacting with CAR files. 
type Option func(*Options) @@ -42,14 +60,20 @@ type Options struct { MaxTraversalLinks uint64 WriteAsCarV1 bool TraversalPrototypeChooser traversal.LinkTargetNodePrototypeChooser + + MaxAllowedHeaderSize uint64 + MaxAllowedSectionSize uint64 } // ApplyOptions applies given opts and returns the resulting Options. // This function should not be used directly by end users; it's only exposed as a // side effect of Option. func ApplyOptions(opt ...Option) Options { - var opts Options - opts.MaxTraversalLinks = math.MaxInt64 //default: traverse all + opts := Options{ + MaxTraversalLinks: math.MaxInt64, //default: traverse all + MaxAllowedHeaderSize: carv1.DefaultMaxAllowedHeaderSize, + MaxAllowedSectionSize: carv1.DefaultMaxAllowedSectionSize, + } for _, o := range opt { o(&opts) } @@ -128,3 +152,23 @@ func WithTraversalPrototypeChooser(t traversal.LinkTargetNodePrototypeChooser) O o.TraversalPrototypeChooser = t } } + +// MaxAllowedHeaderSize overrides the default maximum size (of 32 MiB) that a +// CARv1 decode (including within a CARv2 container) will allow a header to be +// without erroring. +func MaxAllowedHeaderSize(max uint64) Option { + return func(o *Options) { + o.MaxAllowedHeaderSize = max + } +} + +// MaxAllowedSectionSize overrides the default maximum size (of 8 MiB) that a +// CARv1 decode (including within a CARv2 container) will allow a section to be +// without erroring. +// Typically IPLD blocks should be under 2 MiB (ideally under 1 MiB), so unless +// atypical data is expected, this should not be a large value. 
+func MaxAllowedSectionSize(max uint64) Option { + return func(o *Options) { + o.MaxAllowedSectionSize = max + } +} diff --git a/v2/options_test.go b/v2/options_test.go index 7e060acf..6daa473f 100644 --- a/v2/options_test.go +++ b/v2/options_test.go @@ -12,9 +12,11 @@ import ( func TestApplyOptions_SetsExpectedDefaults(t *testing.T) { require.Equal(t, carv2.Options{ - IndexCodec: multicodec.CarMultihashIndexSorted, - MaxIndexCidSize: carv2.DefaultMaxIndexCidSize, - MaxTraversalLinks: math.MaxInt64, + IndexCodec: multicodec.CarMultihashIndexSorted, + MaxIndexCidSize: carv2.DefaultMaxIndexCidSize, + MaxTraversalLinks: math.MaxInt64, + MaxAllowedHeaderSize: 32 << 20, + MaxAllowedSectionSize: 8 << 20, }, carv2.ApplyOptions()) } @@ -30,6 +32,8 @@ func TestApplyOptions_AppliesOptions(t *testing.T) { BlockstoreAllowDuplicatePuts: true, BlockstoreUseWholeCIDs: true, MaxTraversalLinks: math.MaxInt64, + MaxAllowedHeaderSize: 101, + MaxAllowedSectionSize: 202, }, carv2.ApplyOptions( carv2.UseDataPadding(123), @@ -38,6 +42,8 @@ func TestApplyOptions_AppliesOptions(t *testing.T) { carv2.ZeroLengthSectionAsEOF(true), carv2.MaxIndexCidSize(789), carv2.StoreIdentityCIDs(true), + carv2.MaxAllowedHeaderSize(101), + carv2.MaxAllowedSectionSize(202), blockstore.AllowDuplicatePuts(true), blockstore.UseWholeCIDs(true), )) diff --git a/v2/reader.go b/v2/reader.go index fa94e672..40c5d8c8 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -51,7 +51,7 @@ func NewReader(r io.ReaderAt, opts ...Option) (*Reader, error) { or := internalio.NewOffsetReadSeeker(r, 0) var err error - cr.Version, err = ReadVersion(or) + cr.Version, err = ReadVersion(or, opts...) 
if err != nil { return nil, err } @@ -75,7 +75,7 @@ func (r *Reader) Roots() ([]cid.Cid, error) { if r.roots != nil { return r.roots, nil } - header, err := carv1.ReadHeader(r.DataReader()) + header, err := carv1.ReadHeader(r.DataReader(), r.opts.MaxAllowedHeaderSize) if err != nil { return nil, err } @@ -126,8 +126,9 @@ func (r *Reader) Close() error { // ReadVersion reads the version from the pragma. // This function accepts both CARv1 and CARv2 payloads. -func ReadVersion(r io.Reader) (uint64, error) { - header, err := carv1.ReadHeader(r) +func ReadVersion(r io.Reader, opts ...Option) (uint64, error) { + o := ApplyOptions(opts...) + header, err := carv1.ReadHeader(r, o.MaxAllowedHeaderSize) if err != nil { return 0, err } diff --git a/v2/writer.go b/v2/writer.go index e82c4b3b..f859dabe 100644 --- a/v2/writer.go +++ b/v2/writer.go @@ -105,7 +105,7 @@ func WrapV1(src io.ReadSeeker, dst io.Writer, opts ...Option) error { // for example, it should use copy_file_range on recent Linux versions. // This API should be preferred over copying directly via Reader.DataReader, // as it should allow for better performance while always being at least as efficient. -func ExtractV1File(srcPath, dstPath string) (err error) { +func ExtractV1File(srcPath, dstPath string, opts ...Option) (err error) { src, err := os.Open(srcPath) if err != nil { return err @@ -115,7 +115,7 @@ func ExtractV1File(srcPath, dstPath string) (err error) { defer src.Close() // Detect CAR version. - version, err := ReadVersion(src) + version, err := ReadVersion(src, opts...) if err != nil { return err } @@ -220,7 +220,7 @@ func AttachIndex(path string, idx index.Index, offset uint64) error { // // Note that the roots are only replaced if their total serialized size exactly matches the total // serialized size of existing roots in CAR file. 
-func ReplaceRootsInFile(path string, roots []cid.Cid) (err error) { +func ReplaceRootsInFile(path string, roots []cid.Cid, opts ...Option) (err error) { f, err := os.OpenFile(path, os.O_RDWR, 0o666) if err != nil { return err @@ -232,8 +232,10 @@ func ReplaceRootsInFile(path string, roots []cid.Cid) (err error) { } }() + options := ApplyOptions(opts...) + // Read header or pragma; note that both are a valid CARv1 header. - header, err := carv1.ReadHeader(f) + header, err := carv1.ReadHeader(f, options.MaxAllowedHeaderSize) if err != nil { return err } @@ -267,7 +269,7 @@ func ReplaceRootsInFile(path string, roots []cid.Cid) (err error) { return err } var innerV1Header *carv1.CarHeader - innerV1Header, err = carv1.ReadHeader(f) + innerV1Header, err = carv1.ReadHeader(f, options.MaxAllowedHeaderSize) if err != nil { return err } From 3c9f1d2cb6d5f183c087b25abbe0024a5d8453da Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Wed, 22 Jun 2022 16:01:51 +1000 Subject: [PATCH 20/37] fix: explicitly disable serialization of insertionindex --- v2/blockstore/insertionindex.go | 44 ++++++--------------------------- v2/blockstore/readonly.go | 1 + v2/go.mod | 1 - v2/go.sum | 1 - v2/index/index.go | 1 + 5 files changed, 9 insertions(+), 39 deletions(-) diff --git a/v2/blockstore/insertionindex.go b/v2/blockstore/insertionindex.go index 95971aa2..192eb5c3 100644 --- a/v2/blockstore/insertionindex.go +++ b/v2/blockstore/insertionindex.go @@ -2,20 +2,21 @@ package blockstore import ( "bytes" - "encoding/binary" "errors" "fmt" "io" "github.com/ipfs/go-cid" "github.com/ipld/go-car/v2/index" - internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" "github.com/petar/GoLLRB/llrb" - cbor "github.com/whyrusleeping/cbor/go" ) +// This index is intended to be efficient for random-access, in-memory lookups +// and is not intended to be an index type that is attached to a CARv2. 
+// See flatten() for conversion of this data to a known, existing index type. + var ( errUnsupported = errors.New("not supported") insertionIndexCodec = multicodec.Code(0x300003) @@ -105,21 +106,7 @@ func (ii *insertionIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { } func (ii *insertionIndex) Marshal(w io.Writer) (uint64, error) { - l := uint64(0) - if err := binary.Write(w, binary.LittleEndian, int64(ii.items.Len())); err != nil { - return l, err - } - l += 8 - - var err error - iter := func(i llrb.Item) bool { - if err = cbor.Encode(w, i.(recordDigest).Record); err != nil { - return false - } - return true - } - ii.items.AscendGreaterOrEqual(ii.items.Min(), iter) - return l, err + return 0, fmt.Errorf("unimplemented, index type not intended for serialization") } func (ii *insertionIndex) ForEach(f func(multihash.Multihash, uint64) error) error { @@ -137,28 +124,11 @@ func (ii *insertionIndex) ForEach(f func(multihash.Multihash, uint64) error) err } func (ii *insertionIndex) Unmarshal(r io.Reader) error { - var length int64 - if err := binary.Read(r, binary.LittleEndian, &length); err != nil { - return err - } - d := cbor.NewDecoder(r) - for i := int64(0); i < length; i++ { - var rec index.Record - if err := d.Decode(&rec); err != nil { - return err - } - ii.items.InsertNoReplace(newRecordDigest(rec)) - } - return nil + return fmt.Errorf("unimplemented, index type not intended for deserialization") } func (ii *insertionIndex) UnmarshalLazyRead(r io.ReaderAt) (int64, error) { - rdr := internalio.NewOffsetReadSeeker(r, 0) - err := ii.Unmarshal(rdr) - if err != nil { - return 0, err - } - return rdr.Seek(0, io.SeekCurrent) + return 0, fmt.Errorf("unimplemented, index type not intended for deserialization") } func (ii *insertionIndex) Codec() multicodec.Code { diff --git a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index c7c9127b..307486d7 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -47,6 +47,7 @@ type ReadOnly struct 
{ // The backing containing the data payload in CARv1 format. backing io.ReaderAt + // The CARv1 content index. idx index.Index diff --git a/v2/go.mod b/v2/go.mod index b2686a7c..3ac37f97 100644 --- a/v2/go.mod +++ b/v2/go.mod @@ -18,7 +18,6 @@ require ( github.com/multiformats/go-varint v0.0.6 github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 github.com/stretchr/testify v1.7.0 - github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 golang.org/x/exp v0.0.0-20210615023648-acb5c1269671 ) diff --git a/v2/go.sum b/v2/go.sum index 1b7eb9f9..d81ea5eb 100644 --- a/v2/go.sum +++ b/v2/go.sum @@ -880,7 +880,6 @@ github.com/warpfork/go-testmark v0.3.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2r github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a h1:G++j5e0OC488te356JvdhaM8YS6nMsjLAYF7JxCv07w= github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= -github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 h1:5HZfQkwe0mIfyDmc1Em5GqlNRzcdtlv4HTNmdpt7XH0= github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11/go.mod h1:Wlo/SzPmxVp6vXpGt/zaXhHH0fn4IxgqZc82aKg6bpQ= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 h1:WXhVOwj2USAXB5oMDwRl3piOux2XMV9TANaYxXHdkoE= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158/go.mod h1:Xj/M2wWU+QdTdRbu/L/1dIZY8/Wb2K9pAhtroQuxJJI= diff --git a/v2/index/index.go b/v2/index/index.go index 204bc122..eeff16ad 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -44,6 +44,7 @@ type ( // Marshal encodes the index in serial form. Marshal(w io.Writer) (uint64, error) + // Unmarshal decodes the index from its serial form. // Deprecated: This function is slurpy and will copy everything into memory. 
Unmarshal(r io.Reader) error From 3a1b4e83aefd3a1e3c6237350babd575855b061a Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Thu, 23 Jun 2022 21:04:12 +1000 Subject: [PATCH 21/37] fix: tighter constraint of singleWidthIndex width, add index recommentation docs --- v2/index/index.go | 11 ++++++++--- v2/index/indexsorted.go | 5 +++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/v2/index/index.go b/v2/index/index.go index eeff16ad..ca2c2490 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -49,9 +49,12 @@ type ( // Deprecated: This function is slurpy and will copy everything into memory. Unmarshal(r io.Reader) error - // UnmarshalLazyRead is the safe alternative to to Unmarshal. - // Instead of slurping it will keep a reference to the the io.ReaderAt passed in - // and ask for data as needed. + // UnmarshalLazyRead lazily decodes the index from its serial form. It is a + // safer alternative to Unmarshal, particularly when reading index data + // from untrusted sources (which is not recommended) but also in more + // constrained memory environments. + // Instead of slurping UnmarshalLazyRead will keep a reference to the + // io.ReaderAt passed in and ask for data as needed. UnmarshalLazyRead(r io.ReaderAt) (indexSize int64, err error) // Load inserts a number of records into the index. @@ -137,6 +140,7 @@ func WriteTo(idx Index, w io.Writer) (uint64, error) { // ReadFrom reads index from r. // The reader decodes the index by reading the first byte to interpret the encoding. // Returns error if the encoding is not known. +// Attempting to read index data from untrusted sources is not recommended. func ReadFrom(r io.ReaderAt) (Index, error) { idx, _, err := ReadFromWithSize(r) return idx, err @@ -144,6 +148,7 @@ func ReadFrom(r io.ReaderAt) (Index, error) { // ReadFromWithSize is just like ReadFrom but return the size of the Index. // The size is only valid when err != nil. 
+// Attempting to read index data from untrusted sources is not recommended. func ReadFromWithSize(r io.ReaderAt) (Index, int64, error) { code, err := varint.ReadUvarint(internalio.NewOffsetReadSeeker(r, 0)) if err != nil { diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 16367ed5..b9f9a654 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -113,8 +113,9 @@ func (s *singleWidthIndex) checkUnmarshalLengths(width uint32, dataLen, extra ui if width <= 8 { return errors.New("malformed index; width must be bigger than 8") } - if int32(width) < 0 { - return errors.New("index too big; singleWidthIndex width is overflowing int32") + const maxWidth = 32 << 20 // 32MiB, to ~match the go-cid maximum + if width > maxWidth { + return errors.New("index too big; singleWidthIndex width is larger than allowed maximum") } oldDataLen, dataLen := dataLen, dataLen+extra if oldDataLen > dataLen { From 8a5b3306f5f6c99f0a11fafd97e72cf8894804aa Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Thu, 30 Jun 2022 11:35:57 +0100 Subject: [PATCH 22/37] Fix testutil assertion logic and update index generation tests Update index generation tests to assert indices are identical. Fix minor typo in the test utility name and a bug where the check was not using both index instances to assert they are identical. Also refactor the use of lock in favour of wait group for better readability of the assertion logic. 
--- v2/blockstore/readwrite_test.go | 2 +- v2/index/index_test.go | 2 +- v2/index/testutil/equal_index.go | 18 ++++--- v2/index_gen_test.go | 89 ++++++++++++-------------------- v2/reader_test.go | 2 +- v2/writer_test.go | 2 +- 6 files changed, 49 insertions(+), 66 deletions(-) diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 3dc52866..6f64ac72 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -520,7 +520,7 @@ func TestBlockstoreResumption(t *testing.T) { require.NoError(t, err) wantIdx, err := carv2.GenerateIndex(v2r.DataReader()) require.NoError(t, err) - testutil.AssertIndenticalIndexes(t, wantIdx, gotIdx) + testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) } func TestBlockstoreResumptionIsSupportedOnFinalizedFile(t *testing.T) { diff --git a/v2/index/index_test.go b/v2/index/index_test.go index f895bc2f..92774d7c 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -113,7 +113,7 @@ func TestWriteTo(t *testing.T) { require.NoError(t, err) // Assert they are equal - testutil.AssertIndenticalIndexes(t, wantIdx, gotIdx) + testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) } func TestMarshalledIndexStartsWithCodec(t *testing.T) { diff --git a/v2/index/testutil/equal_index.go b/v2/index/testutil/equal_index.go index c5da756a..43d0b3e9 100644 --- a/v2/index/testutil/equal_index.go +++ b/v2/index/testutil/equal_index.go @@ -30,15 +30,17 @@ func insertUint64(s []uint64) { } } -func AssertIndenticalIndexes(t *testing.T, a, b Index) { - var wg sync.Mutex - wg.Lock() +func AssertIdenticalIndexes(t *testing.T, a, b Index) { + var wg sync.WaitGroup // key is multihash.Multihash.HexString var aCount uint + var aErr error aMap := make(map[string][]uint64) + wg.Add(1) + go func() { - defer wg.Unlock() - a.ForEach(func(mh multihash.Multihash, off uint64) error { + defer wg.Done() + aErr = a.ForEach(func(mh multihash.Multihash, off uint64) error { aCount++ str := mh.HexString() slice := 
aMap[str] @@ -51,7 +53,7 @@ func AssertIndenticalIndexes(t *testing.T, a, b Index) { var bCount uint bMap := make(map[string][]uint64) - a.ForEach(func(mh multihash.Multihash, off uint64) error { + bErr := b.ForEach(func(mh multihash.Multihash, off uint64) error { bCount++ str := mh.HexString() slice := bMap[str] @@ -60,7 +62,9 @@ func AssertIndenticalIndexes(t *testing.T, a, b Index) { bMap[str] = slice return nil }) - wg.Lock() + wg.Wait() + require.NoError(t, aErr) + require.NoError(t, bErr) require.Equal(t, aCount, bCount) require.Equal(t, aMap, bMap) diff --git a/v2/index_gen_test.go b/v2/index_gen_test.go index cae76ddf..43a9c2ac 100644 --- a/v2/index_gen_test.go +++ b/v2/index_gen_test.go @@ -1,6 +1,7 @@ package car_test import ( + "github.com/stretchr/testify/assert" "io" "os" "testing" @@ -14,25 +15,25 @@ import ( "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" "github.com/multiformats/go-varint" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) func TestGenerateIndex(t *testing.T) { - tests := []struct { + type testCase struct { name string carPath string opts []carv2.Option wantIndexer func(t *testing.T) index.Index wantErr bool - }{ + } + tests := []testCase{ { name: "CarV1IsIndexedAsExpected", carPath: "testdata/sample-v1.car", wantIndexer: func(t *testing.T) index.Index { v1, err := os.Open("testdata/sample-v1.car") require.NoError(t, err) - defer v1.Close() + t.Cleanup(func() { assert.NoError(t, v1.Close()) }) want, err := carv2.GenerateIndex(v1) require.NoError(t, err) return want @@ -44,7 +45,7 @@ func TestGenerateIndex(t *testing.T) { wantIndexer: func(t *testing.T) index.Index { v2, err := os.Open("testdata/sample-wrapped-v2.car") require.NoError(t, err) - defer v2.Close() + t.Cleanup(func() { assert.NoError(t, v2.Close()) }) reader, err := carv2.NewReader(v2) require.NoError(t, err) want, err := index.ReadFrom(reader.IndexReader()) @@ -59,7 +60,7 @@ func TestGenerateIndex(t *testing.T) 
{ wantIndexer: func(t *testing.T) index.Index { v1, err := os.Open("testdata/sample-v1-with-zero-len-section.car") require.NoError(t, err) - defer v1.Close() + t.Cleanup(func() { assert.NoError(t, v1.Close()) }) want, err := carv2.GenerateIndex(v1, carv2.ZeroLengthSectionAsEOF(true)) require.NoError(t, err) return want @@ -72,7 +73,7 @@ func TestGenerateIndex(t *testing.T) { wantIndexer: func(t *testing.T) index.Index { v1, err := os.Open("testdata/sample-v1-with-zero-len-section2.car") require.NoError(t, err) - defer v1.Close() + t.Cleanup(func() { assert.NoError(t, v1.Close()) }) want, err := carv2.GenerateIndex(v1, carv2.ZeroLengthSectionAsEOF(true)) require.NoError(t, err) return want @@ -90,71 +91,49 @@ func TestGenerateIndex(t *testing.T) { wantErr: true, }, } + + requireWant := func(tt testCase, got index.Index, gotErr error) { + if tt.wantErr { + require.Error(t, gotErr) + } else { + require.NoError(t, gotErr) + var want index.Index + if tt.wantIndexer != nil { + want = tt.wantIndexer(t) + } + if want == nil { + require.Nil(t, got) + } else { + testutil.AssertIdenticalIndexes(t, want, got) + } + } + } + for _, tt := range tests { t.Run("ReadOrGenerateIndex_"+tt.name, func(t *testing.T) { carFile, err := os.Open(tt.carPath) require.NoError(t, err) t.Cleanup(func() { assert.NoError(t, carFile.Close()) }) - got, err := carv2.ReadOrGenerateIndex(carFile, tt.opts...) - if tt.wantErr { - require.Error(t, err) - } else { - require.NoError(t, err) - var want index.Index - if tt.wantIndexer != nil { - want = tt.wantIndexer(t) - } - if want == nil { - require.Nil(t, got) - } else { - testutil.AssertIndenticalIndexes(t, want, got) - } - } + got, gotErr := carv2.ReadOrGenerateIndex(carFile, tt.opts...) + requireWant(tt, got, gotErr) }) t.Run("GenerateIndexFromFile_"+tt.name, func(t *testing.T) { - got, err := carv2.GenerateIndexFromFile(tt.carPath, tt.opts...) 
- if tt.wantErr { - require.Error(t, err) - } else { - require.NoError(t, err) - var want index.Index - if tt.wantIndexer != nil { - want = tt.wantIndexer(t) - } - require.Equal(t, want, got) - } + got, gotErr := carv2.GenerateIndexFromFile(tt.carPath, tt.opts...) + requireWant(tt, got, gotErr) }) t.Run("LoadIndex_"+tt.name, func(t *testing.T) { carFile, err := os.Open(tt.carPath) require.NoError(t, err) got, err := index.New(multicodec.CarMultihashIndexSorted) require.NoError(t, err) - err = carv2.LoadIndex(got, carFile, tt.opts...) - if tt.wantErr { - require.Error(t, err) - } else { - require.NoError(t, err) - var want index.Index - if tt.wantIndexer != nil { - want = tt.wantIndexer(t) - } - require.Equal(t, want, got) - } + gotErr := carv2.LoadIndex(got, carFile, tt.opts...) + requireWant(tt, got, gotErr) }) t.Run("GenerateIndex_"+tt.name, func(t *testing.T) { carFile, err := os.Open(tt.carPath) require.NoError(t, err) - got, err := carv2.GenerateIndex(carFile, tt.opts...) - if tt.wantErr { - require.Error(t, err) - } else { - require.NoError(t, err) - var want index.Index - if tt.wantIndexer != nil { - want = tt.wantIndexer(t) - } - require.Equal(t, want, got) - } + got, gotErr := carv2.GenerateIndex(carFile, tt.opts...) 
+ requireWant(tt, got, gotErr) }) } } diff --git a/v2/reader_test.go b/v2/reader_test.go index f653b60b..c010445f 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -174,7 +174,7 @@ func TestReader_WithCarV2Consistency(t *testing.T) { require.NoError(t, err) wantIndex, err := carv2.GenerateIndex(subject.DataReader()) require.NoError(t, err) - testutil.AssertIndenticalIndexes(t, wantIndex, gotIndex) + testutil.AssertIdenticalIndexes(t, wantIndex, gotIndex) }) } } diff --git a/v2/writer_test.go b/v2/writer_test.go index 2044e17b..1bf3ca3d 100644 --- a/v2/writer_test.go +++ b/v2/writer_test.go @@ -57,7 +57,7 @@ func TestWrapV1(t *testing.T) { require.NoError(t, err) gotIdx, err := index.ReadFrom(subject.IndexReader()) require.NoError(t, err) - testutil.AssertIndenticalIndexes(t, wantIdx, gotIdx) + testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) } func TestExtractV1(t *testing.T) { From fd7281bd7945fd8c6568a75e913dbac2d0e19f55 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Thu, 30 Jun 2022 14:42:26 +0100 Subject: [PATCH 23/37] Use a fixed code as the multihash code for `CarIndexSorted` Previous changes added the `ForEach` interface to `Index` type which enables iteration through the index by multihash and offset. However, not all index types contain enough information to construct the full multihash. Namely, `CarIndexSorted` only stores the digest portion of the multihashes. In order to implement `ForEach` for this index type correctly, use the `uint64` max value as the code in the multihash and document the behaviour whereby iterations over this index type should not rely on the returned code. Note that the max value is used as a code that doesn't match any existing multicodec.Code to avoid misleading users. 
--- v2/index/index.go | 7 ++++--- v2/index/indexsorted.go | 9 ++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/v2/index/index.go b/v2/index/index.go index ca2c2490..74b7fbcf 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -5,14 +5,12 @@ import ( "fmt" "io" + "github.com/ipfs/go-cid" internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" - "github.com/multiformats/go-varint" - - "github.com/ipfs/go-cid" ) // CarIndexNone is a sentinal value used as a multicodec code for the index indicating no index. @@ -82,6 +80,9 @@ type ( // the multihash of the element, and the offset in the car file // where the element appears. // + // Note that index with codec multicodec.CarIndexSorted does not store the multihash code. + // The multihashes passed to ForEach on this index type should only rely on the digest part. + // // If the callback returns a non-nil error, the iteration is aborted, // and the ForEach function returns the error to the user. // diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index b9f9a654..1e7ed3f6 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "math" "sort" "github.com/ipld/go-car/v2/internal/errsort" @@ -348,7 +349,13 @@ func (m *multiWidthIndex) Load(items []Record) error { func (m *multiWidthIndex) ForEach(f func(multihash.Multihash, uint64) error) error { return m.forEachDigest(func(digest []byte, offset uint64) error { - mh, err := multihash.Cast(digest) + // multicodec.CarIndexSorted does not contain the multihash code; it only contains the digest. + // To implement ForEach on this index kind we encode the digest with multihash code math.MaxUint64. + // The CarIndexSorted documents this behaviour and the user should not take the multihash code + // as the actual code of the multihashes of CAR blocks. 
+ // The rationale for using math.MaxUint64 is to avoid using a reserved multihash code that could + // become error-prone later, including 0x00 which is a valid multihash code for IDENTITY. + mh, err := multihash.Encode(digest, math.MaxUint64) if err != nil { return err } From e6d416c1e4df06287d64da9ebe878e2a080a6677 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Thu, 30 Jun 2022 16:07:28 +0100 Subject: [PATCH 24/37] Remove support for `ForEach` enumeration from car-index-sorted This index type does not store enough information to satisfy `ForEach`. It only contains the digest of multihashes and not their code. Instead of some partial functionality simply return an error when `ForEach` is called on this index type. This is because there is no valid use for this index type and the user should be regenerating the index to the newer `car-multihash-index-sorted` anyway. Update tests to include samples of both types and assert IO operations and index generation for both formats. --- v2/index/index.go | 6 +- v2/index/index_test.go | 62 +++++++++++++----- v2/index/indexsorted.go | 17 +---- v2/index/indexsorted_test.go | 12 ++++ .../sample-multihash-index-sorted.carindex | Bin 0 -> 41750 bytes 5 files changed, 65 insertions(+), 32 deletions(-) create mode 100644 v2/testdata/sample-multihash-index-sorted.carindex diff --git a/v2/index/index.go b/v2/index/index.go index 74b7fbcf..3a2b3f1d 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -80,8 +80,10 @@ type ( // the multihash of the element, and the offset in the car file // where the element appears. // - // Note that index with codec multicodec.CarIndexSorted does not store the multihash code. - // The multihashes passed to ForEach on this index type should only rely on the digest part. + // Note that index with codec multicodec.CarIndexSorted does not support ForEach enumeration. + // Because this index type only contains the multihash digest and not the code. 
+ // Calling ForEach on this index type will result in error. + // Use multicodec.CarMultihashIndexSorted index type instead. // // If the callback returns a non-nil error, the iteration is aborted, // and the ForEach function returns the error to the user. diff --git a/v2/index/index_test.go b/v2/index/index_test.go index 92774d7c..0d380caf 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -55,6 +55,13 @@ func TestReadFrom(t *testing.T) { subject, err := ReadFrom(idxf) require.NoError(t, err) + idxf2, err := os.Open("../testdata/sample-multihash-index-sorted.carindex") + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, idxf2.Close()) }) + + subjectInAltFormat, err := ReadFrom(idxf) + require.NoError(t, err) + crf, err := os.Open("../testdata/sample-v1.car") require.NoError(t, err) t.Cleanup(func() { require.NoError(t, crf.Close()) }) @@ -68,11 +75,17 @@ func TestReadFrom(t *testing.T) { } require.NoError(t, err) + wantCid := wantBlock.Cid() // Get offset from the index for a CID and assert it exists - gotOffset, err := GetFirst(subject, wantBlock.Cid()) + gotOffset, err := GetFirst(subject, wantCid) require.NoError(t, err) require.NotZero(t, gotOffset) + // Get offset from the index in alternative format for a CID and assert it exists + gotOffset2, err := GetFirst(subjectInAltFormat, wantCid) + require.NoError(t, err) + require.NotZero(t, gotOffset2) + // Seek to the offset on CARv1 file _, err = crf.Seek(int64(gotOffset), io.SeekStart) require.NoError(t, err) @@ -88,7 +101,7 @@ func TestReadFrom(t *testing.T) { func TestWriteTo(t *testing.T) { // Read sample index on file - idxf, err := os.Open("../testdata/sample-index.carindex") + idxf, err := os.Open("../testdata/sample-multihash-index-sorted.carindex") require.NoError(t, err) t.Cleanup(func() { require.NoError(t, idxf.Close()) }) @@ -117,18 +130,37 @@ func TestWriteTo(t *testing.T) { } func TestMarshalledIndexStartsWithCodec(t *testing.T) { - // Read sample index on file - 
idxf, err := os.Open("../testdata/sample-index.carindex") - require.NoError(t, err) - t.Cleanup(func() { require.NoError(t, idxf.Close()) }) - - // Unmarshall to get expected index - wantIdx, err := ReadFrom(idxf) - require.NoError(t, err) - // Assert the first two bytes are the corresponding multicodec code. - buf := new(bytes.Buffer) - _, err = WriteTo(wantIdx, buf) - require.NoError(t, err) - require.Equal(t, varint.ToUvarint(uint64(multicodec.CarIndexSorted)), buf.Bytes()[:2]) + tests := []struct { + path string + codec multicodec.Code + }{ + { + path: "../testdata/sample-multihash-index-sorted.carindex", + codec: multicodec.CarMultihashIndexSorted, + }, + { + path: "../testdata/sample-index.carindex", + codec: multicodec.CarIndexSorted, + }, + } + for _, test := range tests { + test := test + t.Run(test.codec.String(), func(t *testing.T) { + // Read sample index on file + idxf, err := os.Open(test.path) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, idxf.Close()) }) + + // Unmarshall to get expected index + wantIdx, err := ReadFrom(idxf) + require.NoError(t, err) + + // Assert the first two bytes are the corresponding multicodec code. + buf := new(bytes.Buffer) + _, err = WriteTo(wantIdx, buf) + require.NoError(t, err) + require.Equal(t, varint.ToUvarint(uint64(test.codec)), buf.Bytes()[:2]) + }) + } } diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 1e7ed3f6..60d0e87d 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "math" "sort" "github.com/ipld/go-car/v2/internal/errsort" @@ -347,20 +346,8 @@ func (m *multiWidthIndex) Load(items []Record) error { return nil } -func (m *multiWidthIndex) ForEach(f func(multihash.Multihash, uint64) error) error { - return m.forEachDigest(func(digest []byte, offset uint64) error { - // multicodec.CarIndexSorted does not contain the multihash code; it only contains the digest. 
- // To implement ForEach on this index kind we encode the digest with multihash code math.MaxUint64. - // The CarIndexSorted documents this behaviour and the user should not take the multihash code - // as the actual code of the multihashes of CAR blocks. - // The rationale for using math.MaxUint64 is to avoid using a reserved multihash code that could - // become error-prone later, including 0x00 which is a valid multihash code for IDENTITY. - mh, err := multihash.Encode(digest, math.MaxUint64) - if err != nil { - return err - } - return f(mh, offset) - }) +func (m *multiWidthIndex) ForEach(func(multihash.Multihash, uint64) error) error { + return fmt.Errorf("%s does not support ForEach enumeration; use %s instead", multicodec.CarIndexSorted, multicodec.CarMultihashIndexSorted) } func (m *multiWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error { diff --git a/v2/index/indexsorted_test.go b/v2/index/indexsorted_test.go index f7e038a0..8e1a4527 100644 --- a/v2/index/indexsorted_test.go +++ b/v2/index/indexsorted_test.go @@ -7,9 +7,21 @@ import ( "github.com/ipfs/go-merkledag" "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-multihash" "github.com/stretchr/testify/require" ) +func TestSortedIndex_ErrorsOnForEach(t *testing.T) { + subject, err := New(multicodec.CarIndexSorted) + require.NoError(t, err) + err = subject.ForEach(func(multihash.Multihash, uint64) error { return nil }) + require.Error(t, err) + require.Equal(t, + "car-index-sorted does not support ForEach enumeration; use car-multihash-index-sorted instead", + err.Error(), + ) +} + func TestSortedIndexCodec(t *testing.T) { require.Equal(t, multicodec.CarIndexSorted, newSorted().Codec()) } diff --git a/v2/testdata/sample-multihash-index-sorted.carindex b/v2/testdata/sample-multihash-index-sorted.carindex new file mode 100644 index 0000000000000000000000000000000000000000..ddf8a5fb27bedb35b876822e92f2c6bb9bac7cdb GIT binary patch literal 41750 
zcmXtg1yCMc(=6`p?h@SH2@u@f-Ggf&xVyVUaMxhLgS)#14<20Z`~CY|s**~ox_YL2 z=FIM%Jxd@QI2afhS>u0y{rB^~e}DS#&tuX5{`cQc)`d`gg@j4{TZ^Zan_RjYF8t3~ zF1`A|DVIMyK9xqHdC&kZcUnJ5^Op)DPHyeDUoavtS z4j2H3*LOmX?YaSTM1!4dF#f zy0(I>Mpme@|Ef9;`DuGR93KJY3y>dpO2I%ThqJfkK_g>%khnVO_aY=Ej+!-Y zn#`QkVh~?R6t3=xsOU#C6QaDaW*-l6R2cbJxz&%d29}yhG1QxtS8J(M8WUDX*#wd6b z?xjJdCUOGAg|L86=WR$5bNmP48}c;rXATp0#!j+3&#NO%Bp7Z>Z2uba|9pXn^NlIR zQV};|_BXt)6F~gYaZYB-Vkzi#2FqPl<&#>BG4U z);{j)yh=}V@70pD4F&L(IXuHo+S8WV^gfkOKtE_Q>ixpb3$&JvX!xDvq-mA62Fh!XV)^Bk{LzR~6e{E$`&Q2ns~xrn}*Ga%4V7a3KDD)&=N=G!ZOpXpX?AnCkIl zWH#5TkaR$^yWC@_bTXDZ*>YMZc>CW<0Rw}gf1wTx^{JSRg#Fw8L6$}HqlDktfl5m% zU1{1e%NXx@1keFx&<{B?a9dIuh{7W<<7oeg`q3#wM39ulA+V97rlFnr6NK}k9LFS< z&iMn`CM0XTqq3`i^O`6`7l9K3e}&vhgoEh|!qxC77cYga=U-H{pm@WOPM9EK#zA8K zl)q!Y*CoHb(XRn?K(*VCZ~Ah;Tl%69;53#(RIh0zcK+oZA9?fkrA+!y8hmKNd(-FtOO9NPTX2GgdhOo zLUZJEYd9(_D73ObmJ@!I*G3TcOcDwamC{m2ZWBe3cK-%+KwB^~RDzQ$utKj5+Q(4N zXg*VY>=)tBs!x&9E0cXL1;+r^2ehNW{kCpIz@Ysj?V*cQDD1g1M*f6VvwN9E<>9C0 zKW*9|9sBz=jmGOV7Y!D>2NsPFLr?!&X(Ct>+SLd5))8iFmv;a@7#OH*EeIxNxkGa8 z8MD^If?a!esx5ztt7CCE~#6?7EnKju4w#-P?B4>QR$|%4;Sed$w(pM?} z7Y%ME&dN{zY%(+Ol(ckWTT!7>Pf5L{8lpgcuBu&G*Mmu=&C7^C_sWW5HlP;gt^CRe2=WF;&hRq?? 
zn&Dr*?A3_=tj~6RCkEk~S{4xmCWR2OUcM-)U2?z1eHSgFW9;Kru1n9+nWpgZ1L1Da zP?5s!2t;D15ha7Q4f`o<2ZS0?N6aSiE+nsz7BZ88{NTmw=O?ZIacY`y)hidFi;!b3 zdE!Mgc-bzmqB8w%JzDSq<^g<;N>F7DRN0}a6*iU_t%|iIVl7Jf9((soaZiCkAes3o z;4}R6%~HTb!RPYtUrmPTiw~TG(Xc51M8skRQTWP7__mLi*^4 zLw<$e2?yO)LXW!2U`6o{Mb!%kq_}nt;Ch5`9>hbBgOXOMSwfSi7`Q20{8mZw&2Po% zQA>p)kr$DEAIJ|8ea(2R*D36AzcLw(;%~Aql8%#hm|i3+2e`W2wI7z|4A7^D{B*Ij zbg?Uo(5+V2g*7mQ=lV6wL`$ZdnqnyL4rV^G$Ur?1;~&h=CCN>%J!^b-=0lxCD}9XE zBN;sZpp2tU5C>SZ8~}O|TSyL=W771_{@4pR`X*HJ=+fm^J$U=FFeTtIFiaxSSOR=V zkQME!&F^%%_j$jD8_+-Wk3hAIzie`lnx5MaX7R9jckjKb(_*m(Y7owmp}^N z&pqDH%RpS@<9S|Hw1&b)H}({c!#D=Ah?UU{2*TI;`Hk~YdObqJB30bfvVWH&E1@B2Rz_6bJaz@oW)6MDy@XzDgM>5SPePrZs}{y%P9 zT*f$t(AlFFP5Aj-8%d}UxBHqqNJRA{i6ubsNtzANfyz`U^CRRkeHFfyo^=?zd-*AF z3;H3>OpoctcmH7Kq{Ru~L;W&qsVsWN8h)-KLHSL)2zSBP?Y=7OmDa=nq5DqqK8FXy zrx?Y-OfBzM*_gG?w614OQTk_PPKxHnKbWPxyeJm0_ym-W_;O3%6^ z=V*^nX_j24Ub0gs4eV-lWPRO0eUA>O{MX(6gS?Y@Q@3sd-hn-~h0xudmgIH{QTa;Z11y}Rj%!?@vhscPwZoqBk_Ya5oyg1K>wld=$USypv3Fx zOzwUDuCq%+G>h?%a}i2m2wc7=1iszZ8PJPS?kP+a=ee|~-)Ut`q|m;9I6%W0M?N>Z zN9l;qnLKKt1Mp#@2IbM}m)VOwRvCPsPI@v_iJ?KWAFx%u97RavlI4 z&Vn=QVLyn(Ev6qv3Cu~%LonEQhr@U#bc4Im;A1|uX@c)Kp=I#D?D4xO< zqrL5*R{Y-nYREO#TAP=a0{DzYy}U|4lI_?*N!Ien{qSSI!r1$tB<5+r<#^Q9FDLn@ z7Z6VFaYgtHDgOJ*RPN${xp@ds?< zRy9$kbZ~YZDmb^*P*o0%{<;?M4{IkBZ(V?wiZJ9T<>#mY^KT|piDK9WF}Wdq62>nu z)r2D;+@W=Uu1RuB{aur>;F>QIoRT-8d^BBt7*UuFc}mU;XEIP_G`fGLgF47nq3X zxhaQmhb^4oPtj0-f4CaC@$TLMG^rt-mmY@>=$hpxY+ac<_WF#ic;5*2q^SP@dhv+8 z9U!#uwT7JrBI*aWMW`_c$UrI5lrQFhQGc=_b@rQRVl4cPEvmNklQW zxfVdzUwnpIa=NyfBY2i2a3f(0GdXH^$h@~BXV}Yn`aCCd?@%m2FFr@M@{}Cv*v{k2 z*X+ywSx;dmcol@e@rTrSDk9f}XEIz+ep%eaihr4l9;#gkonu*0F20QMpQ%@klNj2n zXw4n)gc}0&#V@@VSNUwq&0Xzb+HcZ4JS{Mh3XhWDd=a*b{+4=xTzUoK`@;y8ibc0& zh?#agzs90ZQ7v-F&!Hxw625QS!6@@%zzWEZ0Pjb|;eHNOs&LR$67ScRxCro)z%j~q z!M->JLg8kD%?qIa2-Lue#1YAnK1srEJxj%`lDOr}3_@%xVm%KYQTH^De^CSBOyNv7 z%e60=H0l*2B7f>`Wf?R=)DSdGXAILGY0Vsp>jF9my2QlA`$vu}7@XK8{{6d-duk<@ zuKqFMcoDiohDZi|eFyXvA!2QrN@#&YLU;JhyDa%R;yZ=v?jOQJlX0sFe=D;>k9weP 
zgfw+?myka8{)RRuJkt$v5@i*>5!gsh@OdD3N#E2%s6`CQoySFEI-@o{P~l_Z9q@(d*G|1@z5lem(}P(# z{H^-f*+y7zj&~4+`~x-OPJGfTA24@`!6-PTTr<9fw9|Y%MaeH>S{|7G(CdFAxwWm8 zwAN$5Py*o~iwrVW2hMnNJ>gvrey(^ytCq5we^h36jf!3-zbf0E0r?RlpC*?)4;pL8 z-Q7=J+7j%I+);^}Cg!B|rq0Sf-)j5Z0QDfI^}qG|rFHa>bJ-Q0C75H|DVu(j3`vF* zW3e=*GtS7`0Qf>IMOh|5QmA&Vw9iv`rU5zz=iGn+T%-Dnj(y^>}R1z#^Sg4chmr>umP;LKrFqDHF zvtw4cxvAXRgir#=kAz}kDf_mFA#Wzw09{vH@2~wyL#`~I77JBUKJq7|#W+wsNVtiP zUD9wN!?U~9N@NgXxKguq<(&7IA&d+*3Yp}czyyK(NCXsrB}L@o!oH8pB*{dT*c8lF zX&ojI*(CDM4rOq3pNN5Qis_vhbcuQ>74Hc{KBt^hBgS_{i=rDg!|WmPGf)pQ*oj_sR>U)A=I)%^`ju~v zy{W`ct)>t%i~i&T`PUQpVj!GKh4JiO5K~*Ry14iceD;Mz+-w&9>ew$@35iBmlaDX-930zQ*-VrVnMmJ~2LyVmfHbFmw)i8p-wtk3XH)3rTP zJHle62kJ(ymvnjf&>uIhXytnm!2&fD`|X+*_NmlK!U|KDm%D4aB7+S>4RD zXExkmUuanf2Ga{-t($9rS(JtRRD$>Nw{>J56yf zmic}>9bcgP-U?-S0ffuzix-o@nwTv?-7_;r)(aKn9h^c6ssF>O#F>??pRA4s!qrqN zS0uJaAL|+7N2`wi^K|J*?G#rMp-@s7(u#0zn|K4^u7vWlRqPZ*G0TprpTBl~*Q2p_ zzMVu9f+IfBU24d<0^Pq*-m0LA<)J~>rJNBe>bt@}ZD%&Y8lYBnwDpKF5<{GwQ~-Qb zGRR>Vrr!o}*iz_Zq4kkbV6L4N^6%a>mv$FRBks&%DggheoX21gRmij7S*O1&D!D%5 z-Lr4X$h!>x=(YT*cze0W3k|WLHq407+7qXfK3oibg!@oe-1e7h-Vziut8QrQr4hqf(fY8?-M)P3K5shDkPi@Xc}E zRDo!B;xdmPS5f=@>kA@Lw4JNrFAX3+Y7e8){sc?9;xV%iq$= z3Mk(jNK}A+q0YC8Rhnvf6p#c7CcoPVy@UX}a=XDD#&Qkf8x4>33{qF?#v`DY`qt>z4v}*8fb(LL;?BL>9Gy>7kwCLA zso;Czc&-k^Z*-8}f0IZJ3Ep?Xq+EBKjSZ(?idVy~SRkK_biVZ4zEJya5rS}U>!jQU zZRQB2zotcNI1D640u^6b$?QoRY}f_(IvOlK0dZ+43wPaiS=S7lh?0W)gLOi_Y=&he zCwc$3Ea`7?rt1o<0X|v^=u1?m2?FSx7s>Gx=)M)dUS5I3y!K=)Z74Up+lZ=L;Q5l4 zedrbA5>eaQt^*tGLJZu8TS#E%?%SH0FuwQBn&SLkH!!b0RmF}Gw799s?>vpzRidkt z_SA~vbgri!7!Wq{wtLVlIstJ%jZk12^)T)6AewKgI`?PYo4EDJ4s~B~Mhc(yDjA4F zuL3^Pp;#umKF~d5B?eCCDG@1uwT*qW_NOz)MeyXqK$I?c;PFT^$X}`t>NQsd5P}J0L&CB&g(REa=1b4DH;I9&hUq2N9~| zhvNWP`dEJG!yu&zQ(!(bajT*{uCxS=i!m0*G+M|jTJqOMjG^P6BALY*$k9THQUU9O zDL48f^EC{q@E?M^Rqc2q>snTh9#hmx;jkAD)v8JM(*fW!(~2iORTmmlRDl!`(oPrT zpB)4Y7@@|;G6-?Dp5n;2Mh_rAX2q9xKA&HgCwd3u<+irH4$VfonB)4EQYcucdHVJE za}0o9W)1m}Nx$hI0-endRzdCIKdGBD8@uYOof@FF7y5_HRA)fAoOaXm2c`78x9G;d 
zekoegNJy|*HLY_!2>E!-#a`M_E`ZO>9c0$6>m&h4V5+>Hhj+u_xz&-|YT$a`V4tvm zAGhs5Pyl_;Jbn;(8&P<3!d;qN_c7$-f5#W--jf26mmqrRt6jb!ISuG#p0tmb&+vc1 z9*LlnAgf#{G-!eC!bS?j%Z|3Ei|84{-Us+tDEdY>yZq#z(8wb4LiC|-uvRJ*-^{oP zL~3nkl8Y3fK>PeGTuhx#o-UQEdmcOpA~i{LNVd4g;sV$fulhBjk#8JdXh3|5y1W+4 z@S7MrWx|&gXju3V5cS(%Y)L0CV!yA=@qdN~y`N!4EE{3WW0g8}6v4m4m*7le_yU_# zdRn7RIJ%O3G+vDo4*0@~WP{5xjK3f5WHeSRc;L3c8#k%?@NGe{9k2G(!xqXO9;h2D z{TnSf5;eqE>xvEKRdu~2aV0-#TO5JYy|c|myMyN}fDVk6A9mdIabSRUALDK^#Js#z z`bWm`uM%HQs6iL}jKr_qaDacTGSDGlmYl1fr8XR^nAneN66CBy2M-*%?>l!7=e1Cq zy+CpEt(tYd8nqq~BD(9_o{`8FF}H9m9?nPnlvaRHfl?q(1nx^%^TW1``yki$c?i#( zhRKT~_)wcHu6gM#JtFtwv4TxBdw_mnZATWGIFYiv!^Vh4gel1Vtu7xG8W-pz!LsaO z$}N^zSG_kL_e?sA zg{hl&qc%g6AB0onk>%8~S~jMttpqn?{_8zktbFeVN69cz0Cin7pJ(I-=w)Z|Mi;Z0 zN#XxtI_BU_CoSSAuHbd#Dy!10G4~y2fulGN;A5BV>WO(;OlLk6Y=>+WyZC%?i@41% zxl1usyg@CN<+5-L_{?rnMJ=%f`5T9IE+l15Tv|<8HcJ6TZw>wA@04d_Sa_r&2$!6- zIgtGeJ55hxn?Z6ir@5=d$RfOO#S8XZi?ZgCd2SRazuG)#1G7%rX8t_%+@adfRU!c~ zR7lSetG(vExO)GIOGKIG9V?UaQ_j4y_Aj|yXew25l1HHPimGP^k**ZbW;I3 zII_(=f~a0#o~S1^@JwKjT+GogoxLpO=eQbh%!4_Hjr4)_!7=n_)aQUuNc5AHzIoM& zbnfj{+cw%bB1W*?r4y*{K*M>2a?T;cj*J4PlC(gqK)Oq@x-V&y6s&5 zz$=JvS}4#Dj8bjGj9IZ^O4bvTkp827XaPlb{+o~^Pd}enJD`^n;d5&|tt3-gPmYRA z8n?$6(_~3oWS3SsK3ryqn!jk&pzArOFtSP87jv;0{d)gY%j~|FvQ9M(h|vkS=zVmb zZ5pOOtAJik*%*ZwAt5}w#F+>;&*Sx4FqA4iy@H)H?tRD&Q#{igGZ0Q$maj>DObKuC zC$tEhw(X5gl)dmr2c>EKdtH;nZk{OUI>Q<4=a`v;@q9kDrG&7-qnEo{R5FvGq?FO= zNS7AWMSoB+%?gwdeBOE}^(CQLcder^JQ*Gpe2NK&>Xw2V7wN?IzUF zCp{~wI$Py<7_eWNhu0k*2#OUmnd#rv|2;|o{&5k1hpm42L#ZC6H7QQpAo#(lK!-NT zee$O^)?tWb{E7q%s0Ww#`}1flqPD`gnL&3d**aDmf~cfbO5QF#b}Gf&KPm?~AV032 z)yfZzrQJ4E4Xcod3a0}n7;)9=d!LdOx>Pn&`VDI^fR8(&jQf?wpQx;~RX|OtIeBu6 zc?6CW%!i2Uk)# zFUDB){VpHw8STq&3BLBi-^~UXe?j@xs9L~abze4-o1J`NpizR~xp@kg<)@{<3)-W9TWOC@T$GMj_hN#?A_ zp?FdHA1!VPcTGjaF{rF;F^8GeJRY2_y&#3%T#-!GempN~{pjOxHGYjUR z5PPr{lmI;!ki?0-$)j_yt4{= z-se9%V8l;2^h>;!n%4O7_wp7@M6q}H#MNKen&F$cPx+DvP&fW7)yh`{LgiIz`%@Er z=)0JPhNYpaK@2ghI4tZ8SjqrtpkD+KTSQR(losSEBGe~tV|O3DNT2`EebhS_6=n9# 
zWRHH30DUh&z_1#hbY{-PmJTJhp^0O(TzN_1K5w!ZI8 z4Q!NXaVB@u*tKUA;C>9jKfWnPs71Ya*aP(yFj%kXJSBKMn`6FIP`H$y>xwX4Tayl* zgX{eol$9Q33%Y+6uytGhy?;$O?xJG@ALE>9n05HCW>?oW-# z=z{VaZb--^--9M$;y`@$M+o_G?=y;P?D>uG^2J9aiGqckPTD1@rh=cFhV)^Ofj$tJ z%ALHGb{d!&7tN4cU_K7I`Dw?nk-+49J9ddi_2@XR0P^MNS~RV;p{%>JR#)o&Dc9hom3gY<(&!xO6!xW(cn$KTb1t2;{G;n`RzFY9EdA;`YDZq%guL5j1+#F zKE?r?g$;2Z3_clbzIJ6Y79|@U35YA?2S=Qg0qZDx%3gPNBBhe-i486m^Hk;0*;WnT z|5r396zBt?eztU*&e!ixybmZ<=E{n-Lr!&?q3Xu&{b~gfaX~f#PgvZy)6e2pc>4)S!#saA1Ftnd#Afg`X$$o8!-88B|`w8MRH&mhBFx@wEbIEjh~h) z`fLMBs;+i*)5{{ybRYVsfR@eI!|kMv-fBQP!XAjn6$H?*GJq z%OwuNec&+#B&R=j%jB%r(kcDu51+_+k4p;jL6g{!(b&)s?E`d(s{OFQ#|tqs^KrRa ztdzM$#*BD2=o58lSo2fG?tK5&8UXkwYJlJE;A@ZY;;*;cQ)2hz7oJI%`W@%65Cf98 z%C}s83v_Nobazkw9PL)IYdS$C>Xs?8;FCea-8h-zpEss*X4*0ZA9o->G42IVNv%Ax z-l13KeD`WzW*6<22iz906&EO)=d~6-IU*oGu`vrP6Nu_Gwadba+_fWJUdpG7lQ!Shj=t@X4s*MJ3;d&L@P_>ytMgg zBrG8`H{w?F+8EIH0nU!DMtXlh%X;{lr<3M=w0WFxBIK?svNYeW+K73UAbu>D=)4l zFzf?33FK)`^or7d2YW2<)TnNDW>Q!qUotK2Uw!M)+-s%ZltJg~B>1#$lNncnt^>a# z(U+;zl}wKWV?vzgxuLjJ9_O}qs)Pf2B}9>z1x`~Ty=B(ehKhcPx&7ToUR4qv5o2?L zGZ-9Kqsjv6Dpe#wV|5x&*%zb45zCIee)L;f(g7lSh`AQW`7B`{fgokA;VfH09!=n3g7P^68#0=nLhH{|1hj6!>XsqsK33xF z=$-c}aJySD_9Elb+Zt0vqz$!e82)d(bxCLZiy9s1T%;tJ+=(1+&&;sDI@_&YKCU>O z%wK3(+@Ezx1g5WJz2C0qfVh$fzpr3}AT$V0s2np2cagT1cz($wldx>C42#d9{dRl6 z2j-uoP;9=GLVybVSb)KffB#ziS>;19YE0dl*@pI|uUg9~KL{t>abFzPhz>Thn0m<~ zJFMroX|-moCKBb`_PfcGOJ$`4sE1^T=eIXz&OO!_qC|!Vabmv|+s*jmqhQ&eerTj> z^3f=i!1|D^pYA7{I@j|hG96sN7veoQD~XH1cF8$1%0Z|<=BSbJSdCMxST4{^$~{RnFZo|ONHZ|h!@aK^R;OC)f2~?Tp4?~lz*&$(HRGdp@o%128%bQ)E+yI=kT&m$uV+oGR61!Q4Pq=f6dX zVIaOfd?L$QUS+#z-cQ@^G$9ki!Wv_cSO&%gePEtxVj;y4Ks}^qoA#aY8Ncgp8u`5O z$2~c8X^|N{&L>;5G>_kPu(B680P9Wqak|FKQmvZvS|4pm|qTkD0e)rzP)MjnIAPL#*>-#z1i!&Whv` zLKS@;ULF2kWTX}ydOY4FIFlAJMp*LHnksdx0(xbMQQboc4`oAKdp*0iS#56G0vEzB zPgb&>2eR-MCQT7s0iR_h)Genirt`KX2E}Cy2A$5d^I!#4GfeIxT;l}Om~Dz)b+Wttp)zU=JoNbo^q*WMoM>wBK$I-s>f|>X+85mix93KlzaKd}XjWuZ z{21^bKwrrt+VqlkLT7BzDzX_Dk8b{JacccDugFXP18?M}&2CGn8PF@Q@I%zHXpUg- 
za=|zOTQ{*-xwgMTl$81ymzMCdIz<%{R1f*wh~M3GQg#%g>f5ykF2d%W(N_sfSeOWf zW#OGL2YdZAfL{4=1{RTVUA0_G9UpepgE0{s67l0|y`$QQ8b_V)_>3_Zp!|+w=8Uu6 z{@ZQbuD+L4Xd?V7Y(>wLz%Aug3wDOypBX6$%t-|zy2(v^otvo!|BJukezgxP%C;M= z6liNi+<(`0HXpn0f&NoSne*gF$D+25E!AvrBu#k`^`8CEn4Twf|9d;j`MKwfQYBb0ld7bibyYry`k_k`!W3eePChZOc z`dL|e%jkzOhS+VWm$QS~Ci%Yssz&q;*zCuGuKnT)cvk##pbu2E`Uzm)6DzsF0hN^LDcxR|Dbi9g=w?Gm=B1TY;m?n_0N zrkx!@&(kXXN;iXvR1Iz0MJIKI~d9itH*EqXRIe(Y%WH^cFuHVCXk;hsk0{p`+$&uMWVyrZpyD5wtasa1vn}w3g<=felz)RZhtcM+>y$g9Dus15rlSF zL9(|W1azF&Ip|@bvIIz3^H9?tjyKK>NhVzV#RTDqpupWrg>?*3J2tDUd2|>e)18q# zPD@XfyFPt}DSc`P1AJDKYYcdeR+u%F_PZ{;$X~B+QX%<=@=WS0gmN+Oed&-$2;`?` zT(LW-N<~fkRjpc^*8ImD&0J%1cv}A9jYN>JKSJ8A4~Q=?-+%tPUHsZ}`%gaVMT^LN z<_fX`yCLl@uD6QUH{s}Dz(2LH9tPnl-`qb8M=a$59T)ab0jXL65V%9f+&JM~`6h$&KGd#$K1JzIFM0?@U3vJU~4( zemi`kX6`$V5V%$x8vnsv>fj=d#)jCo84!L*anpOaUJLl53D22f)=Rrmq{mvS@?9@= zSy`u(0+NS|C1m40f@YUr<^qHx@2{i&BcTLs5yuosvFDGT)Ic2Q4tH{1y!S_Qb=K_M z0E82C_J zOp=|bjxKMwh?Zo6&fZsxARhHjT<8ylS)WgaTZOouN^fr!M1^Pt`*M|Kl=ZGYX zTprAAaSN$0XWa+$Hd@Dl`m1Qy0vQZK2C$B_X1qzsNmFr^<1}Xb@>?@+V(LO8JXuh~ znZR_?Mw)u>f!1Jxayv(dr&lH0;c=(x=%@q8W}t3^~Dpbo63(rU1JL&pzpGD zJ{yJBb%qp`?bcWFr$+}ja!Dl&Cx`M#EGcr_FL_fMBy^4hosXwlX2pM)ZRALJJA@LqF4u3-RtdZv`9BxK$X z?aFlBpNHZOF?@aRXBkR)zHs&%dY4+~qJq9>(z7l!FOzi`_nD##44Q7Nb*l-T>+#Lw z;tw3NQco>82)P5+rC#3PS|RP+WpTTa2CCLW_7i00$S`-xHM~6h^4_cUtfL?>5A;eI zzdNycUbK*+p+#&hbnEbij1Njm^iZq_OEly5XYrl_zUcLo7mjTQA{S^>uuEC#EGm$| zW;W(glgKu`5m?T7UVVoJeAerEmuF+yl1LX{<)VhR??_b@NPm@{Rfj`G!*&+oVf$JO ztPj2M6IgH9+s34>hjsZbYmVAj$OvyDx%7u^O$;T9*`WxxfdEW5wGE2 zm1_0mv=?hgnpSkI`CE?da?ZFw_c?lhqaCWV?B(++t`EJ?xGADwvY#d^zgpK>E4vzz zTNp5b&gTvey^;h4XIA0)3@#mM)voeR;HH z$JO-uDaOlI{g&228w~YWo~nB-?dWaC{UAaGpt+% zoTIJNhEkP!**XZ-7mPrBH4(<`Xu13_I42KfDD+BgIb-@gYZVrd3^8&w(fQZXra;{c z!DEL$A+p2P7U$YR@P;s68Uzmo79ntyYHiy%92tJxUI01_Jy91s<)uFv=_*L+xagR% zV}E7TUtEF|lvd$cDC$@Ci3ITl2Yxos6M|z4-1fdA%ywgGuq-O2YPS~iOvzRK6pGZk z3FK#3{i_Snev!p0G$LR-wCI!9D}19~Mt3hE&6dJdt;+Gz7NFPguh9%y?wMgR?pvn% 
z0fF03wZD=Vr2CC^-*Yk&-INY|Vt~Ff5*4aslpDy-XsEpl5>~x$pK$BQ| z3gA#2p#XFk?HYj%7H`Z=hzcguC@*rBI=HrW6Xt>ax@a`yAJ?~W2AxAOhUtlPqTA+t zGiq%2ACmVnh5kSn=YL03AHkpJ|KeAh-vQ_~MzGsJwwjF;-ffhh-hWh41%)#hO;w5H%}##cQq&UEx%H=`t{g=EHL<1bwe-{G0PB z@PO=YXYZe1f!fNHBFyP#o|3u7NS9y zDr9JxNrt!PbwH8Y2jGiIbSsHg1?>;71G8oPbCs+R_e#H{O+K8ymFq-?y4E0bQeY05 zJQD3}ET~Txm(gnsj<~|G_aOf?Zvf#0zT~nk;b(OC41CZ_x=!j5iH1qF?V4^Nf8FBaEQ&cc z0s6qy)q+gwR!+X8p3KMgbU0+bpU&o?J*pi>yRq_tb1BJm0H~W8rmFbdX52|^WB=eM z-kfQDEHg%Ae$1Fx4y|nu^MofaW?-(FB_r#S;qv_i`xPZnR`nLj*A>sJgrG9jWJY7B zZph2I4LXl*&cf1Wj+Q49LT78i<-e>xnCp!2QDpO$G)NQak+XoN2MOe7Zu;h0m76vP zHhDPYK7dBR3^M_fDK*L)R=78lab5 zi@W+wF|$B7au3wQ{8qG?v9pxuPa7JSs;86xu;&!mMwsU6qK7K@fN*-CQxZ@Q^IH>c zKMztmLeyW`qfD&pXO+)-mi(y1!us()U6Gu5#X;x)EOMvvVZ4$d#ZjLe>{@nU0h6hZKe3A~kqRO7U$A#R8di~hqDC9g zSC*cuo3;LFMU!9_!%dMA^*CZp4}ygQdw&Zq_^&TxiN1r*w_8To4chx0#jCV7A!k}~ zmUY{|bS!N9gG`jXT&9Ks~I`Fofjy=H-aE zmJdET)CR16V-%uwAs=avL&+lZdf$}p0Oqq5`ag;H^gGD8k1Gt40eMjt?|H+HfI&y7 zBMT|L+1s^yXuuaM)gii*EDQAK4+3`wGx8Twc3#}c%EWseGyKHVmZiNzfdd3rYx2lBI4I;bwWC9Yqu>@#i2b^N93 z1eT>=5naWhhdETrX&R@32jpjCeu2yib&4Hr_qXjyM0wA-RF>zPRY&k*(4vgfHRpl% z9l&SfdONnI9o<)%u<*sGj4o~6@y9etT@#z0g>k?LPZ5GZ4Ny0mz!qJSz=h_7`^2UqI9=zBCjvV%*C z(~F#3d4RsRbvZ#|;gSNcOagcJ{T${hIHmaAUN>suMULo7+Y(Vd0~6>2+v3e|HD?$8 z3mh%p_5G1J$Dvw^FtmFAK8-b1dK$i02xL&)L9)*qwH*N&m+YdoE@FM-0e&NV8}@R8 z_Bh)Va~-?BD}XO{$p0NzTJxLx8Lw>u(K3B*t&MY22qTRJ6Di1EA@1k0_ZfsEOXWNy zf#BFIleuq6Cu4cjh}JG_2c$hNJzZIxE0q@FyJrERDWWd0srijwUtI(Ev&T6PevwNtJ*D_`} zEXNc~K!*dl9xo{>KG_XMl=$BA+DrIw?E9FGgLzA-;l~2+KWeFSz-I@KW{ts?!^5@k zl4WVGfrxmSmH$8(9Hs&9(3hsDB)r-| zc%;NW3d9&jP~5KctY~TYk}v7sJ5nD|l8{E6jIR9k)%keG=yME*o~+bCaYv=LCkkAd z=ft16ln3z;8DvxkJkYx&n?jWh5-GZ4o8XS zz}*{J>VXZ+E_gVu;Q(Te6_m7FW5MP{ zwp>tsok$3JEa*GaG@f>CKIuEp3x$IxrXML1YNN$2zb7MB!GoSdo#XB>FzgBZ(xTd1q@8ZhBpCr1^J~jb5oCGT$oA9bI+9FnMiVf*lvbxV81<$RPoI{ds z=l*^`K!WxYouoK?=#W!BxqiB(!&SI|Sq6`b5&qh1z(A|l-%2|_(NAd($*X~!cno_qU+W?q>lbp%K0 zr?~DhdyQ~W+u;-7Zv~BsK4e9kD&$DhjSkz1#9|YD*5sFD^{Aph)wir@;O-u2Cf2pDBppXf)f&u 
zpL5?ekKHt8^ctL}eORmoW9K}T$zkN$kAR36Vh;D+R0+^^)p;WKbf>r=Lp%ba9RXG| zLy6lwTYPb3rkEW+Zk54z0yZbqU? z%T+~bdzc$cF;7uA&_!yd*`QSE)hnrqQjY^2>&m@g_wfu1u-Q`4>Si zBdve3n$6RgL!!;=C=krX!1{3SkIYnW->-ged22OIrG`l~_*FQ+Al2I^&_ou=A@|Ak z9?;>wyq^3fGXCounx7svqhGyu-T})uMgi~{fm2=2velx%MWDVOv0GIH6KGq*+!W*y zM8v^wxFU?yE%4_qOoBMG_z%f)!hp}7bS*0=8lU}OFF>X@JR zWf%{W!~^+xO0*&(fBCv+z&GdObr23shyMsxwz$;lE4$uMm&>a}bp_<-DHEk+r3Q|I zD)x{Wf}iUj#X~QJh&ubxNmRBG(@?RdfCS7dPp!*;3L#)|#DSrX#%fpjU|&s+AaKBV zQ+jpKQ-i{*an3+|^(^VByW#C^6uDOakEN@Os;k+d6!+p3r?|Tpcc(zH;?M%c-QC^Y z-QBe~6nA%bcYoho=ic90N%l@A=iHf0&KdT)M4{!Pzc`t*1sb0aTG;m$Q~}UAr&D*n zB}skSf=$ya!~j>@pN=-J7gcug0P>+KAsZtU+jd=$zWu_kY0C^^a6WC*2~9^8Iz!pJ|H=WLooCg#3_bCA@cfVBMcl2+ zQq`y~C52(J-W$2QQa*_2w}Swloo91pAIsWSxvgzoRpN&T4;3Ig6E>R&p;uO-n&ZUCm|V9^sMgl&YoLAW_(btKhP$hjEAF zY^af}_=0g8bQ(ea)mnTndLIdpkk(uvPnNJRb7Z1t)n=37x&GERnd}&O2KxW&x)fjD zm)WRH8YRQ5)p~ZBf!!p7DCYGZop|9erAZzwjtA&-GjOviv8EyB;H0hgo-R(hf;m9# z2PcbU-+hZmd&pU{`vvgl_RHT;LMYS=Crw}~`x1hr@biIgSZK`8PZH@!Xwg|K!Jy~9 zn?ubG+N9=$mR*pPAv*J(EKEoXJwa9RlRvTaQR`LXFcL^#dF_v$FX!$Xcz*^Cg}`I@ zG`7gx72wXXr`(A6>Z!+9LH97+W~=f3HYH^4atQ2`rr6gM4jGCf5xd)D%U8l-* z-T|K7j{Wt!oz#eq3nWZJ7$?pUhG8GBj(Exs-fFo7I5F1ot{ZeHajsUsSVw)le@)5j5VHWOkffKR z2g$Y}K_JAK2$T#PxuG?p@v!YZ}2B z)>mZD;$i>zNp-Eg2Wzo*7x_vQ8{MwVVfz4m9tfA#9`zOjSk*z}n$$?tkj{%Me?mi$5>PzgGXz~RN?t+eKKAl>UR-vsP>lp#jh z=*wL~`Seg+63Hv-Rk{j2+w~ycD>9#d9vXPx0Jbf_JQhqV#WCGlTq?4CTQ0X0H3*k_ zWwmVZzD$*xc9Rs#C~gA$c^oD`zl<>2mSVp3b=wdf+Fme2-P*C}Hm4wpVMWB{->8E8 zd!t4^hB=MKY~#@(v?@nDFsBgZEcrrtd}nlLntJSIf(EQ3PpGYEFb_CWLMm&?3>&k8 z34VK=WlXWxii1eB1YUFkG*JJzrydKXFkOX&YHPi+H0_L2>P(licw^>I&rqITnS4|O z=p3Nldqzmxf`=bN;v^u?6S`3GpbZh4=49V^?44|ygk}7<)e{Nu;@MdRQ$pL<%lgHd zk3Ka|fjim?wpXGrgBTCxyeTI;Bw7>5Q!l7(jdN{WH_t8ZeGD#<))SLlqlL3=w{=XL z)!Ka`I4DqmotM0phpO9;c0=_0OcMBs7)gw;sO-}oT1vbXGYIHa?Qge0UU?a-gf)6; z60ZtOw@spt#k0c|z-89Mr7YGeSn{M!arI#X0r4{BTJ1#6+J4gJ1D#Okor$;*WA2`qH1+di^lX3XW-Hzw zXe*eVXRV=ix2-!Vh9&~?!0RJ-l|j#(d)oMHKcy&;>Yq1Q<-;8*8j;zIK~18W(mCz7)OoJiQ;gGmekWRBmPTfL$L_g)bxn_h{kU 
ztUvqs`?+x0{vL@_?>)gO$*17vXTyqs;^remGnY({-Z)2&*)CFdcTx4F;=7B2Qq6B0 zhp^lE4+{S@x$L`W zW3IJ+`?`7^B*|(s(7K*D^_#fF;nER8D#3|w69h@`^kHsSmH(7n~S=q}76c&;Y@SmHO`0r}!9 za&U1suj~9svYshsnjMZqv?T#*=6jk<&)M9Va2)(uPt!f4( zUoPiq2Ibgsj%V_g;)VUnB9L5KWnv`_l3?eV?5dE$^zrVl=1)F~kj;)4(eTG3m|upV zemLKq$6r(uajFR!HK;4CQwH#QDTL-!^43jJBv|P&&G}FffPa2)D!)Z%hmxS99Qnl$ zCE;0OV!jM(fh(8yh4Cg-Io8;d0lfGjfAXGaB^0ly;5t`P`fVaf&BN7HyV-QdWMwL` z)G~a<1*~&FG}c5h$KW6z(tNgqax)&5_lL7fj>8bwa=KG&kGjcV&|EdY`h>!ZQ`1;2 zSg4T@n8s+AwC%)ZAp{55shh(58rod zF1|zd6<6m7`Jj`21i%{rp(!=wppa&V(TYzQamMqtXUobu;lYcuji`J9V?9ZeOS(b4>?f^H6L33Y+(Ry$&S7M*i%!Kc{Hk}K4a7GJxc9cj*p z1i)Xw)T)-NK<&^&vE?2CgJZ?~Vk5UkGkf)L8D%>QfsgAmXx>P`Z5}lyl-I9?1hx@m z3trj$a1U3{b|UtAwl(~^((X7TEC5F!rSi@Ask{a|viB4$Q6&!gTYMUXvcYWrE>%jU z@O*LhD8OGJ8`$r$*S&WbPgHuA7$OyH)hKK1iP1pD8&%rSp9TzhW&qwmUcsChZv2O# z*Yw;;emv_jEbW(7)(dwEfW+ zI9!@7VIK@x0K9>PrEh8nnPcKl1(GOdYhPYpqIu0~n^>2>h?!?bBwUtP8v)~JFxX#$<(J~#e8(TPrfPcXu^y8vL$OuUj-=82S%l(nx94;NG zsCX~W#5G(A6b9*ffqD=E9y*_RlZt5(0kxet5@5ybJhd>qP;Om1pU7^*#7T!w2=EfZ zx+aD?@!GJp8#{o^`idrmr)S!kksvSm?sG42Q(Hg;n)?xAc#;PxM$3iL>?3|Q51FB^ z)$sdEpf+S6}Zx5uO(C6)klBV*MxwH9rIr2Bhyp3WlkW)u%XWjo&H= zsVg6^*+f`WvL0m&7Os)bn3xfCo)?NI+SDX2Zm9(av-U((%*>Oqd75^@{`1_7;aN)3 zlf94{c&>(OiAh0H-{yQu9S#Xan^ut4x(SJ0M0A*YCf%M>B%RZ^2i{*ob6UD#1V}Ax zL*@>GJ;o!!$_AcCpB3lO)^L-R^~ zk(!FKng2$rw8bTQO)l339)4@>SK0q_F`*st9kkyWMxf31Cg|1}ln!l;3~~KbNq{#k zK)V{%Bh~E>Pb?SiI)=m=yeC|M$Br$7+5V zksjHL8*656?zb}&@pIeu8c_dGSQ;-kj&L1wrXn&{WPGaerpQ$?jmVluhD9-up^3sBnQn3)FOc?zZQEFR?Xi2$>mC2@x{?EV5Vrpsw%Qm|*c)Lk zptk*2@yYf?YMf~Nv+ugQ^gLg^B$g7uU)b5}caAH=s04-NsNKV`62dRAo|}EfrHVX* z5QDp0Yc*^DFX8lt^O(Ewe4`R3=(GI+v_pYozE7Hav~Po!0v3x&LSX2C-Edw}HJST5 z2u0dduTw(sgF}lF!rki2_xbw8eM~1u8A?#!T)6%giL4|zJtuN$XBBLc(u=B;V7ihU zX$fTv58*cT+GhojzH}WK_bfuv%%G+N1Qm~PnoOy*j(l?kY&u-VA+T4mJYj&B@Z4Io z;7_a($J6&?w5!l!G44^TUR#CWVb^X$2$B2RBQYR-O9t-5>Ij_MgLQ_v3I-X~%(&Y0 ze-7z6zKWSLr4ms%t$_5c6AWYPxnUl?aOQnj5NXJ-N8_64{`+**Q9b}q8!9-k1n7&f zLWbKE;|OT>F-aIxEjni$*8`(CCoLD%cD^L;9u{l?y@y3~QtwcQ^TE0q4#-N<`N>rm&f_yRIAX?}es)~x 
zGv3zd#slyY1;cd~h=?NPOu;XGx`LXMXt6q>aTn@WoDqHB#Y-zyiVEaaROSmZSq1T8 zwb!8Qt!DPYSDJ3p6cpT9=?DwtdYM?E)C{0LNAupkJ=LaVCPpboelD~SXr-#Qrz+tg z2uNllSj1qT;-v%XP_z<_Lcy7?H5wsQliq#C-Dx-gf@-=|DtUjY(}b)XsKD{Xz694EPsfu zPIin(Sfcb9u0%n@`cuIJ{6)K~(6@$I8pvqWElfxsT;gy0Yrqp9@zWl$?jFEBM(Y@Z zaFlfIj5l~tRs}BO|Ka$e>=DBH7Z+-3WpN{^Vo-drf@T0D*Ejr20Fj(3TZTy7W~$H} zW&=^32l2eDdQ{<|8!fao4h4jF`y;CVlc2tW?RC!&u#;w4Zl2}qaUN0P<~v72#@r|w zHUMu-2zATHci|15v5wFED@WvR2xkH~jaGbGYVs;G>_tXJ+kjjwI~=X-zkjEGp4|B4 zwrT6Nv)cZ+Wbf8qxzR*)sy6~^-~irOWqLVhX9frYvtMQeR7xHv(U@kw^LL}_tY4k= zb9-;ioB;k}4gI+JO6y4;;xHcE)$#UTVqh?_-r4%lwiziT%VZCqLH%p7f4*AL1lGFG z1woM?EB|0HT1gzjD{Qy;ozY|QQYeMjTMguMY#&DmHaF3Z#GmbQ3iuRFqQ+gV8ime> zEagEOkF(!0)#CtvaX&W8tTOJ`gLNr6n!(38U(UoD)lWBWEU%r6YFh(hlTQHu;=&ap z(VQN@;!|jG51{i|ZUWN@<~v|W@#GPcbz5U5DzHHMdhLq4I9PV1h25<&Rm_V;EwYtjSJzKv2jR)+;qZ6d?dkFK}m5R)s&4}3Y z+N_-T3_JV%nH{{>haN~;2KDtNIA{@~PJAt`LBp>1R4dRr4CL|cydT155;(25ZQQQz zH39G@xRA%2W-N0qBNEdzcN$N+X5RH!Zyo=sH~UWaerMxAEC{Ug1fPp@32!Ox^DmI7 zbNZgYIJ?~iAK~!X{#;<9y`-1OSV@5Ny-w>}JmvKFd*d((6nii@ilNNScf0A)9t6Yd zY!5&3P6PEIk>NRWfHXzF>hDqfsREC-EfjUB4G(HKpofWXb4XWXi<+X`BA z;LB$8C!*eekPm@;Ny2W)KqZ|m>emV_(AN}Vw=cvjZiwfd=ywqNqm7&5u>$I^OCr7a z>u4|%kv5LPiQ(?diP~r-n4}2_TcBF*juSExqxb`eTM~J4?rC_JHv09<Ge5cg3U z<1vxY{bGGH4BDkHM5PmeBZ(p*Ke<{oy}EP#6|q=C+VA{QoLF|in6sIPyoB(p!9+Aj zP9w}6A~6e>du=#xjvaUY=y5XGibeeDhq-6cMlIHJZaGNqPq{FbPBX+Z#dQJSH&Pjx z3|hXmhz9DLM2gS}a`lh#3;=Jklw?IP%ww_69vws6^A59govW5{kl(Vu@$xC`Te~@$%58+o|t$vs=LE8#?o z(P`*14eH}bF%_jKtOzrwSNTvGMyZ-CHsn%RcQCY{Md`0H32&Q;0o_kY#TZLf2_1*x z?+SdhA|Sv>z$KiKEOc|tOW0CY>&{2w1=aCXoQ*04mCD{Pr!10=g*B7XFCFCF8b_(M zgQ0s26&pogb^!lU*`K-n9@~eu2(1yNdY}*h9*-B#6^0Cw+uT>Q4Y-m$4Vq_^E-^fcGS^PS)QfR>yfH{7UL;*cR)Z|K5eXT!=%%i>q2oY4XP_}|d`-9F z;1@DY)K(4e7@T$;{Suzl8zPm()9- zuDlWX<{6BbKO@yY>ZGw6tw7!KASQO_`V+#1yba(vqjo=t`;LdLq6rcSj_!AR`CFR4 zAk{%Rp0q+%|IvY(+c;1+G6m$mOox4;Dw19 z$t?R2^=W_}{b>mqI6dbdVw&{o^(GHF#C;>7C;z$;o(K9LpXHcBl^cZD@k)Pr1 zLaIh}8`Fb>?kQzYxuB^(IgLvMaxSoXq4B#=w|3VHb7l4O?Jr8b87Nb`0{msq^^bXP zhEBrWku?_}^j~0A*)xal!Ey>@b*-$9ck*m 
z0iyE^;5iT0Z`zrS)!$x5<-e~8&|;)V;D-1*1sjByo0DH_CkZH#f#h(BcW;TOa()t2 zMk(LFeNJ$G;`Y<{%HvVTj+~84T^qFwp>miK<037+?O3JXf zo|#e)fwW6Om`Y)H+*1PI?4G)-13ad{ia1k1`-%DVNtLu}GS}G#)>)ntx=wf^|0oDE z`V`QaR7l${0~|L%`;+-UDJ2n)eWhe~Jmv|(XJ;KW_1_8F#rsUlQIB#sOo@5ufjr1R zpU(luKM3px|5+(aiPGvc2{x&rEy?T@FD)0401ci6>bowG=`8U2mAI=&IV$87fxB}J z{bkYP$Gn}%Unw3#7ZlhR0su#W)`w)=yMyVS)#gz-GCG`CFigtnC)p=c2WQ1U|KcW% z@c~{6B9U3er*p9IDPNh|ZlBQ9(NH;+6VAGXi82)>WFu^5LHEWA(gRQKr8ifi!OjS1 z)~UN0Zbw6pUZbu5xpy0B8TG(mmI62m3OPQ+nYR_m!gTf5J_RPTBz)5i_`6nI{>jEd zK&zFyU=@gOK~af^K&Qa@{fiD`J2_td8+^=xWAXSEqr{i0h9vBKWzhe(f&(*@!pi!g zABBxYdqlHn!N$o^)1#{r=oyHl!!=>WM-zbELX?Ynx3Q}4ZE1^e3uT(VGc6cALdC~# z)S0Qq%N60+J_H~+nRMIxlcke+=@~@ZI{epv-CQ2-flxRkIZeops(h7Jpnl~-^%voA zvJbbaQ(^+>eap6LM~HVHXFV<4EMcB_!WrpeP<<}6PD))AKvX8qR43dVMuI6h3Fc>m zSMBYzn?4J%M$EkP0rIp6dErHV_C~mz;Z*Ed6q(ubX9caEu@+I}gYLk`yRgLNKOk;J zd{VGWBn#IPn z2~d8O+IU}W=&uhKGf!_;4+MBFilGhw8@AlCT&6~>NR+pN(%O&aI+SGlNbOD>|B4sBiyI;? zz9Dp9H_M<=;{CBwyzU5SD!}G{-|ZCK6HinHcqu-u>X}SanKi(!xK>Cu@Z`>%F?_J* zd2J@$e4lY;6v>eQaFjs%Y;WS|?7$CyF#q>Le#RZSs*alM(}=-tKzpF`Ww(L^c+Ql- z-zwx)3ds8@CHq>qDKj`C%X>ZZ>R!dA8eB{(xF@EA`T$Dk{<41WS^XW!b;&Sn@VzW+uV>sKAP^BTJ>JfydLLlJ> zor9IU(v1yzy}+Fr!g+k-an4uqHWLkHQofArPl3PpXkzmS1mac-RfL;7XcHS8G*w!V ze<*yD=#jF+@h1`rOv4T~>G*W`9>~8^4NseIXi9NcoK8-q)7~Wc>y7g*Bw5@@0R#Cn zvq1|VbU-}HI5IUC`f8{Ok4wqH={p;rerG0XXc1l0DQy=TAoG4$ego{5b-UTe$96%R z>zxk|xNPJ&5w#L;^?0Ben4j6E&?55Rf#zwIttk@TM-l29`;3A4|4Ya@&dSzyEGNRT zML_D@(`%mp9t*^yToZrMtjmo%Ag%+ke{!=vgqx+e@+!oYA_1&Vy>82gr2^ouTyqR; zKWW!F?RxFkJ0aU0%%cJggVSI%&r4*T;g4@j(4an)3c(tFvk}`8*B>Q9{q2p$Dg^|_ ze;st&`#(ip|E8&>N;!7-P?feV?7-TOU%F>t`qcJ;Zo5%*T`5FPhT)*| ztxB4L1hYd|c0pvvwN39%cGG%sNFjzNy#~ME4tlI!jq9Mf`<0Hj60bFB>G9*LLqq$n z)uM!Y8@|H^@#P7}5P~I=^X@|cf0c7|=rrQYk`)vuE9V5>t9JI-{c0MVP!vC8V;NVa z_PlKYyp@ZZJ9jEGk4uCuB?nnhL1sm?p39NM)wNQhj9jY{>%{jUxtHM35bu((nv%BH z!NXssXA&n!E{BR_oi-k12JTD{oS?bqRSJl|=henoKYFSl>Ee*Ks^j%Oy(?a==Dw9r zV&Kw0nZ))Z&ggzYm^?elnD7-TX#G1W5%EAHR0Ln z)Qj?y#usC#KLQ-U`mKsXO=xVG_Csd1mbxl6C^uT5Mw1;U3JZ!mf0xmmD0mYC^0ca~ 
zf*oDbhFMq`(z+jFO5NTj?$xb*-GibysTv%?*5wThB=`O)AG~K`#%@$e*<`YT1*PJ7 zQTmH(`pxZr&Rp`ZLrgfpziI^W%??$SPE_k5HP%m%sY%}9d{&<$*)`kvGAqEsE=ni> z|EkSL2$#8J9*r8sMnwFym%b^Zu~i(6zL35lZMrJP`nh}p;!z#SvgTV){oeghxWlf2 zqphuGfZfmK&j6nE$K#M>Q*NV;`alRFrP71U z@J^Y&g=p=$4O-l)4CNb3SyV2}GxEz*3WDBaYjU;XzrHMu^?8@IVe%lm7-p-}niR7Y zn|2vkLbRO`t$^+w*EB)Z5!PSDB+q`TvD zyfa^c3vXLMly5}otUjj&;JH?hw^W6bh*qhe9V|p=Sw16&B>x@l(vep$WVr!f$l)3^ z*RR%6%(eE$>NtsaCgDmElel>he!$kS!GRQ&`EJftZr@oH@UJ#Z*TuYU!Q)ck?!}=btN-5l4X;oDBB+X&3*mjKS@;0;s()aOP&E@t7oQKI= zcLf&-V-_P8M^5!?M=>EaMz=fWd80Xy2X)X=;|=8Rzq!h|2DYaZ5_^-QvsG?+CzFFc zlD6V}B>F-9z;((ujAP_G2ReUhj0VD!(0M-(tql{x{6o&<+cTi%R4O0@>V2J2$5~=6 z?Bv4e^IE;?*HcZd&t9?l$WnU*oQP5>aH}=A055g_Br7mquPdh^a4Em;)_a;?ZLdBs zyRn^3od<&n7NF_20eI`S8R*K08FRUPm*I1a3*S4DD`YDxIdO(7NmO@mvO@8H066Li zer}1Pe>DBjXn~D4?F~nZ03^ruTMV1U?Ox6@R_xWz0OK>I z_jwTXD@1d4(J3nLF0TVUP+#hKJezHD14yFzJk&h&tecIFDa}4fkHIW=Uy3q|N}~@l z0qeKk*pjjZ({JKr)3oR=^)#%x)kl^vpr{3_T^Zu%i=o*BG)OL0C>I}!twpy0+rvpJ za<^L^k(hy)G_pB-*4T5=J`fG`9isZOxjOQe%UsrMXI2^nt;HI<6Ic}P!V3J#E)q}d z$TxQ{kiHJ}C8x_IDPHXFSxy1uV}GyJSzQi3*sO5J9jk;b_YXk*i48_A7+ctw_uEzc zieYp|II}#d63BB7ga2*vw0z}*9Nx$Q^fma7A(LNmLfk(mnLfYrkr>&URZ7_T+qj(O zfV+i26beuP{59maakRLO+M3l=>+s1weDMu(Wx0!+u6&cQMbm&@?J$S|@}S}7^n!YM zF1V-rS5}YeQS>2F1C1I+@5uStOaMmLkfehS5Z^|$jWac+FYp`SznZ9RRE-A3M`6Br zkKO$@+X5~P0Y&uo5G2QuO(o%Z+`3VmF_F1+U11h{+D<+_=3RO8B|wZ)xF-h;z|mME z*=UfdPn>n#^C=B84k^+BzR=&wd4-JBS0WpGyVpAjz|q*KPF0GaqkY_6~Rz*)R?q%T5=;fV^+~5T%MZjV%$#;coNg_m1S z!s_Q`1n|;iF;%7B+ z^(M-oi#z)*;w*7TdQ|%h4AEX(@KT(gA9r4` z*zS{*YQ{{!$yBC~Wmr8bvZ!7jeb`wEqmmn_|GAAAd`2Q=bbBtZn=1rME^W8J3$ph* zVpJ+;`uk3PrDA>x5Z^X6vgM+AnO^htB|#2O#?eTp!_rKqbDhM^zc9t$gT_tLfVj02 zp)_5*q2@9WPWX;4p`k~CEtS|f)F1Jpd{5fh(99jU1@N|OQh2KgB;!S{RVPR>wj^fl zF8c2NDf(~lxQ+<9Bwz8G3lQJ-e&rYa_Z1Hdu;PWZFM6qj%WAGg`dczF!YgG0Qqt^6 zp!s_3Ge2@gh;+0m3lRL+>AL=^(C{uP4m%lU`(2%0E-#*Zfcl@??|l>JqXa5PCCjVo zJY|xHYN8`gl&os79(D5Bj1Ex`xIq3reP(TZzgFn%BSSKRZb$K#+`=qIfe00k*Fj1> z7TAl`1n_ncN{e#nwSLyim7+z0Q+Yxq+H~`l`~b%uo*%k06#QfW>eK4*4Yft$%!_}6 
z743i`bN*3x{%m3{7sveR1-yuz^1Cl>4uH2Kjml~Idc6n9_X{4RAXPBgY8vrhcq~EB z9C(u-O-PZ6Mj*RAqpI()YeHwMhX~{XA-QBNhj}rC6?%WvzKsdaouNs9=Ja;VKhkN# zD4knj!)$>ypMZ zF)G{q>-gNbly_U=3EovYDTWhI28vG{0rom&o5sj}ZVhrOJ`5=95 zqucaLBPF4d5Wgxc;xN{GPG%?__)YXw`p2K2h8z4L0iOSkx@?^$;%@QMTThi?W|{8c z1mda2+fI%}e`xmvqeBaYzMUe;{w6rYPi?e|<pr(^wz?38FX_bN z0rKyz>rir{5#Kat=HP{l@<{EJe4_R+nKBGY3+}4x&bI}0&#RL!*27s)NTv9v!TJ(H zS6tq(_+z`S4|3i&Y0T2J#a(Yzz`ssmIB!B$a(-NMK7+@X=e%j+ z?9m0#eS=QPQ)okXlhimw*-^{g%9PK{3PH~75ra{3HXGk)YVYhp`!t>EL{4d;<8!-y zG6}do$B;7+Q43A-PTd8YG{tGe+Kz-70DqmvZy8BGn`~bAT{)VKNs-oI#=DU}bNovW zQ3raC{O;@-fx6adM{nwR`|Qq=1>Tzz!f1m6mL&YT86wjzhpQ4)5&l=?uejA6bNs?5~opuN2*?l7#v1C^lLX5x+Lm# za(TsZzfAQ$m}Lb#P!Bp2?OD5Gl`>&-SZv+?nR_c9TPUJl6eXY>8CptBNVNoj<|K4s zh3Y#ge$W5bfEe;n0ine^FafsOg4#x2GE()2ob$o035Z)4>!PcIVv?0EiE{l^_xd)>D*78!8J&9H>(Heb^@zqD z322T|H)cfyA5@fX2|z5@P!dbUp&Uv;= zuU%)I~vu^BywpxW$H;=%6926`9uMd+pTr3AB@>s0g#BEY|1m{-)JR>|`Q)STdrrw@8{3>QdBT!R|e40=Q| zx@d@Gs+IV6%MEZVFmkl_qSmEk^$_GQ#lq2@BgTr3Iv}}nbdqNJvlmsUFQLwDed;T# z2gnEFHBWkLHxHmVlW$qHF+axtd@4dCd_43! z2J*Qdcf{O==Pc#(xzup#Q)XS;VT4Kg$=j$NX9og}4c)Ie5+I-Z$r$3y)k%h0V9+dn z{}ZFNyUAnv&T5C*b^Iy<`GA&u&;=ig~-qGYk7 zMK^OEF$GAU@5l6jKGQGk4u%aH<@c8Is>qNYFqlBFdIX_F&rPIYP<;DC7r>c}4g7z% zP4_#IN@9x1}cMg%@SA8LR8jN_$9awfliY!Pmy5BN9WpQ~g4 z>t+FrQO}4qq@5gx=jk&8tDY_AEL6u=70-A@(DQQO#A#{?Un!}znP(8aA16le@69)l z+qCjj)-SL|ZlAUUbO3#WjKu0kW5nH$j3@3rKVOIXxzUa?q={ZqtSyO?7VOg_I)Ho` z)Jw|7SjwyYdNg-mBdV5#bnmQGlzT2Q6Y2A}>URu&a6V8E27Q%d{;3u_ei7oIFZ4rz z`A}6aQP#UPSX-7CTVYtUyaUZS9!!ubLAz-Xmtk@mlKy`6XYaFIovPe;j830;rmJ^| zC^)E3@Lvz@Z+`o-bWH55*o%TH=uZyJ&Psgr8v6Irmo}@zRis>ifB)V-OmDCQ+r@-j zhayAX?phijvQMq6?P)WE5cqLA3!Xstv4;r2ARCwCuFMEW5-VBaNtZO2JbsL2cdcRl zcU_qAHi4iMz%j)1b?aMWVCnYA{q?(!XUgV2QI_XL<9Dyf9b{6&)rV%#J;R|;IKE6? 
zw?J^$S==iqS&V=9%cA88gsF7p0zGIu@m_nNIgmrqb;(FuJ6mA5r?Bcj!^_8x5Q&}= z%>`fP_FJ=P_N0(df%P#|&S;Ffje^Lc!qwKFXY;rpgAL*D$8@SgV~b7P6Jd7|1(Lh| z;TEE&m&oU4qzxum`t>>Aew;#L*&SD~<>n&PgPnIK*Oe-D%Ye%CEbs9u z4dl!4uzGOPollPgGuFl)D#EN$-#E^f2Wt3$^=- zt=|mXxC(*zj<79Z2=*`G(&_82^n7bMw`1g@#vCZoO_!f7w_N2nm)in(83__mFA45^ zT6eA*N-@lxB9i}5%FXAVk9fD0v5d&drn16SUg~2x^yR zKNRAX<@7M!TY;W4BW)k(G?}PHM3gg-He7;lvMeEUA-I+Ii)_$UeSgbK&ip}khk4-f zx4s)6V?>_p1#6U&#imW?9?-s)jV+8LED{tJqb432Z2KYA$?pQv&(K!2=mF(92g7B*-mUqR&;5xM1WNJl~Pf*<$67auq z^b7BsWYK5Se`#?rJmO^qgXc#(Vc2iJ>%LqrZ$FF^l=lIm^r}1c%29`IO|@yE zxB+&@2nu?!ejauTx2dV>xzk{AndA|`F$vEM?uMTu zV!Ke$Bc||8&}OY^{403H_6=U5pu4QO7Ye9vZc;Ns{h*@yM#oOwUWSwE%Z~QAXns)2 z@85{vl2v3cy0Tgzec>l~$(UNCF|r;Tt~i_?I{OYg1|6L1IlT2d($a%Y$L1ipU55xY zONiiK+7nBoRV{Cr`av|9O?Z#K99?LSJof_Chcv-}E!HiyXdmolaYGeHSXyTwpJxnc;X=sqDByM+ z2m|U1I11{;Wf};EY`w}&V7wBY$)iz#e3>z&z$vtq5{KNvtCtv=P+~s1ZYA%*ig7v{ z66*L~VQv7b<1=xN(Ki?TB8hlp+)Jts@S6%L6cI$Htc2``$?fJJH1?qW-I>2bLq6Ig zT9$vm8&Fg|CCTW2`w<37%4LGeg)v@eBG6L<W9kLFnoF-Al9aM< zD)%bB`5W?6X3zt6=a=8#e-$+e?;$4npw;fMzTc6Qw2-RHQSm#>5f)wM{SE};y8z~& zB?*gM>-@fSubi{C!;p)wZOyV=h9z*fmj1P4FI)#$M+?Xbwit;g=o%=^%&e{wQM$6V zvKQYXusNjLO+CuOA&sK}UKV8G(dDd--*kf)=Vc$wVrkdhrgr! 
z`GJdWPa}{5*j*ymQlE?vC+W}EtNWtj(T=1QB1D@qIRr(cF#@AE!u|~UKeJR5B-Z{y z1rxJt1Z%USJ@r_Av1sr;&iq!^aP`E~dLPFD;CX22f-E&<*v�kLuwQZV0=H|l)tT9jiZ*<{`l~1(4T}6W_bYNc(+qFLJ$Z9h6TJ1lO~g34payp>8s8D3_}0$CoM9r7avA?MW{6k9fWuF3=68sOjRi=6(M$eq8(EsS5ncMAWM0{I4UJsvR5P%{0gw^|9Me1N|- z)#0e2qDF(A=?64)SF_bt4wySKuZ8Bjo0V26I?R`QP#@2lE%*R0?@uH{1GKyB=r7IV zUKB(!iPhNG?sH$6GrK(05CFSt6X17HuBiMKEl?-Mf*jfK8~%$~bAm9WCnCP*$H*PA zpMZ6=j5!hADs-lMScl9!e7Iiv)~DT^pXYTXJAf^USCQ*d$|2uRE389$kOK@bRCNZkH*FMbQ5WE;A;;(nGmf$~g$X;LdW{QD2v|(kDuFshA8T&0<751@AHB^9SwX&h* zF-xZRt4lP#k09tfY#XzE-ZJYUjkpvWu9xM-3V^7USbI#G0T1s%0UoJTIq?|0c`rZMkuusYPrld}$w@eklSSG^s9_k&H>!b~wNwDdk&+G_M^7G3nG z^FG7pHj#kkW~?scj^|&W0sl7RH_hVl_!ws{%R_sJp{aDz6t!U$xngZ)q$Qa{iPx8H z0sc0}cG*h}Gop03i_k_EME@i@OVO6E1+)d~fC+wxn=tgt0=#S<&On)u!$RC@x?0(GD zQ7DdkeoUR@2mQnK)S20zCTV7WLKmk1&1c;Pt5`SnR5E#M4E*bvTb*W`@0NU>uTsP< zEK3&|&WZ944%DG-6<<4R)d|N8L^XmYB|NqdEgVi&HqYH_IKAdU*D@E286f|*?IDe= ze>Og_R#|*tE|f5D4S#VKno2 z6q|!OuR3{9zs5Zkxedg3yPE10i&O=Yj7D{=3;~t61>2LLMzX-auueX1q4cPb0(7pn z-OMwPxwXAO3YoQ@?w9&^?-1_S zBV55WvG#%1WfEa$eS8fz79hJPxWZ%H&bq$Vv+V0Dn7rt&nxSmFi^CHj(^K zM>)IZ^!HbOW}{L>n86`q(0L4?xtBZZyImgZkrs6%(tetUHwOE2a=#@I+^vYH%fzjT zF*pX<)cOEI75mTkDE| ztFSAlXk%-mLUA0lf$H-f(($Yd3Xf`jm|)Ei@*}(M!*{6Df1Q+~y9`$)0|p#VM*z=z zXv+JpE{fS#3OBuuWrw?{cTk9l3o~vQp>`G{=wh05EWrBMbNQ5p--gP3TuRz|9~Z3m zX+T?RzPU6OTnW|4JH;5G*Au|8=UEq$z@C};{jIvp8a?QrTj|KGilA1OdeC(+4c`{R z*9VZkyy3Cv*kajV++8cYgOSg~6~grdCh3#z{(Mte*j>?j!61Eg79%J#Wp8mQ$?t>0zH4t(}}9yVNS0{|=A71ur0%19JOz(gsDNJjKeA zSd(!MAF&&xcLw|ycE#+$6PLdx*<+wU{k!|A1UtRq{P%EWtyr@qFDY*Cyke4DOs@pl z#_%X)xQ&%ZKpyO0K+S1kC=eYKX#{eOQ%IUzRde;rsMc+iT=o-MSf&_G0r5D%t+bMQ zo2jCgbBvU)5D4mqOKP0g*3>J)vl1aaUA_pG2cB;ST)A)Ryk+Yj2qPV`bV?Wu%up|=op7=Y)4@U7F76H6o&WP51k^$DXp z!ycy{)9<5@vQHx^Uxr1FLHCaiE=Nq_!U|P>pJs?t(WTSXos05s-d^8ur<=kqyw6}w z_yas2JlimL6`Jcoa!2vTV*OuB*BuSl7lic|y(hs>Cwh$*EzzPy z?@~#G5G`Vt=tS>bqQ_b_T8Lgl5X7z$C99Vu7Q4h+)-UJ0{eRAPXXg9nz5Cvod*`61 zt|!=_$sW+b4(n-{Zq3H3m8==(aS$ThAiSQjljq6(1BLc>AEF+Ms4f6+$MeT9^-m6a 
z4nCD;)!CGwO6^Bf=L4UbzwQdO{vY9I@S`%{(}|KR_z5yMcBkDjR+K z35kPwUdyv;o(J-`OY@xU4Gt4;&_+vOWyZlEpwcaJDHjMna7pTbU) zr(mHl2bW_#SKKGV64^z9oj#6)n}X3g&Z?NhoNF#0TIT;lF&d|SIOcegF0rNW^%~IC zZiU_gl=oz42i32a3R>CxV%nl@{MA;bAl_QQkiEi~PK4lJw+3v9O)Eyn%Q@Q+BWyT4 zi;lt3oWB6G@>AN%+@X{^0r=e=I@ge@?USHr1@*qK6yATn-QS>;HJ#nziAkY74Q}tg z2k3A2H*tpJ>~?zI6R7l$*JmGYyJ}5=HR)I7B!%%Wu>OY5MZj|sX&o@dBhO5xMYnye z*G5!F$@#uA%u+rPf*P8Osr=ZWMEGxuv}LH{)@36NOEX^*U4DNP(zOJQd-$|p`Yl38 zqLR*`KpXH6nI|GO6ziR1P{~R;S*QTx)qb?;bO8O*Oc`0PQ0-rVBFs%g!kAoqgDmf7 zsCC3E{v^#iz2e=7mMS%!cIV-+$B>v>tO5KYucqg9o>W({xTEfbE|sz!^!NC6l3NIh zJ`CMCHO@`wAnbGYG)W{Jy*Q^yLpWpMRbAYZCJ8fftjYJaxPCaDGkb(&KL+^NGaM`E z`!RnfwaO2KOZAFGu!O{z+#+1BAE$X0jez(zp{}3Hzgg&Ev z&TwgW+hV1ve-ogeo17n(r0vqa7}qL`7h)2=l)Nep1p&I>zsdQ@@=sW-XBSlB(e%QB zs?Tsbk&Gr+44QrV>RKkJG z6YWzfVBk9|ye$2hKUnlmfj{$!Tsxqb18|1vgxAbIwmFT|$I?MgQ-|V>;}E1cLFJ>% z;+y!?h+u%XgLcVQ#Lz;dwbV5UI-R%}#JcR__NJ{IvhG#;AiI0Y4-}vml*wHd_3W19 z4-SXjGjm1g43v`27B+Frr26SENeQ!7Hp00;l+}_c$>%2N;>Mhj9BbC&p=0d3Za7ZN zCZFOv7*o>Fc|^dg`15-TG77fj79~aOj98nzi{aQO+UF+csMj1hVh$RN2I57XKIIEb z3}Ph})je;=Gg=6ujDu4` zD{L)8@FAT*H*Udn0N~@$!n0R^NYrZ0Bzi%B(NoQsT3RzwIa`0R*5(1vwaL(le!#!O z6ew2U0&Ji#H19DUObROBA7Qh)QEF;NWv8n3gF;x6(7$omnx&|Ak@O-Z1Vh_=y`GC6)F9mwAim`2X#z^|w*_bT^YCS2olNvQqcBQx=|_(4UZ zsW#~st^~X+62}j!>1|;J!P^qGTaHE;GH3!wzILe44=THO6TN$#fH%#9ZL{Bw>ANIL zq%Ic@q$jqakjd~(g5m%3K8#Iz>=^>&?`Wa0wrhO7f;XaA?@9w9zrYswZ}ceEZ#R;p z^h!i#+T9RX7cD^AnCzpjUZ~=`rxd;$>-QD2Pu;sE1h3_;!KoG-{ka41hQ8IF;&B__ zjM!u$NB*4pj}M&j5pR2!-$}W5`jS_j49NxHVRVeziJsoYF0J)i$a|y(IN?P}L{V9*?tvcK9^)(uaKZjW< zs0B>dP0*|j`EQA{_XzK!Q*mO<_7tM)HC1s~iNo$B{6U5z72nowcu zNuNR zC)}s{_wMV@FOin@doXqItVQ<5DX&8Az2@MIrCzBrrv;;V{Rkk>e;w5LRY=4h7ez5Y z+Etx>`a$nEYS$dshH2qgpbyE-86(U?`x|9tw!1I6ZuX64XmKV*RCm>VJandp6Wli4 z#wAkq?)?f-zki|Q5D=fsg!_Pxlf3Hzdm$9}GXmcA>yG!f*LYE1=Hw)>?wNS<uTg08SlF39G0NIpqg;AJGiE_q!-i~0d)wKj&cZ&|ss;)BHwJW!%5 zDJ+~jTYb@yjj(??%Q=4uo8SUdU2w2$q;l(>mAHL;1DpEVzV!S?81C5|F9!jyJDP>9 zpx@6)8oI2N<}!K~-bK^#z`!{HzvZWyGcZWp57h73JS4Va+32&k1PVfqDr|JuaElm> 
z_k=XrsFpax^QYq^0p8A*JUjhXq!L9n`KA*L=`mk|R5t4dS^&pR_0am2ZP3oCQY_&pX*_UjX7bx7)0#S~}kt zt19&QPf-6lG?D*o1l1XGCg$&c_tt0ZH8BwRwy6cEGp(D7VAo<`uATzAdA4&V&*1!_ z^@l-&l<&mc!xGhzJwtDZ7=I<;-$g4cZR13HsqpBRTNgrYDy=F|jpxmHlV)ufc$;R^ z;s^r3yBN_sh^0)Ib-yvbPJMcw!KA=^2U~GZ%1v583#Z54aJU8J@8ay+NVwQ1mL=ho z`v|#&KDmAwkDQh=o4vZXUI$S!?9T}Ak4rj75QlR;li^{2hb+azI?FJZN)~4Q)mPN9 z3O5je{ab|Z(B73KP#I z(`h+QN!!q*R?L5Wh2K`QabP1}jQNPG%(c<^cD`&3;2&P_SLS%u^+Ulx`z*@A+D8>X>IbU#-4dUD1(WCi%<77!ec@FTOv}>KN%7hAcduU71hW2$um9+#l+}p_^m1jvjjfqD;n!C+ fFXv6DTmOE2|GI+}-4KXKw!?RDarG Date: Thu, 30 Jun 2022 20:00:02 +1000 Subject: [PATCH 25/37] feat: add Reader#Inspect() function to check basic validity of a CAR and return stats --- v2/block_reader.go | 18 +--- v2/car.go | 21 ++++- v2/car_test.go | 9 +- v2/reader.go | 147 ++++++++++++++++++++++++++++++ v2/reader_test.go | 222 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 393 insertions(+), 24 deletions(-) diff --git a/v2/block_reader.go b/v2/block_reader.go index a74e3996..55ebd1cf 100644 --- a/v2/block_reader.go +++ b/v2/block_reader.go @@ -64,20 +64,6 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) { if _, err := v2h.ReadFrom(r); err != nil { return nil, err } - // Assert the data payload offset validity. - // It must be at least 51 ( + ). - dataOffset := int64(v2h.DataOffset) - if dataOffset < PragmaSize+HeaderSize { - return nil, fmt.Errorf("invalid data payload offset: %v", dataOffset) - } - // Assert the data size validity. - // It must be larger than zero. - // Technically, it should be at least 11 bytes (i.e. a valid CARv1 header with no roots) but - // we let further parsing of the header to signal invalid data payload header. - dataSize := int64(v2h.DataSize) - if dataSize <= 0 { - return nil, fmt.Errorf("invalid data payload size: %v", dataSize) - } // Skip to the beginning of inner CARv1 data payload. // Note, at this point the pragma and CARv1 header have been read. 
@@ -86,12 +72,12 @@ func NewBlockReader(r io.Reader, opts ...Option) (*BlockReader, error) { // fast forward to the beginning of data payload by subtracting pragma and header size from // dataOffset. rs := internalio.ToByteReadSeeker(r) - if _, err := rs.Seek(dataOffset-PragmaSize-HeaderSize, io.SeekCurrent); err != nil { + if _, err := rs.Seek(int64(v2h.DataOffset)-PragmaSize-HeaderSize, io.SeekCurrent); err != nil { return nil, err } // Set br.r to a LimitReader reading from r limited to dataSize. - br.r = io.LimitReader(r, dataSize) + br.r = io.LimitReader(r, int64(v2h.DataSize)) // Populate br.Roots by reading the inner CARv1 data payload header. header, err := carv1.ReadHeader(br.r, options.MaxAllowedHeaderSize) diff --git a/v2/car.go b/v2/car.go index 19473136..571eb114 100644 --- a/v2/car.go +++ b/v2/car.go @@ -2,7 +2,7 @@ package car import ( "encoding/binary" - "errors" + "fmt" "io" ) @@ -170,10 +170,21 @@ func (h *Header) ReadFrom(r io.Reader) (int64, error) { dataOffset := binary.LittleEndian.Uint64(buf[:8]) dataSize := binary.LittleEndian.Uint64(buf[8:16]) indexOffset := binary.LittleEndian.Uint64(buf[16:]) - if int64(dataOffset) < 0 || - int64(dataSize) < 0 || - int64(indexOffset) < 0 { - return n, errors.New("malformed car, overflowing offsets") + // Assert the data payload offset validity. + // It must be at least 51 (PragmaSize + HeaderSize). + if int64(dataOffset) < PragmaSize+HeaderSize { + return n, fmt.Errorf("invalid data payload offset: %v", dataOffset) + } + // Assert the data size validity. + // It must be larger than zero. + // Technically, it should be at least 11 bytes (i.e. a valid CARv1 header with no roots) but + // we let further parsing of the header to signal invalid data payload header. + if int64(dataSize) <= 0 { + return n, fmt.Errorf("invalid data payload size: %v", dataSize) + } + // Assert the index offset validity.
+ if int64(indexOffset) < 0 { + return n, fmt.Errorf("invalid index offset: %v", indexOffset) } h.DataOffset = dataOffset h.DataSize = dataSize diff --git a/v2/car_test.go b/v2/car_test.go index 9e113259..d993a374 100644 --- a/v2/car_test.go +++ b/v2/car_test.go @@ -56,11 +56,12 @@ func TestHeader_WriteTo(t *testing.T) { "HeaderWithEmptyCharacteristicsIsWrittenAsExpected", carv2.Header{ Characteristics: carv2.Characteristics{}, + DataOffset: 99, }, []byte{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x63, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, }, @@ -114,12 +115,14 @@ func TestHeader_ReadFrom(t *testing.T) { []byte{ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x63, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x64, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, }, carv2.Header{ Characteristics: carv2.Characteristics{}, + DataOffset: 99, + DataSize: 100, }, false, }, diff --git a/v2/reader.go b/v2/reader.go index 40c5d8c8..0208284c 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -3,10 +3,15 @@ package car import ( "fmt" "io" + "math" "github.com/ipfs/go-cid" + "github.com/ipld/go-car/v2/index" "github.com/ipld/go-car/v2/internal/carv1" + "github.com/ipld/go-car/v2/internal/carv1/util" internalio "github.com/ipld/go-car/v2/internal/io" + "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-varint" "golang.org/x/exp/mmap" ) @@ -116,6 +121,148 @@ func (r *Reader) IndexReader() io.ReaderAt { return internalio.NewOffsetReadSeeker(r.r, int64(r.Header.IndexOffset)) } +// CarStats is returned by an Inspect() call +type CarStats struct { + Version uint64 + Header Header + Roots []cid.Cid + RootsPresent bool + BlockCount uint64 + CodecCounts 
map[multicodec.Code]uint64 + MhTypeCounts map[multicodec.Code]uint64 + AvgCidLength uint64 + MaxCidLength uint64 + MinCidLength uint64 + AvgBlockLength uint64 + MaxBlockLength uint64 + MinBlockLength uint64 + IndexCodec multicodec.Code + IndexSize uint64 +} + +// Inspect does a quick scan of a CAR, performing basic validation of the format +// and returning a CarStats object that provides a high-level description of the +// contents of the CAR. +// Inspect works for CARv1 and CARv2 contents. A CARv1 will return an +// uninitialized Header value. +// Inspect will perform a basic check of a CARv2 index, where present, but this +// does not guarantee that the index is correct. Attempting to read index data +// from untrusted sources is not recommended. If required, further validation of +// an index can be performed by loading the index and performing a ForEach() and +// sanity checking that the offsets are within the data payload section of the +// CAR. However, re-generation of index data in this case is the recommended +// course of action. 
+func (r *Reader) Inspect() (CarStats, error) { + stats := CarStats{ + Version: r.Version, + Header: r.Header, + CodecCounts: make(map[multicodec.Code]uint64), + MhTypeCounts: make(map[multicodec.Code]uint64), + } + + var totalCidLength uint64 + var totalBlockLength uint64 + var minCidLength uint64 = math.MaxUint64 + var minBlockLength uint64 = math.MaxUint64 + + dr := r.DataReader() + bdr := internalio.ToByteReader(dr) + + // read roots, not using Roots(), because we need the offset setup in the data reader + header, err := carv1.ReadHeader(dr, r.opts.MaxAllowedHeaderSize) + if err != nil { + return CarStats{}, err + } + stats.Roots = header.Roots + var rootsPresentCount int + rootsPresent := make([]bool, len(stats.Roots)) + + // read block sections + for { + sectionLength, err := varint.ReadUvarint(bdr) + if err != nil { + if err == io.EOF { + // if the length of bytes read is non-zero when the error is EOF then signal an unclean EOF. + if sectionLength > 0 { + return CarStats{}, io.ErrUnexpectedEOF + } + // otherwise, this is a normal ending + break + } + } else if sectionLength == 0 && r.opts.ZeroLengthSectionAsEOF { + // normal ending for this read mode + break + } + if sectionLength > r.opts.MaxAllowedSectionSize { + return CarStats{}, util.ErrSectionTooLarge + } + + // decode just the CID bytes + cidLen, c, err := cid.CidFromReader(dr) + if err != nil { + return CarStats{}, err + } + + // is this a root block?
(also account for duplicate root CIDs) + if rootsPresentCount < len(stats.Roots) { + for i, r := range stats.Roots { + if !rootsPresent[i] && c == r { + rootsPresent[i] = true + rootsPresentCount++ + } + } + } + + cp := c.Prefix() + codec := multicodec.Code(cp.Codec) + count := stats.CodecCounts[codec] + stats.CodecCounts[codec] = count + 1 + mhtype := multicodec.Code(cp.MhType) + count = stats.MhTypeCounts[mhtype] + stats.MhTypeCounts[mhtype] = count + 1 + + blockLength := sectionLength - uint64(cidLen) + dr.Seek(int64(blockLength), io.SeekCurrent) + + stats.BlockCount++ + totalCidLength += uint64(cidLen) + totalBlockLength += blockLength + if uint64(cidLen) < minCidLength { + minCidLength = uint64(cidLen) + } + if uint64(cidLen) > stats.MaxCidLength { + stats.MaxCidLength = uint64(cidLen) + } + if uint64(blockLength) < minBlockLength { + minBlockLength = uint64(blockLength) + } + if uint64(blockLength) > stats.MaxBlockLength { + stats.MaxBlockLength = uint64(blockLength) + } + } + + stats.RootsPresent = len(stats.Roots) == rootsPresentCount + if stats.BlockCount > 0 { + stats.MinCidLength = minCidLength + stats.MinBlockLength = minBlockLength + stats.AvgCidLength = totalCidLength / stats.BlockCount + stats.AvgBlockLength = totalBlockLength / stats.BlockCount + } + + if stats.Version != 1 && stats.Header.HasIndex() { + // performs an UnmarshalLazyRead which should have its own validation and + // is intended to be a fast initial scan + ind, size, err := index.ReadFromWithSize(r.IndexReader()) + if err != nil { + return CarStats{}, err + } + stats.IndexCodec = ind.Codec() + stats.IndexSize = uint64(size) + } + + return stats, nil +} + // Close closes the underlying reader if it was opened by OpenReader. 
func (r *Reader) Close() error { if r.closer != nil { diff --git a/v2/reader_test.go b/v2/reader_test.go index c010445f..58b9a328 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -1,14 +1,19 @@ package car_test import ( + "bytes" + "encoding/hex" "io" "os" + "strings" "testing" + "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" + "github.com/multiformats/go-multicodec" "github.com/stretchr/testify/require" ) @@ -268,3 +273,220 @@ func requireNewCarV1Reader(t *testing.T, r io.Reader, zerLenAsEOF bool) *carv1.C require.NoError(t, err) return cr } + +func TestInspect(t *testing.T) { + tests := []struct { + name string + path string + zerLenAsEOF bool + expectedStats carv2.CarStats + }{ + { + name: "IndexlessCarV2", + path: "testdata/sample-v2-indexless.car", + expectedStats: carv2.CarStats{ + Version: 2, + Header: carv2.Header{ + Characteristics: carv2.Characteristics{0, 0}, + DataOffset: 51, + DataSize: 479907, + IndexOffset: 0, + }, + Roots: []cid.Cid{mustCidDecode("bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy")}, + RootsPresent: true, + AvgBlockLength: 417, // 417.6644423260248 + MinBlockLength: 1, + MaxBlockLength: 1342, + AvgCidLength: 37, // 37.86939942802669 + MinCidLength: 14, + MaxCidLength: 38, + BlockCount: 1049, + CodecCounts: map[multicodec.Code]uint64{ + multicodec.Raw: 6, + multicodec.DagCbor: 1043, + }, + MhTypeCounts: map[multicodec.Code]uint64{ + multicodec.Identity: 6, + multicodec.Blake2b256: 1043, + }, + }, + }, + { + // same payload as IndexlessCarV2, so only difference is the Version & Header + name: "CarV1", + path: "testdata/sample-v1.car", + expectedStats: carv2.CarStats{ + Version: 1, + Header: carv2.Header{}, + Roots: []cid.Cid{mustCidDecode("bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy")}, + RootsPresent: true, + AvgBlockLength: 417, // 417.6644423260248 + 
MinBlockLength: 1, + MaxBlockLength: 1342, + AvgCidLength: 37, // 37.86939942802669 + MinCidLength: 14, + MaxCidLength: 38, + BlockCount: 1049, + CodecCounts: map[multicodec.Code]uint64{ + multicodec.Raw: 6, + multicodec.DagCbor: 1043, + }, + MhTypeCounts: map[multicodec.Code]uint64{ + multicodec.Identity: 6, + multicodec.Blake2b256: 1043, + }, + }, + }, + { + // same payload as IndexlessCarV2, so only difference is the Header + name: "CarV2ProducedByBlockstore", + path: "testdata/sample-rw-bs-v2.car", + expectedStats: carv2.CarStats{ + Version: 2, + Header: carv2.Header{ + DataOffset: 1464, + DataSize: 273, + IndexOffset: 1737, + }, + Roots: []cid.Cid{ + mustCidDecode("bafkreifuosuzujyf4i6psbneqtwg2fhplc2wxptc5euspa2gn3bwhnihfu"), + mustCidDecode("bafkreifc4hca3inognou377hfhvu2xfchn2ltzi7yu27jkaeujqqqdbjju"), + mustCidDecode("bafkreig5lvr4l6b4fr3un4xvzeyt3scevgsqjgrhlnwxw2unwbn5ro276u"), + }, + RootsPresent: true, + BlockCount: 3, + CodecCounts: map[multicodec.Code]uint64{multicodec.Raw: 3}, + MhTypeCounts: map[multicodec.Code]uint64{multicodec.Sha2_256: 3}, + AvgCidLength: 36, + MaxCidLength: 36, + MinCidLength: 36, + AvgBlockLength: 6, + MaxBlockLength: 9, + MinBlockLength: 4, + IndexCodec: multicodec.CarMultihashIndexSorted, + IndexSize: 148, + }, + }, + // same as CarV1 but with a zero-byte EOF to test options + { + name: "CarV1VersionWithZeroLenSectionIsOne", + path: "testdata/sample-v1-with-zero-len-section.car", + zerLenAsEOF: true, + expectedStats: carv2.CarStats{ + Version: 1, + Header: carv2.Header{}, + Roots: []cid.Cid{mustCidDecode("bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy")}, + RootsPresent: true, + AvgBlockLength: 417, // 417.6644423260248 + MinBlockLength: 1, + MaxBlockLength: 1342, + AvgCidLength: 37, // 37.86939942802669 + MinCidLength: 14, + MaxCidLength: 38, + BlockCount: 1049, + CodecCounts: map[multicodec.Code]uint64{ + multicodec.Raw: 6, + multicodec.DagCbor: 1043, + }, + MhTypeCounts: map[multicodec.Code]uint64{ + 
multicodec.Identity: 6, + multicodec.Blake2b256: 1043, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader, err := carv2.OpenReader(tt.path, carv2.ZeroLengthSectionAsEOF(tt.zerLenAsEOF)) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, reader.Close()) }) + stats, err := reader.Inspect() + require.NoError(t, err) + require.Equal(t, tt.expectedStats, stats) + }) + } +} + +func TestInspectError(t *testing.T) { + tests := []struct { + name string + carHex string + expectedOpenError string + expectedInspectError string + }{ + { + name: "BadCidV0", + carHex: "3aa265726f6f747381d8305825000130302030303030303030303030303030303030303030303030303030303030303030306776657273696f6e010130", + expectedInspectError: "expected 1 as the cid version number, got: 48", + }, + { + name: "BadHeaderLength", + carHex: "e0e0e0e0a7060c6f6c4cca943c236f4b196723489608edb42a8b8fa80b6776657273696f6e19", + expectedOpenError: "invalid header data, length of read beyond allowable maximum", + }, + { + name: "BadSectionLength", + carHex: "11a265726f6f7473806776657273696f6e01e0e0e0e0a7060155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca00000000000000000000", + expectedInspectError: "invalid section data, length of read beyond allowable maximum", + }, + // the bad index tests are manually constructed from this single-block CARv2 by adjusting the Uint32 and Uint64 values in the index: + // pragma carv2 header carv1 icodec count codec count (swi) width dataLen mh offset + // 0aa16776657273696f6e02 00000000000000000000000000000000330000000000000041000000000000007400000000000000 11a265726f6f7473806776657273696f6e012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca00000000000000000000 8108 01000000 1200000000000000 01000000 28000000 2800000000000000 01d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca 1200000000000000 + { + name: "BadIndexCountOverflow", + // pragma carv2 header carv1 icodec 
count codec count (swi) width dataLen mh offset + carHex: "0aa16776657273696f6e02 00000000000000000000000000000000330000000000000041000000000000007400000000000000 11a265726f6f7473806776657273696f6e012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca00000000000000000000 8108 ffffffff 1200000000000000 01000000 28000000 2800000000000000 01d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca 1200000000000000", + expectedInspectError: "index too big; MultihashIndexSorted count is overflowing int32", + }, + { + name: "BadIndexCountTooMany", + // pragma carv2 header carv1 icodec count codec count (swi) width dataLen mh offset + carHex: "0aa16776657273696f6e02 00000000000000000000000000000000330000000000000041000000000000007400000000000000 11a265726f6f7473806776657273696f6e012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca00000000000000000000 8108 ffffff7f 1200000000000000 01000000 28000000 2800000000000000 01d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca 1200000000000000", + expectedInspectError: "unexpected EOF", + }, + { + name: "BadIndexMultiWidthOverflow", + // pragma carv2 header carv1 icodec count codec count (swi) width dataLen mh offset + carHex: "0aa16776657273696f6e02 00000000000000000000000000000000330000000000000041000000000000007400000000000000 11a265726f6f7473806776657273696f6e012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca00000000000000000000 8108 01000000 1200000000000000 ffffffff 28000000 2800000000000000 01d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca 1200000000000000", + expectedInspectError: "index too big; multiWidthIndex count is overflowing int32", + }, + { + name: "BadIndexMultiWidthTooMany", + // pragma carv2 header carv1 icodec count codec count (swi) width dataLen mh offset + carHex: "0aa16776657273696f6e02 00000000000000000000000000000000330000000000000041000000000000007400000000000000 
11a265726f6f7473806776657273696f6e012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca00000000000000000000 8108 01000000 1200000000000000 ffffff7f 28000000 2800000000000000 01d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca 1200000000000000", + expectedInspectError: "unexpected EOF", + }, + // we don't test any further into the index, to do that, a user should do a ForEach across the loaded index (and sanity check the offsets) + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + car, _ := hex.DecodeString(strings.ReplaceAll(tt.carHex, " ", "")) + reader, err := carv2.NewReader(bytes.NewReader(car)) + if tt.expectedOpenError != "" { + require.Error(t, err) + require.Equal(t, err.Error(), tt.expectedOpenError) + return + } else { + require.NoError(t, err) + } + t.Cleanup(func() { require.NoError(t, reader.Close()) }) + _, err = reader.Inspect() + if tt.expectedInspectError != "" { + require.Error(t, err) + require.Equal(t, err.Error(), tt.expectedInspectError) + } else { + require.NoError(t, err) + } + }) + } +} + +func mustCidDecode(s string) cid.Cid { + c, err := cid.Decode(s) + if err != nil { + panic(err) + } + return c +} From 708b0a293f6b70d52e1ae43686acc597ba051c97 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Fri, 1 Jul 2022 13:17:03 +1000 Subject: [PATCH 26/37] feat: add block hash validation to Inspect() --- v2/block_reader.go | 2 +- v2/reader.go | 67 +++++++++++++++++++++++++++++++++++++++++++-- v2/reader_test.go | 68 ++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 127 insertions(+), 10 deletions(-) diff --git a/v2/block_reader.go b/v2/block_reader.go index 55ebd1cf..252885c3 100644 --- a/v2/block_reader.go +++ b/v2/block_reader.go @@ -119,7 +119,7 @@ func (br *BlockReader) Next() (blocks.Block, error) { } if !hashed.Equals(c) { - return nil, fmt.Errorf("mismatch in content integrity, name: %s, data: %s", c, hashed) + return nil, fmt.Errorf("mismatch in content integrity, expected: 
%s, got: %s", c, hashed) } return blocks.NewBlockWithCid(data, c) diff --git a/v2/reader.go b/v2/reader.go index 0208284c..c3ef3653 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -145,6 +145,13 @@ type CarStats struct { // contents of the CAR. // Inspect works for CARv1 and CARv2 contents. A CARv1 will return an // uninitialized Header value. +// +// If validateBlockHash is true, all block data in the payload will be hashed +// and compared to the CID for that block and an error will return if there +// is a mismatch. If false, block data will be skipped over and not checked. +// Performing a full block hash validation is similar to using a BlockReader and +// calling Next over all blocks. +// // Inspect will perform a basic check of a CARv2 index, where present, but this // does not guarantee that the index is correct. Attempting to read index data // from untrusted sources is not recommended. If required, further validation of @@ -152,7 +159,34 @@ type CarStats struct { // sanity checking that the offsets are within the data payload section of the // CAR. However, re-generation of index data in this case is the recommended // course of action. -func (r *Reader) Inspect() (CarStats, error) { +// +// Beyond the checks performed by Inspect, a valid / good CAR is somewhat +// use-case dependent. Factors to consider include: +// +// * Bad indexes, including incorrect offsets, duplicate entries, or other +// faulty data. Indexes should be re-generated, regardless, if you need to use +// them and have any reason to not trust the source. +// +// * Blocks use codecs that your system doesn't have access to—which may mean +// you can't traverse a DAG or use the contained data. CarStats#CodecCounts +// contains a list of codecs found in the CAR so this can be checked. +// +// * CIDs use multihashes that your system doesn't have access to—which will +// mean you can't validate block hashes are correct (using validateBlockHash +// in this case will result in a failure). 
CarStats#MhTypeCounts contains a +// list of multihashes found in the CAR so this can bechecked. +// +// * The presence of IDENTITY CIDs, which may not be supported (or desired) by +// the consumer of the CAR. CarStats#CodecCounts can determine the presence +// of IDENTITY CIDs. +// +// * Roots: the number of roots, duplicates, and whether they are related to the +// blocks contained within the CAR. CarStats contains a list of Roots and a +// RootsPresent bool so further checks can be performed. +// +// * DAG completeness is not checked. Any properties relating to the DAG, or +// DAGs contained within a CAR are the responsibility of the user to check. +func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { stats := CarStats{ Version: r.Version, Header: r.Header, @@ -189,7 +223,8 @@ func (r *Reader) Inspect() (CarStats, error) { // otherwise, this is a normal ending break } - } else if sectionLength == 0 && r.opts.ZeroLengthSectionAsEOF { + } + if sectionLength == 0 && r.opts.ZeroLengthSectionAsEOF { // normal ending for this read mode break } @@ -203,6 +238,13 @@ func (r *Reader) Inspect() (CarStats, error) { return CarStats{}, err } + if sectionLength < uint64(cidLen) { + // this case is handled different in the normal ReadNode() path since it + // slurps in the whole section bytes and decodes CID from there - so an + // error should come from a failing io.ReadFull + return CarStats{}, fmt.Errorf("section length shorter than CID length") + } + // is this a root block? 
(also account for duplicate root CIDs) if rootsPresentCount < len(stats.Roots) { for i, r := range stats.Roots { @@ -222,7 +264,26 @@ func (r *Reader) Inspect() (CarStats, error) { stats.MhTypeCounts[mhtype] = count + 1 blockLength := sectionLength - uint64(cidLen) - dr.Seek(int64(blockLength), io.SeekCurrent) + + if validateBlockHash { + // read the block data, hash it and compare it + buf := make([]byte, blockLength) + if _, err := io.ReadFull(dr, buf); err != nil { + return CarStats{}, err + } + + hashed, err := cp.Sum(buf) + if err != nil { + return CarStats{}, err + } + + if !hashed.Equals(c) { + return CarStats{}, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", c, hashed) + } + } else { + // otherwise, skip over it + dr.Seek(int64(blockLength), io.SeekCurrent) + } stats.BlockCount++ totalCidLength += uint64(cidLen) diff --git a/v2/reader_test.go b/v2/reader_test.go index 58b9a328..85605dad 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -278,6 +278,7 @@ func TestInspect(t *testing.T) { tests := []struct { name string path string + carHex string zerLenAsEOF bool expectedStats carv2.CarStats }{ @@ -394,14 +395,43 @@ func TestInspect(t *testing.T) { }, }, }, + { + // A case where this _could_ be a valid CAR if we allowed identity CIDs + // and not matching block contents to exist, there's no block bytes in + // this. It will only fail if you don't validate the CID matches the, + // bytes (see TestInspectError for that case). 
+ name: "IdentityCID", + // 47 {version:1,roots:[identity cid]} 25 identity cid (dag-json {"identity":"block"}) + carHex: "2f a265726f6f747381d82a581a0001a90200147b226964656e74697479223a22626c6f636b227d6776657273696f6e01 19 01a90200147b226964656e74697479223a22626c6f636b227d", + expectedStats: carv2.CarStats{ + Version: 1, + Roots: []cid.Cid{mustCidDecode("baguqeaaupmrgszdfnz2gs5dzei5ceytmn5rwwit5")}, + RootsPresent: true, + BlockCount: 1, + CodecCounts: map[multicodec.Code]uint64{multicodec.DagJson: 1}, + MhTypeCounts: map[multicodec.Code]uint64{multicodec.Identity: 1}, + AvgCidLength: 25, + MaxCidLength: 25, + MinCidLength: 25, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - reader, err := carv2.OpenReader(tt.path, carv2.ZeroLengthSectionAsEOF(tt.zerLenAsEOF)) - require.NoError(t, err) + var reader *carv2.Reader + var err error + if tt.path != "" { + reader, err = carv2.OpenReader(tt.path, carv2.ZeroLengthSectionAsEOF(tt.zerLenAsEOF)) + require.NoError(t, err) + } else { + byts, err := hex.DecodeString(strings.ReplaceAll(tt.carHex, " ", "")) + require.NoError(t, err) + reader, err = carv2.NewReader(bytes.NewReader(byts), carv2.ZeroLengthSectionAsEOF(tt.zerLenAsEOF)) + require.NoError(t, err) + } t.Cleanup(func() { require.NoError(t, reader.Close()) }) - stats, err := reader.Inspect() + stats, err := reader.Inspect(false) require.NoError(t, err) require.Equal(t, tt.expectedStats, stats) }) @@ -414,6 +444,7 @@ func TestInspectError(t *testing.T) { carHex string expectedOpenError string expectedInspectError string + validateBlockHash bool }{ { name: "BadCidV0", @@ -430,6 +461,31 @@ func TestInspectError(t *testing.T) { carHex: "11a265726f6f7473806776657273696f6e01e0e0e0e0a7060155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca00000000000000000000", expectedInspectError: "invalid section data, length of read beyond allowable maximum", }, + { + name: "BadSectionLength2", + carHex: 
"3aa265726f6f747381d8305825000130302030303030303030303030303030303030303030303030303030303030303030306776657273696f6e01200130302030303030303030303030303030303030303030303030303030303030303030303030303030303030", + expectedInspectError: "section length shorter than CID length", + validateBlockHash: true, + }, + { + name: "BadBlockHash(SanityCheck)", // this should pass because we don't ask the CID be validated even though it doesn't match + // header cid data + carHex: "11a265726f6f7473806776657273696f6e 012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca ffffffffffffffffffff", + }, + { + name: "BadBlockHash", // same as above, but we ask for CID validation + // header cid data + carHex: "11a265726f6f7473806776657273696f6e 012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca ffffffffffffffffffff", + validateBlockHash: true, + expectedInspectError: "mismatch in content integrity, expected: bafkreiab2rek7wjiazkfrt3hbnqpljmu24226alszdlh6ivic2abgjubzi, got: bafkreiaaqoxrddiyuy6gxnks6ioqytxhq5a7tchm2mm5htigznwiljukmm", + }, + { + name: "IdentityCID", // a case where this _could_ be a valid CAR if we allowed identity CIDs and not matching block contents to exist, there's no block bytes in this + // 47 {version:1,roots:[identity cid]} 25 identity cid (dag-json {"identity":"block"}) + carHex: "2f a265726f6f747381d82a581a0001a90200147b226964656e74697479223a22626c6f636b227d6776657273696f6e01 19 01a90200147b226964656e74697479223a22626c6f636b227d", + validateBlockHash: true, + expectedInspectError: "mismatch in content integrity, expected: baguqeaaupmrgszdfnz2gs5dzei5ceytmn5rwwit5, got: baguqeaaa", + }, // the bad index tests are manually constructed from this single-block CARv2 by adjusting the Uint32 and Uint64 values in the index: // pragma carv2 header carv1 icodec count codec count (swi) width dataLen mh offset // 0aa16776657273696f6e02 00000000000000000000000000000000330000000000000041000000000000007400000000000000 
11a265726f6f7473806776657273696f6e012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca00000000000000000000 8108 01000000 1200000000000000 01000000 28000000 2800000000000000 01d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca 1200000000000000 @@ -466,16 +522,16 @@ func TestInspectError(t *testing.T) { reader, err := carv2.NewReader(bytes.NewReader(car)) if tt.expectedOpenError != "" { require.Error(t, err) - require.Equal(t, err.Error(), tt.expectedOpenError) + require.Equal(t, tt.expectedOpenError, err.Error()) return } else { require.NoError(t, err) } t.Cleanup(func() { require.NoError(t, reader.Close()) }) - _, err = reader.Inspect() + _, err = reader.Inspect(tt.validateBlockHash) if tt.expectedInspectError != "" { require.Error(t, err) - require.Equal(t, err.Error(), tt.expectedInspectError) + require.Equal(t, tt.expectedInspectError, err.Error()) } else { require.NoError(t, err) } From a36603ec7728ccb828810d7015b65fd8b36d2019 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 1 Jul 2022 05:22:51 +0200 Subject: [PATCH 27/37] test: add fuzzing for reader#Inspect --- .github/workflows/go-fuzz.yml | 2 +- v2/fuzz_test.go | 55 +++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/.github/workflows/go-fuzz.yml b/.github/workflows/go-fuzz.yml index 3aa7f853..548a4a91 100644 --- a/.github/workflows/go-fuzz.yml +++ b/.github/workflows/go-fuzz.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: true matrix: - target: [ "BlockReader", "Reader", "Index" ] + target: [ "BlockReader", "Reader", "Index", "Inspect" ] runs-on: ubuntu-latest name: Fuzz V2 ${{ matrix.target }} steps: diff --git a/v2/fuzz_test.go b/v2/fuzz_test.go index 8187457b..c7473750 100644 --- a/v2/fuzz_test.go +++ b/v2/fuzz_test.go @@ -13,6 +13,7 @@ import ( car "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" + "github.com/ipld/go-car/v2/internal/carv1" ) // v1FixtureStr is a clean carv1 single-block, single-root CAR @@ 
-116,3 +117,57 @@ func FuzzIndex(f *testing.F) { index.ReadFrom(bytes.NewReader(data)) }) } + +func FuzzInspect(f *testing.F) { + seedWithCarFiles(f) + + f.Fuzz(func(t *testing.T, data []byte) { + reader, err := car.NewReader(bytes.NewReader(data)) + if err != nil { + return + } + + // Do differential fuzzing between Inspect and the normal parser + _, inspectErr := reader.Inspect(true) + if inspectErr == nil { + return + } + + reader, err = car.NewReader(bytes.NewReader(data)) + if err != nil { + t.Fatal("second NewReader on same data failed", err.Error()) + } + + if i := reader.IndexReader(); i != nil { + _, err = index.ReadFrom(i) + if err != nil { + return + } + } + + dr := reader.DataReader() + + _, err = carv1.ReadHeader(dr, carv1.DefaultMaxAllowedHeaderSize) + if err != nil { + return + } + + blocks, err := car.NewBlockReader(dr) + if err != nil { + return + } + + for { + _, err := blocks.Next() + if err != nil { + if err == io.EOF { + break + } + // caught error as expected + return + } + } + + t.Fatal("Inspect found error but we red this file correctly:", inspectErr.Error()) + }) +} From 965f1f339e483029d01442980308293177b10611 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Fri, 1 Jul 2022 15:49:42 +0100 Subject: [PATCH 28/37] Use streaming APIs to verify the hash of blocks in CAR `Inspect` `go-cid` exposes `Sum` API that facilitates calculation of the CID from `[]byte` payload. `go-multihash` now exposes `SumStream` which can calculate digest from `io.Reader` as well as `[]byte`. But, unfortunately the equivalent API does not exist in `go-cid`. To avoid copying the entire block into memory, implement CID calculation using the streaming multihash sum during inspection of CAR payload. 
--- v2/reader.go | 42 ++++++++++++++++++++++++++++-------------- v2/reader_test.go | 4 ++-- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/v2/reader.go b/v2/reader.go index c3ef3653..cd10b81d 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -11,6 +11,7 @@ import ( "github.com/ipld/go-car/v2/internal/carv1/util" internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-multihash" "github.com/multiformats/go-varint" "golang.org/x/exp/mmap" ) @@ -266,23 +267,36 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { blockLength := sectionLength - uint64(cidLen) if validateBlockHash { - // read the block data, hash it and compare it - buf := make([]byte, blockLength) - if _, err := io.ReadFull(dr, buf); err != nil { - return CarStats{}, err + // Use multihash.SumStream to avoid having to copy the entire block content into memory. + // The SumStream uses a buffered copy to write bytes into the hasher which will take + // advantage of streaming hash calculation depending on the hash function. + // TODO: introduce SumStream in go-cid to simplify the code here. 
+ blockReader := io.LimitReader(dr, int64(blockLength)) + mhl := cp.MhLength + if mhtype == multicodec.Identity { + mhl = -1 } - - hashed, err := cp.Sum(buf) + mh, err := multihash.SumStream(blockReader, cp.MhType, mhl) if err != nil { return CarStats{}, err } - - if !hashed.Equals(c) { - return CarStats{}, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", c, hashed) + var wantCid cid.Cid + switch cp.Version { + case 0: + wantCid = cid.NewCidV0(mh) + case 1: + wantCid = cid.NewCidV1(cp.Codec, mh) + default: + return CarStats{}, fmt.Errorf("invalid cid version: %d", cp.Version) + } + if !wantCid.Equals(c) { + return CarStats{}, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", wantCid, c) } } else { // otherwise, skip over it - dr.Seek(int64(blockLength), io.SeekCurrent) + if _, err := dr.Seek(int64(blockLength), io.SeekCurrent); err != nil { + return CarStats{}, err + } } stats.BlockCount++ @@ -294,11 +308,11 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { if uint64(cidLen) > stats.MaxCidLength { stats.MaxCidLength = uint64(cidLen) } - if uint64(blockLength) < minBlockLength { - minBlockLength = uint64(blockLength) + if blockLength < minBlockLength { + minBlockLength = blockLength } - if uint64(blockLength) > stats.MaxBlockLength { - stats.MaxBlockLength = uint64(blockLength) + if blockLength > stats.MaxBlockLength { + stats.MaxBlockLength = blockLength } } diff --git a/v2/reader_test.go b/v2/reader_test.go index 85605dad..5686d844 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -477,14 +477,14 @@ func TestInspectError(t *testing.T) { // header cid data carHex: "11a265726f6f7473806776657273696f6e 012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca ffffffffffffffffffff", validateBlockHash: true, - expectedInspectError: "mismatch in content integrity, expected: bafkreiab2rek7wjiazkfrt3hbnqpljmu24226alszdlh6ivic2abgjubzi, got: 
bafkreiaaqoxrddiyuy6gxnks6ioqytxhq5a7tchm2mm5htigznwiljukmm", + expectedInspectError: "mismatch in content integrity, expected: bafkreiaaqoxrddiyuy6gxnks6ioqytxhq5a7tchm2mm5htigznwiljukmm, got: bafkreiab2rek7wjiazkfrt3hbnqpljmu24226alszdlh6ivic2abgjubzi", }, { name: "IdentityCID", // a case where this _could_ be a valid CAR if we allowed identity CIDs and not matching block contents to exist, there's no block bytes in this // 47 {version:1,roots:[identity cid]} 25 identity cid (dag-json {"identity":"block"}) carHex: "2f a265726f6f747381d82a581a0001a90200147b226964656e74697479223a22626c6f636b227d6776657273696f6e01 19 01a90200147b226964656e74697479223a22626c6f636b227d", validateBlockHash: true, - expectedInspectError: "mismatch in content integrity, expected: baguqeaaupmrgszdfnz2gs5dzei5ceytmn5rwwit5, got: baguqeaaa", + expectedInspectError: "mismatch in content integrity, expected: baguqeaaa, got: baguqeaaupmrgszdfnz2gs5dzei5ceytmn5rwwit5", }, // the bad index tests are manually constructed from this single-block CARv2 by adjusting the Uint32 and Uint64 values in the index: // pragma carv2 header carv1 icodec count codec count (swi) width dataLen mh offset From a274e752a4fc52a895e6820ee286f6ae9242c65c Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Sat, 2 Jul 2022 09:51:07 +0100 Subject: [PATCH 29/37] Use consistent CID mismatch error in `Inspect` and `BlockReader.Next` This reverts the earlier changes to get the message consistent. Note, the CID we expect is the one in the CAR payload, not the calculated CID for the block. 
--- v2/reader.go | 10 +++++----- v2/reader_test.go | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/v2/reader.go b/v2/reader.go index cd10b81d..f67638af 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -280,17 +280,17 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { if err != nil { return CarStats{}, err } - var wantCid cid.Cid + var gotCid cid.Cid switch cp.Version { case 0: - wantCid = cid.NewCidV0(mh) + gotCid = cid.NewCidV0(mh) case 1: - wantCid = cid.NewCidV1(cp.Codec, mh) + gotCid = cid.NewCidV1(cp.Codec, mh) default: return CarStats{}, fmt.Errorf("invalid cid version: %d", cp.Version) } - if !wantCid.Equals(c) { - return CarStats{}, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", wantCid, c) + if !gotCid.Equals(c) { + return CarStats{}, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", c, gotCid) } } else { // otherwise, skip over it diff --git a/v2/reader_test.go b/v2/reader_test.go index 5686d844..85605dad 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -477,14 +477,14 @@ func TestInspectError(t *testing.T) { // header cid data carHex: "11a265726f6f7473806776657273696f6e 012e0155122001d448afd928065458cf670b60f5a594d735af0172c8d67f22a81680132681ca ffffffffffffffffffff", validateBlockHash: true, - expectedInspectError: "mismatch in content integrity, expected: bafkreiaaqoxrddiyuy6gxnks6ioqytxhq5a7tchm2mm5htigznwiljukmm, got: bafkreiab2rek7wjiazkfrt3hbnqpljmu24226alszdlh6ivic2abgjubzi", + expectedInspectError: "mismatch in content integrity, expected: bafkreiab2rek7wjiazkfrt3hbnqpljmu24226alszdlh6ivic2abgjubzi, got: bafkreiaaqoxrddiyuy6gxnks6ioqytxhq5a7tchm2mm5htigznwiljukmm", }, { name: "IdentityCID", // a case where this _could_ be a valid CAR if we allowed identity CIDs and not matching block contents to exist, there's no block bytes in this // 47 {version:1,roots:[identity cid]} 25 identity cid (dag-json {"identity":"block"}) carHex: "2f 
a265726f6f747381d82a581a0001a90200147b226964656e74697479223a22626c6f636b227d6776657273696f6e01 19 01a90200147b226964656e74697479223a22626c6f636b227d", validateBlockHash: true, - expectedInspectError: "mismatch in content integrity, expected: baguqeaaa, got: baguqeaaupmrgszdfnz2gs5dzei5ceytmn5rwwit5", + expectedInspectError: "mismatch in content integrity, expected: baguqeaaupmrgszdfnz2gs5dzei5ceytmn5rwwit5, got: baguqeaaa", }, // the bad index tests are manually constructed from this single-block CARv2 by adjusting the Uint32 and Uint64 values in the index: // pragma carv2 header carv1 icodec count codec count (swi) width dataLen mh offset From 641c0f8793af6e7c2b437c75d8e328d7c492ba7f Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Sat, 2 Jul 2022 10:10:41 +0100 Subject: [PATCH 30/37] Benchmark `Reader.Inspect` with and without hash validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark the `Reader.Inspect` with and without hash validation using a randomly generated CARv2 file of size 10 MiB. 
Results from running the benchmark in parallel locally on MacOS `Intel(R) Core(TM) i7-1068NG7 CPU @ 2.30GHz` repeated 10 times: ``` Reader_InspectWithBlockValidation-8 5.30ms ±48% Reader_InspectWithoutBlockValidation-8 231µs ±42% name speed Reader_InspectWithBlockValidation-8 2.08GB/s ±35% Reader_InspectWithoutBlockValidation-8 46.8GB/s ±32% name alloc/op Reader_InspectWithBlockValidation-8 10.7MB ± 0% Reader_InspectWithoutBlockValidation-8 60.7kB ± 0% name allocs/op Reader_InspectWithBlockValidation-8 4.54k ± 0% Reader_InspectWithoutBlockValidation-8 2.29k ± 0% ``` --- v2/bench_test.go | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/v2/bench_test.go b/v2/bench_test.go index 6e766935..e413180b 100644 --- a/v2/bench_test.go +++ b/v2/bench_test.go @@ -119,6 +119,57 @@ func BenchmarkExtractV1UsingReader(b *testing.B) { }) } +// BenchmarkReader_InspectWithBlockValidation benchmarks Reader.Inspect with block hash validation +// for a randomly generated CARv2 file of size 10 MiB. +func BenchmarkReader_InspectWithBlockValidation(b *testing.B) { + path := filepath.Join(b.TempDir(), "bench-large-v2.car") + generateRandomCarV2File(b, path, 10<<20) // 10 MiB + defer os.Remove(path) + + info, err := os.Stat(path) + if err != nil { + b.Fatal(err) + } + b.SetBytes(info.Size()) + b.ReportAllocs() + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + benchmarkInspect(b, path, true) + } + }) +} + +// BenchmarkReader_InspectWithoutBlockValidation benchmarks Reader.Inspect without block hash +// validation for a randomly generated CARv2 file of size 10 MiB. 
+func BenchmarkReader_InspectWithoutBlockValidation(b *testing.B) { + path := filepath.Join(b.TempDir(), "bench-large-v2.car") + generateRandomCarV2File(b, path, 10<<20) // 10 MiB + defer os.Remove(path) + + info, err := os.Stat(path) + if err != nil { + b.Fatal(err) + } + b.SetBytes(info.Size()) + b.ReportAllocs() + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + benchmarkInspect(b, path, false) + } + }) +} + +func benchmarkInspect(b *testing.B, path string, validateBlockHash bool) { + reader, err := carv2.OpenReader(path) + if err != nil { + b.Fatal(err) + } + if _, err := reader.Inspect(validateBlockHash); err != nil { + b.Fatal(err) + } +} func generateRandomCarV2File(b *testing.B, path string, minTotalBlockSize int) { // Use fixed RNG for determinism across benchmarks. rng := rand.New(rand.NewSource(1413)) From 8696a191561d12bd45357feb1de2a9a43b3cfa8c Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Sun, 3 Jul 2022 09:22:25 +0100 Subject: [PATCH 31/37] Drop repeated package name from `CarStats` Cosmetic refactor to rename `car.CarStats` to `car.Stats`, which looks more fluent when using the API. 
--- v2/reader.go | 40 ++++++++++++++++++++-------------------- v2/reader_test.go | 12 ++++++------ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/v2/reader.go b/v2/reader.go index f67638af..df73f293 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -122,8 +122,8 @@ func (r *Reader) IndexReader() io.ReaderAt { return internalio.NewOffsetReadSeeker(r.r, int64(r.Header.IndexOffset)) } -// CarStats is returned by an Inspect() call -type CarStats struct { +// Stats is returned by an Inspect() call +type Stats struct { Version uint64 Header Header Roots []cid.Cid @@ -142,7 +142,7 @@ type CarStats struct { } // Inspect does a quick scan of a CAR, performing basic validation of the format -// and returning a CarStats object that provides a high-level description of the +// and returning a Stats object that provides a high-level description of the // contents of the CAR. // Inspect works for CARv1 and CARv2 contents. A CARv1 will return an // uninitialized Header value. @@ -169,26 +169,26 @@ type CarStats struct { // them and have any reason to not trust the source. // // * Blocks use codecs that your system doesn't have access to—which may mean -// you can't traverse a DAG or use the contained data. CarStats#CodecCounts +// you can't traverse a DAG or use the contained data. Stats.CodecCounts // contains a list of codecs found in the CAR so this can be checked. // // * CIDs use multihashes that your system doesn't have access to—which will // mean you can't validate block hashes are correct (using validateBlockHash -// in this case will result in a failure). CarStats#MhTypeCounts contains a -// list of multihashes found in the CAR so this can bechecked. +// in this case will result in a failure). Stats.MhTypeCounts contains a +// list of multihashes found in the CAR so this can be checked. // // * The presence of IDENTITY CIDs, which may not be supported (or desired) by -// the consumer of the CAR. 
CarStats#CodecCounts can determine the presence +// the consumer of the CAR. Stats.CodecCounts can determine the presence // of IDENTITY CIDs. // // * Roots: the number of roots, duplicates, and whether they are related to the -// blocks contained within the CAR. CarStats contains a list of Roots and a +// blocks contained within the CAR. Stats contains a list of Roots and a // RootsPresent bool so further checks can be performed. // // * DAG completeness is not checked. Any properties relating to the DAG, or // DAGs contained within a CAR are the responsibility of the user to check. -func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { - stats := CarStats{ +func (r *Reader) Inspect(validateBlockHash bool) (Stats, error) { + stats := Stats{ Version: r.Version, Header: r.Header, CodecCounts: make(map[multicodec.Code]uint64), @@ -206,7 +206,7 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { // read roots, not using Roots(), because we need the offset setup in the data trader header, err := carv1.ReadHeader(dr, r.opts.MaxAllowedHeaderSize) if err != nil { - return CarStats{}, err + return Stats{}, err } stats.Roots = header.Roots var rootsPresentCount int @@ -219,7 +219,7 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { if err == io.EOF { // if the length of bytes read is non-zero when the error is EOF then signal an unclean EOF. 
if sectionLength > 0 { - return CarStats{}, io.ErrUnexpectedEOF + return Stats{}, io.ErrUnexpectedEOF } // otherwise, this is a normal ending break @@ -230,20 +230,20 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { break } if sectionLength > r.opts.MaxAllowedSectionSize { - return CarStats{}, util.ErrSectionTooLarge + return Stats{}, util.ErrSectionTooLarge } // decode just the CID bytes cidLen, c, err := cid.CidFromReader(dr) if err != nil { - return CarStats{}, err + return Stats{}, err } if sectionLength < uint64(cidLen) { // this case is handled different in the normal ReadNode() path since it // slurps in the whole section bytes and decodes CID from there - so an // error should come from a failing io.ReadFull - return CarStats{}, fmt.Errorf("section length shorter than CID length") + return Stats{}, fmt.Errorf("section length shorter than CID length") } // is this a root block? (also account for duplicate root CIDs) @@ -278,7 +278,7 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { } mh, err := multihash.SumStream(blockReader, cp.MhType, mhl) if err != nil { - return CarStats{}, err + return Stats{}, err } var gotCid cid.Cid switch cp.Version { @@ -287,15 +287,15 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { case 1: gotCid = cid.NewCidV1(cp.Codec, mh) default: - return CarStats{}, fmt.Errorf("invalid cid version: %d", cp.Version) + return Stats{}, fmt.Errorf("invalid cid version: %d", cp.Version) } if !gotCid.Equals(c) { - return CarStats{}, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", c, gotCid) + return Stats{}, fmt.Errorf("mismatch in content integrity, expected: %s, got: %s", c, gotCid) } } else { // otherwise, skip over it if _, err := dr.Seek(int64(blockLength), io.SeekCurrent); err != nil { - return CarStats{}, err + return Stats{}, err } } @@ -329,7 +329,7 @@ func (r *Reader) Inspect(validateBlockHash bool) (CarStats, error) { // is intended to be a fast 
initial scan ind, size, err := index.ReadFromWithSize(r.IndexReader()) if err != nil { - return CarStats{}, err + return Stats{}, err } stats.IndexCodec = ind.Codec() stats.IndexSize = uint64(size) diff --git a/v2/reader_test.go b/v2/reader_test.go index 85605dad..7c43d931 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -280,12 +280,12 @@ func TestInspect(t *testing.T) { path string carHex string zerLenAsEOF bool - expectedStats carv2.CarStats + expectedStats carv2.Stats }{ { name: "IndexlessCarV2", path: "testdata/sample-v2-indexless.car", - expectedStats: carv2.CarStats{ + expectedStats: carv2.Stats{ Version: 2, Header: carv2.Header{ Characteristics: carv2.Characteristics{0, 0}, @@ -316,7 +316,7 @@ func TestInspect(t *testing.T) { // same payload as IndexlessCarV2, so only difference is the Version & Header name: "CarV1", path: "testdata/sample-v1.car", - expectedStats: carv2.CarStats{ + expectedStats: carv2.Stats{ Version: 1, Header: carv2.Header{}, Roots: []cid.Cid{mustCidDecode("bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy")}, @@ -342,7 +342,7 @@ func TestInspect(t *testing.T) { // same payload as IndexlessCarV2, so only difference is the Header name: "CarV2ProducedByBlockstore", path: "testdata/sample-rw-bs-v2.car", - expectedStats: carv2.CarStats{ + expectedStats: carv2.Stats{ Version: 2, Header: carv2.Header{ DataOffset: 1464, @@ -373,7 +373,7 @@ func TestInspect(t *testing.T) { name: "CarV1VersionWithZeroLenSectionIsOne", path: "testdata/sample-v1-with-zero-len-section.car", zerLenAsEOF: true, - expectedStats: carv2.CarStats{ + expectedStats: carv2.Stats{ Version: 1, Header: carv2.Header{}, Roots: []cid.Cid{mustCidDecode("bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy")}, @@ -403,7 +403,7 @@ func TestInspect(t *testing.T) { name: "IdentityCID", // 47 {version:1,roots:[identity cid]} 25 identity cid (dag-json {"identity":"block"}) carHex: "2f 
a265726f6f747381d82a581a0001a90200147b226964656e74697479223a22626c6f636b227d6776657273696f6e01 19 01a90200147b226964656e74697479223a22626c6f636b227d", - expectedStats: carv2.CarStats{ + expectedStats: carv2.Stats{ Version: 1, Roots: []cid.Cid{mustCidDecode("baguqeaaupmrgszdfnz2gs5dzei5ceytmn5rwwit5")}, RootsPresent: true, From bed12977a4aaa58eb4471600b2e45d22d982fb3d Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 4 Jul 2022 09:41:53 +0100 Subject: [PATCH 32/37] Return error when section length is invalid `varint` Return potential error when reading section length as varint. Add test to verify the error is indeed returned. Use `errors.New` instead of `fmt.Errorf` when no formatting is needed in error message. --- v2/reader.go | 4 +++- v2/reader_test.go | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/v2/reader.go b/v2/reader.go index df73f293..c34a0672 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -1,6 +1,7 @@ package car import ( + "errors" "fmt" "io" "math" @@ -224,6 +225,7 @@ func (r *Reader) Inspect(validateBlockHash bool) (Stats, error) { // otherwise, this is a normal ending break } + return Stats{}, err } if sectionLength == 0 && r.opts.ZeroLengthSectionAsEOF { // normal ending for this read mode @@ -243,7 +245,7 @@ func (r *Reader) Inspect(validateBlockHash bool) (Stats, error) { // this case is handled different in the normal ReadNode() path since it // slurps in the whole section bytes and decodes CID from there - so an // error should come from a failing io.ReadFull - return Stats{}, fmt.Errorf("section length shorter than CID length") + return Stats{}, errors.New("section length shorter than CID length") } // is this a root block?
(also account for duplicate root CIDs) diff --git a/v2/reader_test.go b/v2/reader_test.go index 7c43d931..ed2b78e2 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -467,6 +467,11 @@ func TestInspectError(t *testing.T) { expectedInspectError: "section length shorter than CID length", validateBlockHash: true, }, + { + name: "BadSectionLength3", + carHex: "11a265726f6f7473f66776657273696f6e0180", + expectedInspectError: "unexpected EOF", + }, { name: "BadBlockHash(SanityCheck)", // this should pass because we don't ask the CID be validated even though it doesn't match // header cid data From a41506a2974cf088b4833cf8fa2af558d9786744 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 4 Jul 2022 20:19:12 +0100 Subject: [PATCH 33/37] Fix fuzz CI job --- .github/workflows/go-fuzz.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/go-fuzz.yml b/.github/workflows/go-fuzz.yml index 548a4a91..cb7b4b27 100644 --- a/.github/workflows/go-fuzz.yml +++ b/.github/workflows/go-fuzz.yml @@ -1,4 +1,4 @@ -on: [push, pull_request] +on: [ push, pull_request ] name: Go Fuzz jobs: @@ -21,8 +21,7 @@ jobs: go version go env - name: Run Fuzzing for 1m - with: - run: go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m . + run: go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m . v2: strategy: fail-fast: true @@ -42,7 +41,6 @@ jobs: go version go env - name: Run Fuzzing for 1m - with: - run: | - cd v2 - go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m . + run: | + cd v2 + go test -v -fuzz=Fuzz${{ matrix.target }} -fuzztime=1m . From a971f7c697b5b5e042f6689465fd6c9f0a3a3997 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 4 Jul 2022 20:08:34 +0100 Subject: [PATCH 34/37] Revert changes to `index.Index` while keeping most of security fixes Revert the changes to `index.Index` interface such that it is the same as the current go-car main branch head. 
Reverting the changes, however, means that unmarshalling untrusted indices is indeed dangerous and should not be done on untrusted files. Note, the `carv2.Reader` APIs are changed to return errors as well as readers when getting `DataReader` and `IndexReader`. This is to accommodate issues detected by fuzz testing while removing boiler plate code in internal IO reader conversion. This is a breaking change to the current API but should be straight forward to roll out. Remove index fuzz tests and change inspection to only read the index codec instead of reading the entire index. --- v2/bench_test.go | 6 +- v2/blockstore/readonly.go | 59 ++++++++--- v2/blockstore/readonly_test.go | 4 +- v2/blockstore/readwrite.go | 13 ++- v2/blockstore/readwrite_test.go | 49 ++++++---- v2/example_test.go | 6 +- v2/fuzz_test.go | 69 ++++--------- v2/index/example_test.go | 36 +++---- v2/index/index.go | 64 +++++------- v2/index/index_test.go | 6 +- v2/index/indexsorted.go | 141 +++++++++------------------ v2/index/indexsorted_test.go | 15 +-- v2/index/mhindexsorted.go | 74 +++++--------- v2/index/mhindexsorted_test.go | 7 +- v2/index/testutil/equal_index.go | 71 -------------- v2/index_gen.go | 12 ++- v2/index_gen_test.go | 7 +- v2/internal/io/fullReaderAt.go | 20 ---- v2/internal/io/offset_read_seeker.go | 30 +----- v2/reader.go | 37 ++++--- v2/reader_test.go | 39 +++++--- v2/writer_test.go | 11 ++- 22 files changed, 304 insertions(+), 472 deletions(-) delete mode 100644 v2/index/testutil/equal_index.go delete mode 100644 v2/internal/io/fullReaderAt.go diff --git a/v2/bench_test.go b/v2/bench_test.go index e413180b..2f7dcc63 100644 --- a/v2/bench_test.go +++ b/v2/bench_test.go @@ -108,7 +108,11 @@ func BenchmarkExtractV1UsingReader(b *testing.B) { if err != nil { b.Fatal(err) } - _, err = io.Copy(dst, reader.DataReader()) + dr, err := reader.DataReader() + if err != nil { + b.Fatal(err) + } + _, err = io.Copy(dst, dr) if err != nil { b.Fatal(err) } diff --git 
a/v2/blockstore/readonly.go b/v2/blockstore/readonly.go index 307486d7..32c49046 100644 --- a/v2/blockstore/readonly.go +++ b/v2/blockstore/readonly.go @@ -120,15 +120,28 @@ func NewReadOnly(backing io.ReaderAt, idx index.Index, opts ...carv2.Option) (*R } if idx == nil { if v2r.Header.HasIndex() { - idx, err = index.ReadFrom(v2r.IndexReader()) + ir, err := v2r.IndexReader() if err != nil { return nil, err } - } else if idx, err = generateIndex(v2r.DataReader(), opts...); err != nil { - return nil, err + idx, err = index.ReadFrom(ir) + if err != nil { + return nil, err + } + } else { + dr, err := v2r.DataReader() + if err != nil { + return nil, err + } + if idx, err = generateIndex(dr, opts...); err != nil { + return nil, err + } } } - b.backing = v2r.DataReader() + b.backing, err = v2r.DataReader() + if err != nil { + return nil, err + } b.idx = idx return b, nil default: @@ -142,7 +155,11 @@ func readVersion(at io.ReaderAt, opts ...carv2.Option) (uint64, error) { case io.Reader: rr = r default: - rr = internalio.NewOffsetReadSeeker(r, 0) + var err error + rr, err = internalio.NewOffsetReadSeeker(r, 0) + if err != nil { + return 0, err + } } return carv2.ReadVersion(rr, opts...) } @@ -157,7 +174,11 @@ func generateIndex(at io.ReaderAt, opts ...carv2.Option) (index.Index, error) { return nil, err } default: - rs = internalio.NewOffsetReadSeeker(r, 0) + var err error + rs, err = internalio.NewOffsetReadSeeker(r, 0) + if err != nil { + return nil, err + } } // Note, we do not set any write options so that all write options fall back onto defaults. 
@@ -183,7 +204,7 @@ func OpenReadOnly(path string, opts ...carv2.Option) (*ReadOnly, error) { } func (b *ReadOnly) readBlock(idx int64) (cid.Cid, []byte, error) { - r, err := internalio.NewOffsetReadSeekerWithError(b.backing, idx) + r, err := internalio.NewOffsetReadSeeker(b.backing, idx) if err != nil { return cid.Cid{}, nil, err } @@ -216,8 +237,11 @@ func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) { var fnFound bool var fnErr error err := b.idx.GetAll(key, func(offset uint64) bool { - uar := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) - var err error + uar, err := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + if err != nil { + fnErr = err + return false + } _, err = varint.ReadUvarint(uar) if err != nil { fnErr = err @@ -317,7 +341,11 @@ func (b *ReadOnly) GetSize(ctx context.Context, key cid.Cid) (int, error) { fnSize := -1 var fnErr error err := b.idx.GetAll(key, func(offset uint64) bool { - rdr := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + rdr, err := internalio.NewOffsetReadSeeker(b.backing, int64(offset)) + if err != nil { + fnErr = err + return false + } sectionLen, err := varint.ReadUvarint(rdr) if err != nil { fnErr = err @@ -401,7 +429,10 @@ func (b *ReadOnly) AllKeysChan(ctx context.Context) (<-chan cid.Cid, error) { } // TODO we may use this walk for populating the index, and we need to be able to iterate keys in this way somewhere for index generation. In general though, when it's asked for all keys from a blockstore with an index, we should iterate through the index when possible rather than linear reads through the full car. 
- rdr := internalio.NewOffsetReadSeeker(b.backing, 0) + rdr, err := internalio.NewOffsetReadSeeker(b.backing, 0) + if err != nil { + return nil, err + } header, err := carv1.ReadHeader(rdr, b.opts.MaxAllowedHeaderSize) if err != nil { b.mu.RUnlock() // don't hold the mutex forever @@ -492,7 +523,11 @@ func (b *ReadOnly) HashOnRead(bool) { // Roots returns the root CIDs of the backing CAR. func (b *ReadOnly) Roots() ([]cid.Cid, error) { - header, err := carv1.ReadHeader(internalio.NewOffsetReadSeeker(b.backing, 0), b.opts.MaxAllowedHeaderSize) + ors, err := internalio.NewOffsetReadSeeker(b.backing, 0) + if err != nil { + return nil, err + } + header, err := carv1.ReadHeader(ors, b.opts.MaxAllowedHeaderSize) if err != nil { return nil, fmt.Errorf("error reading car header: %w", err) } diff --git a/v2/blockstore/readonly_test.go b/v2/blockstore/readonly_test.go index f55a1e50..5a947a75 100644 --- a/v2/blockstore/readonly_test.go +++ b/v2/blockstore/readonly_test.go @@ -226,7 +226,9 @@ func newV1ReaderFromV2File(t *testing.T, carv2Path string, zeroLenSectionAsEOF b t.Cleanup(func() { f.Close() }) v2r, err := carv2.NewReader(f) require.NoError(t, err) - v1r, err := newV1Reader(v2r.DataReader(), zeroLenSectionAsEOF) + dr, err := v2r.DataReader() + require.NoError(t, err) + v1r, err := newV1Reader(dr, zeroLenSectionAsEOF) require.NoError(t, err) return v1r } diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index 0fad9893..774f37ff 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -139,7 +139,7 @@ func OpenReadWrite(path string, roots []cid.Cid, opts ...carv2.Option) (*ReadWri offset = 0 } rwbs.dataWriter = internalio.NewOffsetWriter(rwbs.f, offset) - v1r, err := internalio.NewOffsetReadSeekerWithError(rwbs.f, offset) + v1r, err := internalio.NewOffsetReadSeeker(rwbs.f, offset) if err != nil { return nil, err } @@ -169,11 +169,11 @@ func (b *ReadWrite) initWithRoots(v2 bool, roots []cid.Cid) error { return 
carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.dataWriter) } -func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Option) error { +func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid) error { // On resumption it is expected that the CARv2 Pragma, and the CARv1 header is successfully written. // Otherwise we cannot resume from the file. // Read pragma to assert if b.f is indeed a CARv2. - version, err := carv2.ReadVersion(b.f, opts...) + version, err := carv2.ReadVersion(b.f) if err != nil { // The file is not a valid CAR file and cannot resume from it. // Or the write must have failed before pragma was written. @@ -193,7 +193,7 @@ func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Opti // Check if file was finalized by trying to read the CARv2 header. // We check because if finalized the CARv1 reader behaviour needs to be adjusted since // EOF will not signify end of CARv1 payload. i.e. index is most likely present. - r, err := internalio.NewOffsetReadSeekerWithError(b.f, carv2.PragmaSize) + r, err := internalio.NewOffsetReadSeeker(b.f, carv2.PragmaSize) if err != nil { return err } @@ -223,7 +223,10 @@ func (b *ReadWrite) resumeWithRoots(v2 bool, roots []cid.Cid, opts ...carv2.Opti } // Use the given CARv1 padding to instantiate the CARv1 reader on file. - v1r := internalio.NewOffsetReadSeeker(b.ronly.backing, 0) + v1r, err := internalio.NewOffsetReadSeeker(b.ronly.backing, 0) + if err != nil { + return err + } header, err := carv1.ReadHeader(v1r, b.opts.MaxAllowedHeaderSize) if err != nil { // Cannot read the CARv1 header; the file is most likely corrupt. 
diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 6f64ac72..11cc99a2 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -21,7 +21,6 @@ import ( carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/blockstore" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" @@ -488,7 +487,9 @@ func TestBlockstoreResumption(t *testing.T) { wantPayloadReader, err := carv1.NewCarReader(v1f) require.NoError(t, err) - gotPayloadReader, err := carv1.NewCarReader(v2r.DataReader()) + dr, err := v2r.DataReader() + require.NoError(t, err) + gotPayloadReader, err := carv1.NewCarReader(dr) require.NoError(t, err) require.Equal(t, wantPayloadReader.Header, gotPayloadReader.Header) @@ -516,11 +517,15 @@ func TestBlockstoreResumption(t *testing.T) { // Assert index in resumed from file is identical to index generated from the data payload portion of the generated CARv2 file. 
_, err = v1f.Seek(0, io.SeekStart) require.NoError(t, err) - gotIdx, err := index.ReadFrom(v2r.IndexReader()) + ir, err := v2r.IndexReader() + require.NoError(t, err) + gotIdx, err := index.ReadFrom(ir) require.NoError(t, err) - wantIdx, err := carv2.GenerateIndex(v2r.DataReader()) + dr, err = v2r.DataReader() require.NoError(t, err) - testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) + wantIdx, err := carv2.GenerateIndex(dr) + require.NoError(t, err) + require.Equal(t, wantIdx, gotIdx) } func TestBlockstoreResumptionIsSupportedOnFinalizedFile(t *testing.T) { @@ -829,29 +834,37 @@ func TestOpenReadWrite_WritesIdentityCIDsWhenOptionIsEnabled(t *testing.T) { t.Cleanup(func() { require.NoError(t, r.Close()) }) require.True(t, r.Header.HasIndex()) - ir := r.IndexReader() + ir, err := r.IndexReader() + require.NoError(t, err) require.NotNil(t, ir) gotIdx, err := index.ReadFrom(ir) require.NoError(t, err) // Determine expected offset as the length of header plus one - header, err := carv1.ReadHeader(r.DataReader(), carv1.DefaultMaxAllowedHeaderSize) + dr, err := r.DataReader() + require.NoError(t, err) + header, err := carv1.ReadHeader(dr, carv1.DefaultMaxAllowedHeaderSize) require.NoError(t, err) object, err := cbor.DumpObject(header) require.NoError(t, err) expectedOffset := len(object) + 1 // Assert index is iterable and has exactly one record with expected multihash and offset. 
- var count int - err = gotIdx.ForEach(func(mh multihash.Multihash, offset uint64) error { - count++ - require.Equal(t, idmh, mh) - require.Equal(t, uint64(expectedOffset), offset) - return nil - }) - require.NoError(t, err) - require.Equal(t, 1, count) + switch idx := gotIdx.(type) { + case index.IterableIndex: + var i int + err := idx.ForEach(func(mh multihash.Multihash, offset uint64) error { + i++ + require.Equal(t, idmh, mh) + require.Equal(t, uint64(expectedOffset), offset) + return nil + }) + require.NoError(t, err) + require.Equal(t, 1, i) + default: + require.Failf(t, "unexpected index type", "wanted %v but got %v", multicodec.CarMultihashIndexSorted, idx.Codec()) + } } func TestOpenReadWrite_ErrorsWhenWritingTooLargeOfACid(t *testing.T) { @@ -914,7 +927,9 @@ func TestReadWrite_ReWritingCARv1WithIdentityCidIsIdenticalToOriginalWithOptions // Note, we hash instead of comparing bytes to avoid excessive memory usage when sample CARv1 is large. hasher := sha512.New() - gotWritten, err := io.Copy(hasher, v2r.DataReader()) + dr, err := v2r.DataReader() + require.NoError(t, err) + gotWritten, err := io.Copy(hasher, dr) require.NoError(t, err) gotSum := hasher.Sum(nil) diff --git a/v2/example_test.go b/v2/example_test.go index 53dfa349..57378aea 100644 --- a/v2/example_test.go +++ b/v2/example_test.go @@ -51,7 +51,11 @@ func ExampleWrapV1File() { if err != nil { panic(err) } - inner, err := ioutil.ReadAll(cr.DataReader()) + dr, err := cr.DataReader() + if err != nil { + panic(err) + } + inner, err := ioutil.ReadAll(dr) if err != nil { panic(err) } diff --git a/v2/fuzz_test.go b/v2/fuzz_test.go index c7473750..c45d83cb 100644 --- a/v2/fuzz_test.go +++ b/v2/fuzz_test.go @@ -12,7 +12,6 @@ import ( "testing" car "github.com/ipld/go-car/v2" - "github.com/ipld/go-car/v2/index" "github.com/ipld/go-car/v2/internal/carv1" ) @@ -73,48 +72,15 @@ func FuzzReader(f *testing.F) { } subject.Roots() - ir := subject.IndexReader() - if ir != nil { - index.ReadFrom(ir) + _, err = 
subject.IndexReader() + if err != nil { + return } - car.GenerateIndex(subject.DataReader()) - }) -} - -func FuzzIndex(f *testing.F) { - files, err := filepath.Glob("testdata/*.car") - if err != nil { - f.Fatal(err) - } - for _, fname := range files { - func() { - file, err := os.Open(fname) - if err != nil { - f.Fatal(err) - } - defer file.Close() - subject, err := car.NewReader(file) - if err != nil { - return - } - indexRdr := subject.IndexReader() - if indexRdr == nil { - return - } - _, n, err := index.ReadFromWithSize(indexRdr) - if err != nil { - return - } - data, err := io.ReadAll(io.NewSectionReader(indexRdr, 0, n)) - if err != nil { - f.Fatal(err) - } - f.Add(data) - }() - } - - f.Fuzz(func(t *testing.T, data []byte) { - index.ReadFrom(bytes.NewReader(data)) + dr, err := subject.DataReader() + if err != nil { + return + } + car.GenerateIndex(dr) }) } @@ -137,15 +103,18 @@ func FuzzInspect(f *testing.F) { if err != nil { t.Fatal("second NewReader on same data failed", err.Error()) } - - if i := reader.IndexReader(); i != nil { - _, err = index.ReadFrom(i) - if err != nil { - return - } + i, err := reader.IndexReader() + if err != nil { + return + } + // FIXME: Once indexes are safe to parse, do not skip .car with index in the differential fuzzing. 
+ if i == nil { + return + } + dr, err := reader.DataReader() + if err != nil { + return } - - dr := reader.DataReader() _, err = carv1.ReadHeader(dr, carv1.DefaultMaxAllowedHeaderSize) if err != nil { diff --git a/v2/index/example_test.go b/v2/index/example_test.go index c6f83ea2..d2a9da54 100644 --- a/v2/index/example_test.go +++ b/v2/index/example_test.go @@ -5,10 +5,10 @@ import ( "io" "io/ioutil" "os" + "reflect" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" - "github.com/multiformats/go-multihash" ) // ExampleReadFrom unmarshalls an index from an indexed CARv2 file, and for each root CID prints the @@ -28,7 +28,11 @@ func ExampleReadFrom() { } // Read and unmarshall index within CARv2 file. - idx, err := index.ReadFrom(cr.IndexReader()) + ir, err := cr.IndexReader() + if err != nil { + panic(err) + } + idx, err := index.ReadFrom(ir) if err != nil { panic(err) } @@ -62,7 +66,11 @@ func ExampleWriteTo() { }() // Read and unmarshall index within CARv2 file. - idx, err := index.ReadFrom(cr.IndexReader()) + ir, err := cr.IndexReader() + if err != nil { + panic(err) + } + idx, err := index.ReadFrom(ir) if err != nil { panic(err) } @@ -94,27 +102,13 @@ func ExampleWriteTo() { panic(err) } - // Expect indices to be equal - collect all of the multihashes and their - // offsets from the first and compare to the second - mha := make(map[string]uint64, 0) - _ = idx.ForEach(func(mh multihash.Multihash, off uint64) error { - mha[mh.HexString()] = off - return nil - }) - var count int - _ = reReadIdx.ForEach(func(mh multihash.Multihash, off uint64) error { - count++ - if expectedOffset, ok := mha[mh.HexString()]; !ok || expectedOffset != off { - panic("expected to get the same index as the CARv2 file") - } - return nil - }) - if count != len(mha) { + // Expect indices to be equal. 
+ if reflect.DeepEqual(idx, reReadIdx) { + fmt.Printf("Saved index file matches the index embedded in CARv2 at %v.\n", src) + } else { panic("expected to get the same index as the CARv2 file") } - fmt.Printf("Saved index file matches the index embedded in CARv2 at %v.\n", src) - // Output: // Saved index file matches the index embedded in CARv2 at ../testdata/sample-wrapped-v2.car. } diff --git a/v2/index/index.go b/v2/index/index.go index 3a2b3f1d..911eaa7a 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -44,17 +44,12 @@ type ( Marshal(w io.Writer) (uint64, error) // Unmarshal decodes the index from its serial form. - // Deprecated: This function is slurpy and will copy everything into memory. + // Note, this function will copy the entire index into memory. + // + // Do not unmarshal index from untrusted CARv2 files. Instead the index should be + // regenerated from the CARv2 data payload. Unmarshal(r io.Reader) error - // UnmarshalLazyRead lazily decodes the index from its serial form. It is a - // safer alternative to to Unmarshal, particularly when reading index data - // from untrusted sources (which is not recommended) but also in more - // constrained memory environments. - // Instead of slurping UnmarshalLazyRead will keep a reference to the the - // io.ReaderAt passed in and ask for data as needed. - UnmarshalLazyRead(r io.ReaderAt) (indexSize int64, err error) - // Load inserts a number of records into the index. // Note that Index will load all given records. Any filtering of the records such as // exclusion of CIDs with multihash.IDENTITY code must occur prior to calling this function. @@ -74,17 +69,17 @@ type ( // meaning that no callbacks happen, // ErrNotFound is returned. GetAll(cid.Cid, func(uint64) bool) error + } + + // IterableIndex is an index which support iterating over it's elements + IterableIndex interface { + Index // ForEach takes a callback function that will be called // on each entry in the index. 
The arguments to the callback are // the multihash of the element, and the offset in the car file // where the element appears. // - // Note that index with codec multicodec.CarIndexSorted does not support ForEach enumeration. - // Because this index type only contains the multihash digest and not the code. - // Calling ForEach on this index type will result in error. - // Use multicodec.CarMultihashIndexSorted index type instead. - // // If the callback returns a non-nil error, the iteration is aborted, // and the ForEach function returns the error to the user. // @@ -94,12 +89,6 @@ type ( // The order of calls to the given function is deterministic, but entirely index-specific. ForEach(func(multihash.Multihash, uint64) error) error } - - // IterableIndex is an index which support iterating over it's elements - // Deprecated: IterableIndex has been moved into Index. Just use Index now. - IterableIndex interface { - Index - } ) // GetFirst is a wrapper over Index.GetAll, returning the offset for the first @@ -143,32 +132,29 @@ func WriteTo(idx Index, w io.Writer) (uint64, error) { // ReadFrom reads index from r. // The reader decodes the index by reading the first byte to interpret the encoding. // Returns error if the encoding is not known. +// // Attempting to read index data from untrusted sources is not recommended. -func ReadFrom(r io.ReaderAt) (Index, error) { - idx, _, err := ReadFromWithSize(r) - return idx, err -} - -// ReadFromWithSize is just like ReadFrom but return the size of the Index. -// The size is only valid when err != nil. -// Attempting to read index data from untrusted sources is not recommended. -func ReadFromWithSize(r io.ReaderAt) (Index, int64, error) { - code, err := varint.ReadUvarint(internalio.NewOffsetReadSeeker(r, 0)) +// Instead the index should be regenerated from the CARv2 data payload. 
+func ReadFrom(r io.Reader) (Index, error) { + codec, err := ReadCodec(r) if err != nil { - return nil, 0, err + return nil, err } - codec := multicodec.Code(code) idx, err := New(codec) if err != nil { - return nil, 0, err + return nil, err } - rdr, err := internalio.NewOffsetReadSeekerWithError(r, int64(varint.UvarintSize(code))) - if err != nil { - return nil, 0, err + if err := idx.Unmarshal(r); err != nil { + return nil, err } - n, err := idx.UnmarshalLazyRead(rdr) + return idx, nil +} + +// ReadCodec reads the codec of the index by decoding the first varint read from r. +func ReadCodec(r io.Reader) (multicodec.Code, error) { + code, err := varint.ReadUvarint(internalio.ToByteReader(r)) if err != nil { - return nil, 0, err + return 0, err } - return idx, n, nil + return multicodec.Code(code), nil } diff --git a/v2/index/index_test.go b/v2/index/index_test.go index 0d380caf..972b4c66 100644 --- a/v2/index/index_test.go +++ b/v2/index/index_test.go @@ -8,7 +8,6 @@ import ( "testing" blocks "github.com/ipfs/go-block-format" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/ipld/go-car/v2/internal/carv1/util" "github.com/multiformats/go-multicodec" @@ -55,6 +54,9 @@ func TestReadFrom(t *testing.T) { subject, err := ReadFrom(idxf) require.NoError(t, err) + _, err = idxf.Seek(0, io.SeekStart) + require.NoError(t, err) + idxf2, err := os.Open("../testdata/sample-multihash-index-sorted.carindex") require.NoError(t, err) t.Cleanup(func() { require.NoError(t, idxf2.Close()) }) @@ -126,7 +128,7 @@ func TestWriteTo(t *testing.T) { require.NoError(t, err) // Assert they are equal - testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) + require.Equal(t, wantIdx, gotIdx) } func TestMarshalledIndexStartsWithCodec(t *testing.T) { diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index 60d0e87d..aeed4c11 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -5,22 +5,16 @@ import ( "encoding/binary" 
"errors" "fmt" + internalio "github.com/ipld/go-car/v2/internal/io" "io" "sort" - "github.com/ipld/go-car/v2/internal/errsort" - internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/ipfs/go-cid" "github.com/multiformats/go-multihash" ) -type sizedReaderAt interface { - io.ReaderAt - Size() int64 -} - var _ Index = (*multiWidthIndex)(nil) type ( @@ -32,7 +26,7 @@ type ( singleWidthIndex struct { width uint32 len uint64 // in struct, len is #items. when marshaled, it's saved as #bytes. - index sizedReaderAt + index []byte } multiWidthIndex map[uint32]singleWidthIndex ) @@ -60,24 +54,27 @@ func (s *singleWidthIndex) Marshal(w io.Writer) (uint64, error) { return 0, err } l += 4 - sz := s.index.Size() - if err := binary.Write(w, binary.LittleEndian, sz); err != nil { + if err := binary.Write(w, binary.LittleEndian, int64(len(s.index))); err != nil { return l, err } l += 8 - n, err := io.Copy(w, io.NewSectionReader(s.index, 0, sz)) + n, err := w.Write(s.index) return l + uint64(n), err } -// Unmarshal decodes the index from its serial form. -// Deprecated: This function is slurpy and will copy the index in memory. 
func (s *singleWidthIndex) Unmarshal(r io.Reader) error { var width uint32 if err := binary.Read(r, binary.LittleEndian, &width); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF + } return err } var dataLen uint64 if err := binary.Read(r, binary.LittleEndian, &dataLen); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF + } return err } @@ -89,26 +86,10 @@ func (s *singleWidthIndex) Unmarshal(r io.Reader) error { if _, err := io.ReadFull(r, buf); err != nil { return err } - s.index = bytes.NewReader(buf) + s.index = buf return nil } -func (s *singleWidthIndex) UnmarshalLazyRead(r io.ReaderAt) (indexSize int64, err error) { - var b [12]byte - _, err = internalio.FullReadAt(r, b[:], 0) - if err != nil { - return 0, err - } - - width := binary.LittleEndian.Uint32(b[:4]) - dataLen := binary.LittleEndian.Uint64(b[4:12]) - if err := s.checkUnmarshalLengths(width, dataLen, uint64(len(b))); err != nil { - return 0, err - } - s.index = io.NewSectionReader(r, int64(len(b)), int64(dataLen)) - return int64(dataLen) + int64(len(b)), nil -} - func (s *singleWidthIndex) checkUnmarshalLengths(width uint32, dataLen, extra uint64) error { if width <= 8 { return errors.New("malformed index; width must be bigger than 8") @@ -129,6 +110,10 @@ func (s *singleWidthIndex) checkUnmarshalLengths(width uint32, dataLen, extra ui return nil } +func (s *singleWidthIndex) Less(i int, digest []byte) bool { + return bytes.Compare(digest[:], s.index[i*int(s.width):((i+1)*int(s.width)-8)]) <= 0 +} + func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { d, err := multihash.Decode(c.Hash()) if err != nil { @@ -138,35 +123,18 @@ func (s *singleWidthIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { } func (s *singleWidthIndex) getAll(d []byte, fn func(uint64) bool) error { - digestLen := int64(s.width) - 8 - b := make([]byte, digestLen) - idxI, err := errsort.Search(int(s.len), func(i int) (bool, error) { - digestStart := int64(i) * int64(s.width) - 
_, err := internalio.FullReadAt(s.index, b, digestStart) - if err != nil { - return false, err - } - return bytes.Compare(d, b) <= 0, nil + idx := sort.Search(int(s.len), func(i int) bool { + return s.Less(i, d) }) - if err != nil { - return err - } - idx := int64(idxI) var any bool for ; uint64(idx) < s.len; idx++ { - digestStart := idx * int64(s.width) - offsetEnd := digestStart + int64(s.width) + digestStart := idx * int(s.width) + offsetEnd := (idx + 1) * int(s.width) digestEnd := offsetEnd - 8 - digestLen := digestEnd - digestStart - b := make([]byte, offsetEnd-digestStart) - _, err := internalio.FullReadAt(s.index, b, digestStart) - if err != nil { - return err - } - if bytes.Equal(d, b[:digestLen]) { + if bytes.Equal(d[:], s.index[digestStart:digestEnd]) { any = true - offset := binary.LittleEndian.Uint64(b[digestLen:]) + offset := binary.LittleEndian.Uint64(s.index[digestEnd:offsetEnd]) if !fn(offset) { // User signalled to stop searching; therefore, break. break @@ -200,19 +168,13 @@ func (s *singleWidthIndex) Load(items []Record) error { } func (s *singleWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error { - segmentCount := s.index.Size() / int64(s.width) - for i := int64(0); i < segmentCount; i++ { - digestStart := i * int64(s.width) - offsetEnd := digestStart + int64(s.width) + segmentCount := len(s.index) / int(s.width) + for i := 0; i < segmentCount; i++ { + digestStart := i * int(s.width) + offsetEnd := (i + 1) * int(s.width) digestEnd := offsetEnd - 8 - digestLen := digestEnd - digestStart - b := make([]byte, offsetEnd-digestStart) - _, err := internalio.FullReadAt(s.index, b, digestStart) - if err != nil { - return err - } - digest := b[:digestLen] - offset := binary.LittleEndian.Uint64(b[digestLen:]) + digest := s.index[digestStart:digestEnd] + offset := binary.LittleEndian.Uint64(s.index[digestEnd:offsetEnd]) if err := f(digest, offset); err != nil { return err } @@ -265,49 +227,38 @@ func (m *multiWidthIndex) Marshal(w 
io.Writer) (uint64, error) { } func (m *multiWidthIndex) Unmarshal(r io.Reader) error { + reader := internalio.ToByteReadSeeker(r) var l int32 - if err := binary.Read(r, binary.LittleEndian, &l); err != nil { - return err - } - for i := 0; i < int(l); i++ { - s := singleWidthIndex{} - if err := s.Unmarshal(r); err != nil { - return err + if err := binary.Read(reader, binary.LittleEndian, &l); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF } - (*m)[s.width] = s + return err } - return nil -} - -func (m *multiWidthIndex) UnmarshalLazyRead(r io.ReaderAt) (sum int64, err error) { - var b [4]byte - _, err = internalio.FullReadAt(r, b[:], 0) + sum, err := reader.Seek(0, io.SeekCurrent) if err != nil { - return 0, err + return err } - count := binary.LittleEndian.Uint32(b[:4]) - if int32(count) < 0 { - return 0, errors.New("index too big; multiWidthIndex count is overflowing int32") + if int32(l) < 0 { + return errors.New("index too big; multiWidthIndex count is overflowing int32") } - sum += int64(len(b)) - for ; count > 0; count-- { + for i := 0; i < int(l); i++ { s := singleWidthIndex{} - or, err := internalio.NewOffsetReadSeekerWithError(r, sum) - if err != nil { - return 0, err + if err := s.Unmarshal(r); err != nil { + return err } - n, err := s.UnmarshalLazyRead(or) + n, err := reader.Seek(0, io.SeekCurrent) if err != nil { - return 0, err + return err } oldSum := sum sum += n if sum < oldSum { - return 0, errors.New("index too big; multiWidthIndex len is overflowing int64") + return errors.New("index too big; multiWidthIndex len is overflowing int64") } (*m)[s.width] = s } - return sum, nil + return nil } func (m *multiWidthIndex) Load(items []Record) error { @@ -339,17 +290,13 @@ func (m *multiWidthIndex) Load(items []Record) error { s := singleWidthIndex{ width: uint32(rcrdWdth), len: uint64(len(lst)), - index: bytes.NewReader(compact), + index: compact, } (*m)[uint32(width)+8] = s } return nil } -func (m *multiWidthIndex) 
ForEach(func(multihash.Multihash, uint64) error) error { - return fmt.Errorf("%s does not support ForEach enumeration; use %s instead", multicodec.CarIndexSorted, multicodec.CarMultihashIndexSorted) -} - func (m *multiWidthIndex) forEachDigest(f func(digest []byte, offset uint64) error) error { sizes := make([]uint32, 0, len(*m)) for k := range *m { diff --git a/v2/index/indexsorted_test.go b/v2/index/indexsorted_test.go index 8e1a4527..5c1ee449 100644 --- a/v2/index/indexsorted_test.go +++ b/v2/index/indexsorted_test.go @@ -1,27 +1,14 @@ package index import ( - "bytes" "encoding/binary" "testing" "github.com/ipfs/go-merkledag" "github.com/multiformats/go-multicodec" - "github.com/multiformats/go-multihash" "github.com/stretchr/testify/require" ) -func TestSortedIndex_ErrorsOnForEach(t *testing.T) { - subject, err := New(multicodec.CarIndexSorted) - require.NoError(t, err) - err = subject.ForEach(func(multihash.Multihash, uint64) error { return nil }) - require.Error(t, err) - require.Equal(t, - "car-index-sorted does not support ForEach enumeration; use car-multihash-index-sorted instead", - err.Error(), - ) -} - func TestSortedIndexCodec(t *testing.T) { require.Equal(t, multicodec.CarIndexSorted, newSorted().Codec()) } @@ -64,7 +51,7 @@ func TestSingleWidthIndex_GetAll(t *testing.T) { subject := &singleWidthIndex{ width: 9, len: uint64(l), - index: bytes.NewReader(buf), + index: buf, } var foundCount int diff --git a/v2/index/mhindexsorted.go b/v2/index/mhindexsorted.go index 0200f700..e0ef675d 100644 --- a/v2/index/mhindexsorted.go +++ b/v2/index/mhindexsorted.go @@ -3,17 +3,18 @@ package index import ( "encoding/binary" "errors" + internalio "github.com/ipld/go-car/v2/internal/io" "io" "sort" "github.com/ipfs/go-cid" - internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" ) var ( - _ Index = (*MultihashIndexSorted)(nil) + _ Index = (*MultihashIndexSorted)(nil) + _ IterableIndex = 
(*MultihashIndexSorted)(nil) ) type ( @@ -42,34 +43,14 @@ func (m *multiWidthCodedIndex) Marshal(w io.Writer) (uint64, error) { func (m *multiWidthCodedIndex) Unmarshal(r io.Reader) error { if err := binary.Read(r, binary.LittleEndian, &m.code); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF + } return err } return m.multiWidthIndex.Unmarshal(r) } -func (m *multiWidthCodedIndex) UnmarshalLazyRead(r io.ReaderAt) (int64, error) { - var b [8]byte - _, err := internalio.FullReadAt(r, b[:], 0) - if err != nil { - return 0, err - } - m.code = binary.LittleEndian.Uint64(b[:8]) - rdr, err := internalio.NewOffsetReadSeekerWithError(r, int64(len(b))) - if err != nil { - return 0, err - } - sum, err := m.multiWidthIndex.UnmarshalLazyRead(rdr) - if err != nil { - return 0, err - } - oldSum := sum - sum += int64(len(b)) - if sum < oldSum { - return 0, errors.New("index too big; multiWidthCodedIndex len is overflowing") - } - return sum, nil -} - func (m *multiWidthCodedIndex) forEach(f func(mh multihash.Multihash, offset uint64) error) error { return m.multiWidthIndex.forEachDigest(func(digest []byte, offset uint64) error { mh, err := multihash.Encode(digest, m.code) @@ -117,49 +98,38 @@ func (m *MultihashIndexSorted) sortedMultihashCodes() []uint64 { } func (m *MultihashIndexSorted) Unmarshal(r io.Reader) error { + reader := internalio.ToByteReadSeeker(r) var l int32 - if err := binary.Read(r, binary.LittleEndian, &l); err != nil { - return err - } - for i := 0; i < int(l); i++ { - mwci := newMultiWidthCodedIndex() - if err := mwci.Unmarshal(r); err != nil { - return err + if err := binary.Read(reader, binary.LittleEndian, &l); err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF } - m.put(mwci) + return err } - return nil -} - -func (m *MultihashIndexSorted) UnmarshalLazyRead(r io.ReaderAt) (sum int64, err error) { - var b [4]byte - _, err = internalio.FullReadAt(r, b[:], 0) + sum, err := reader.Seek(0, io.SeekCurrent) if err != nil { - return 0, err 
+ return err } - sum += int64(len(b)) - count := binary.LittleEndian.Uint32(b[:4]) - if int32(count) < 0 { - return 0, errors.New("index too big; MultihashIndexSorted count is overflowing int32") + if int32(l) < 0 { + return errors.New("index too big; MultihashIndexSorted count is overflowing int32") } - for ; count > 0; count-- { + for i := 0; i < int(l); i++ { mwci := newMultiWidthCodedIndex() - or, err := internalio.NewOffsetReadSeekerWithError(r, sum) - if err != nil { - return 0, err + if err := mwci.Unmarshal(r); err != nil { + return err } - n, err := mwci.UnmarshalLazyRead(or) + n, err := reader.Seek(0, io.SeekCurrent) if err != nil { - return 0, err + return err } oldSum := sum sum += n if sum < oldSum { - return 0, errors.New("index too big; MultihashIndexSorted sum is overflowing int64") + return errors.New("index too big; MultihashIndexSorted len is overflowing int64") } m.put(mwci) } - return sum, nil + return nil } func (m *MultihashIndexSorted) put(mwci *multiWidthCodedIndex) { diff --git a/v2/index/mhindexsorted_test.go b/v2/index/mhindexsorted_test.go index 7704d3a2..520157e4 100644 --- a/v2/index/mhindexsorted_test.go +++ b/v2/index/mhindexsorted_test.go @@ -53,11 +53,14 @@ func TestMultiWidthCodedIndex_StableIterate(t *testing.T) { records = append(records, generateIndexRecords(t, multihash.IDENTITY, rng)...) // Create a new mh sorted index and load randomly generated records into it. 
- subject, err := index.New(multicodec.CarMultihashIndexSorted) + idx, err := index.New(multicodec.CarMultihashIndexSorted) require.NoError(t, err) - err = subject.Load(records) + err = idx.Load(records) require.NoError(t, err) + subject, ok := idx.(index.IterableIndex) + require.True(t, ok) + mh := make([]multihash.Multihash, 0, len(records)) require.NoError(t, subject.ForEach(func(m multihash.Multihash, _ uint64) error { mh = append(mh, m) diff --git a/v2/index/testutil/equal_index.go b/v2/index/testutil/equal_index.go deleted file mode 100644 index 43d0b3e9..00000000 --- a/v2/index/testutil/equal_index.go +++ /dev/null @@ -1,71 +0,0 @@ -package testutil - -import ( - "sync" - "testing" - - "github.com/multiformats/go-multihash" - "github.com/stretchr/testify/require" -) - -type Index interface { - ForEach(func(multihash.Multihash, uint64) error) error -} - -// insertUint64 perform one round of insertion sort on the last element -func insertUint64(s []uint64) { - switch len(s) { - case 0, 1: - return - default: - cur := s[len(s)-1] - for j := len(s) - 1; j > 0; { - j-- - if cur >= s[j] { - s[j+1] = cur - break - } - s[j+1] = s[j] - } - } -} - -func AssertIdenticalIndexes(t *testing.T, a, b Index) { - var wg sync.WaitGroup - // key is multihash.Multihash.HexString - var aCount uint - var aErr error - aMap := make(map[string][]uint64) - wg.Add(1) - - go func() { - defer wg.Done() - aErr = a.ForEach(func(mh multihash.Multihash, off uint64) error { - aCount++ - str := mh.HexString() - slice := aMap[str] - slice = append(slice, off) - insertUint64(slice) - aMap[str] = slice - return nil - }) - }() - - var bCount uint - bMap := make(map[string][]uint64) - bErr := b.ForEach(func(mh multihash.Multihash, off uint64) error { - bCount++ - str := mh.HexString() - slice := bMap[str] - slice = append(slice, off) - insertUint64(slice) - bMap[str] = slice - return nil - }) - wg.Wait() - require.NoError(t, aErr) - require.NoError(t, bErr) - - require.Equal(t, aCount, bCount) - 
require.Equal(t, aMap, bMap) -} diff --git a/v2/index_gen.go b/v2/index_gen.go index 33ba7800..b0b87453 100644 --- a/v2/index_gen.go +++ b/v2/index_gen.go @@ -209,10 +209,18 @@ func ReadOrGenerateIndex(rs io.ReadSeeker, opts ...Option) (index.Index, error) } // If index is present, then no need to generate; decode and return it. if v2r.Header.HasIndex() { - return index.ReadFrom(v2r.IndexReader()) + ir, err := v2r.IndexReader() + if err != nil { + return nil, err + } + return index.ReadFrom(ir) } // Otherwise, generate index from CARv1 payload wrapped within CARv2 format. - return GenerateIndex(v2r.DataReader(), opts...) + dr, err := v2r.DataReader() + if err != nil { + return nil, err + } + return GenerateIndex(dr, opts...) default: return nil, fmt.Errorf("unknown version %v", version) } diff --git a/v2/index_gen_test.go b/v2/index_gen_test.go index 43a9c2ac..11011e81 100644 --- a/v2/index_gen_test.go +++ b/v2/index_gen_test.go @@ -9,7 +9,6 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" internalio "github.com/ipld/go-car/v2/internal/io" "github.com/multiformats/go-multicodec" @@ -48,7 +47,9 @@ func TestGenerateIndex(t *testing.T) { t.Cleanup(func() { assert.NoError(t, v2.Close()) }) reader, err := carv2.NewReader(v2) require.NoError(t, err) - want, err := index.ReadFrom(reader.IndexReader()) + ir, err := reader.IndexReader() + require.NoError(t, err) + want, err := index.ReadFrom(ir) require.NoError(t, err) return want }, @@ -104,7 +105,7 @@ func TestGenerateIndex(t *testing.T) { if want == nil { require.Nil(t, got) } else { - testutil.AssertIdenticalIndexes(t, want, got) + require.Equal(t, want, got) } } } diff --git a/v2/internal/io/fullReaderAt.go b/v2/internal/io/fullReaderAt.go deleted file mode 100644 index 57f26685..00000000 --- a/v2/internal/io/fullReaderAt.go +++ /dev/null @@ -1,20 +0,0 @@ -package io - -import 
"io" - -func FullReadAt(r io.ReaderAt, b []byte, off int64) (sum int64, err error) { - for int64(len(b)) > sum { - n, err := r.ReadAt(b[sum:], off+sum) - sum += int64(n) - if err != nil { - if err == io.EOF { - if sum < int64(len(b)) { - return sum, io.ErrUnexpectedEOF - } - return sum, nil - } - return sum, err - } - } - return sum, nil -} diff --git a/v2/internal/io/offset_read_seeker.go b/v2/internal/io/offset_read_seeker.go index bbdcf4c6..b3899ab7 100644 --- a/v2/internal/io/offset_read_seeker.go +++ b/v2/internal/io/offset_read_seeker.go @@ -35,15 +35,7 @@ type ReadSeekerAt interface { // NewOffsetReadSeeker returns an ReadSeekerAt that reads from r // starting offset offset off and stops with io.EOF when r reaches its end. // The Seek function will panic if whence io.SeekEnd is passed. -func NewOffsetReadSeeker(r io.ReaderAt, off int64) ReadSeekerAt { - nr, err := NewOffsetReadSeekerWithError(r, off) - if err != nil { - return erroringReader{err} - } - return nr -} - -func NewOffsetReadSeekerWithError(r io.ReaderAt, off int64) (ReadSeekerAt, error) { +func NewOffsetReadSeeker(r io.ReaderAt, off int64) (ReadSeekerAt, error) { if or, ok := r.(*offsetReadSeeker); ok { oldBase := or.base newBase := or.base + off @@ -128,23 +120,3 @@ func (o *offsetReadSeeker) Seek(offset int64, whence int) (int64, error) { func (o *offsetReadSeeker) Position() int64 { return o.off - o.base } - -type erroringReader struct { - err error -} - -func (e erroringReader) Read(_ []byte) (int, error) { - return 0, e.err -} - -func (e erroringReader) ReadAt(_ []byte, n int64) (int, error) { - return 0, e.err -} - -func (e erroringReader) ReadByte() (byte, error) { - return 0, e.err -} - -func (e erroringReader) Seek(_ int64, _ int) (int64, error) { - return 0, e.err -} diff --git a/v2/reader.go b/v2/reader.go index c34a0672..4628fd89 100644 --- a/v2/reader.go +++ b/v2/reader.go @@ -56,8 +56,10 @@ func NewReader(r io.ReaderAt, opts ...Option) (*Reader, error) { } cr.opts = 
ApplyOptions(opts...) - or := internalio.NewOffsetReadSeeker(r, 0) - var err error + or, err := internalio.NewOffsetReadSeeker(r, 0) + if err != nil { + return nil, err + } cr.Version, err = ReadVersion(or, opts...) if err != nil { return nil, err @@ -82,7 +84,11 @@ func (r *Reader) Roots() ([]cid.Cid, error) { if r.roots != nil { return r.roots, nil } - header, err := carv1.ReadHeader(r.DataReader(), r.opts.MaxAllowedHeaderSize) + dr, err := r.DataReader() + if err != nil { + return nil, err + } + header, err := carv1.ReadHeader(dr, r.opts.MaxAllowedHeaderSize) if err != nil { return nil, err } @@ -106,9 +112,9 @@ type SectionReader interface { } // DataReader provides a reader containing the data payload in CARv1 format. -func (r *Reader) DataReader() SectionReader { +func (r *Reader) DataReader() (SectionReader, error) { if r.Version == 2 { - return io.NewSectionReader(r.r, int64(r.Header.DataOffset), int64(r.Header.DataSize)) + return io.NewSectionReader(r.r, int64(r.Header.DataOffset), int64(r.Header.DataSize)), nil } return internalio.NewOffsetReadSeeker(r.r, 0) } @@ -116,9 +122,9 @@ func (r *Reader) DataReader() SectionReader { // IndexReader provides an io.Reader containing the index for the data payload if the index is // present. Otherwise, returns nil. // Note, this function will always return nil if the backing payload represents a CARv1. 
-func (r *Reader) IndexReader() io.ReaderAt { +func (r *Reader) IndexReader() (io.Reader, error) { if r.Version == 1 || !r.Header.HasIndex() { - return nil + return nil, nil } return internalio.NewOffsetReadSeeker(r.r, int64(r.Header.IndexOffset)) } @@ -139,7 +145,6 @@ type Stats struct { MaxBlockLength uint64 MinBlockLength uint64 IndexCodec multicodec.Code - IndexSize uint64 } // Inspect does a quick scan of a CAR, performing basic validation of the format @@ -201,7 +206,10 @@ func (r *Reader) Inspect(validateBlockHash bool) (Stats, error) { var minCidLength uint64 = math.MaxUint64 var minBlockLength uint64 = math.MaxUint64 - dr := r.DataReader() + dr, err := r.DataReader() + if err != nil { + return Stats{}, err + } bdr := internalio.ToByteReader(dr) // read roots, not using Roots(), because we need the offset setup in the data trader @@ -327,14 +335,15 @@ func (r *Reader) Inspect(validateBlockHash bool) (Stats, error) { } if stats.Version != 1 && stats.Header.HasIndex() { - // performs an UnmarshalLazyRead which should have its own validation and - // is intended to be a fast initial scan - ind, size, err := index.ReadFromWithSize(r.IndexReader()) + idxr, err := r.IndexReader() + if err != nil { + return Stats{}, err + } + idx, err := index.ReadFrom(idxr) if err != nil { return Stats{}, err } - stats.IndexCodec = ind.Codec() - stats.IndexSize = uint64(size) + stats.IndexCodec = idx.Codec() } return stats, nil diff --git a/v2/reader_test.go b/v2/reader_test.go index ed2b78e2..cdeb74d7 100644 --- a/v2/reader_test.go +++ b/v2/reader_test.go @@ -11,7 +11,6 @@ import ( "github.com/ipfs/go-cid" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/multiformats/go-multicodec" "github.com/stretchr/testify/require" @@ -141,7 +140,9 @@ func TestReader_WithCarV1Consistency(t *testing.T) { gotRoots, err := subject.Roots() require.NoError(t, err) 
require.Equal(t, wantReader.Header.Roots, gotRoots) - require.Nil(t, subject.IndexReader()) + ir, err := subject.IndexReader() + require.Nil(t, ir) + require.NoError(t, err) }) } } @@ -173,13 +174,16 @@ func TestReader_WithCarV2Consistency(t *testing.T) { require.NoError(t, err) require.Equal(t, wantReader.Header.Roots, gotRoots) - gotIndexReader := subject.IndexReader() + gotIndexReader, err := subject.IndexReader() + require.NoError(t, err) require.NotNil(t, gotIndexReader) gotIndex, err := index.ReadFrom(gotIndexReader) require.NoError(t, err) - wantIndex, err := carv2.GenerateIndex(subject.DataReader()) + dr, err := subject.DataReader() require.NoError(t, err) - testutil.AssertIdenticalIndexes(t, wantIndex, gotIndex) + wantIndex, err := carv2.GenerateIndex(dr) + require.NoError(t, err) + require.Equal(t, wantIndex, gotIndex) }) } } @@ -187,13 +191,15 @@ func TestReader_WithCarV2Consistency(t *testing.T) { func TestOpenReader_DoesNotPanicForReadersCreatedBeforeClosure(t *testing.T) { subject, err := carv2.OpenReader("testdata/sample-wrapped-v2.car") require.NoError(t, err) - dReaderBeforeClosure := subject.DataReader() - iReaderBeforeClosure := subject.IndexReader() + dReaderBeforeClosure, err := subject.DataReader() + require.NoError(t, err) + iReaderBeforeClosure, err := subject.IndexReader() + require.NoError(t, err) require.NoError(t, subject.Close()) buf := make([]byte, 1) - panicTest := func(r io.ReaderAt) { - _, err := r.ReadAt(buf, 0) + panicTest := func(r io.Reader) { + _, err := r.Read(buf) require.EqualError(t, err, "mmap: closed") } @@ -205,12 +211,14 @@ func TestOpenReader_DoesNotPanicForReadersCreatedAfterClosure(t *testing.T) { subject, err := carv2.OpenReader("testdata/sample-wrapped-v2.car") require.NoError(t, err) require.NoError(t, subject.Close()) - dReaderAfterClosure := subject.DataReader() - iReaderAfterClosure := subject.IndexReader() + dReaderAfterClosure, err := subject.DataReader() + require.NoError(t, err) + iReaderAfterClosure, err 
:= subject.IndexReader() + require.NoError(t, err) buf := make([]byte, 1) - panicTest := func(r io.ReaderAt) { - _, err := r.ReadAt(buf, 0) + panicTest := func(r io.Reader) { + _, err := r.Read(buf) require.EqualError(t, err, "mmap: closed") } @@ -237,7 +245,9 @@ func TestReader_ReturnsNilWhenThereIsNoIndex(t *testing.T) { subject, err := carv2.OpenReader(tt.path) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, subject.Close()) }) - require.Nil(t, subject.IndexReader()) + ir, err := subject.IndexReader() + require.NoError(t, err) + require.Nil(t, ir) }) } } @@ -365,7 +375,6 @@ func TestInspect(t *testing.T) { MaxBlockLength: 9, MinBlockLength: 4, IndexCodec: multicodec.CarMultihashIndexSorted, - IndexSize: 148, }, }, // same as CarV1 but with a zero-byte EOF to test options diff --git a/v2/writer_test.go b/v2/writer_test.go index 1bf3ca3d..12dcd6a9 100644 --- a/v2/writer_test.go +++ b/v2/writer_test.go @@ -9,7 +9,6 @@ import ( "testing" "github.com/ipld/go-car/v2/index" - "github.com/ipld/go-car/v2/index/testutil" "github.com/ipld/go-car/v2/internal/carv1" "github.com/stretchr/testify/require" @@ -48,16 +47,20 @@ func TestWrapV1(t *testing.T) { require.NoError(t, err) wantPayload, err := ioutil.ReadAll(sf) require.NoError(t, err) - gotPayload, err := ioutil.ReadAll(subject.DataReader()) + dr, err := subject.DataReader() + require.NoError(t, err) + gotPayload, err := ioutil.ReadAll(dr) require.NoError(t, err) require.Equal(t, wantPayload, gotPayload) // Assert embedded index in CARv2 is same as index generated from the original CARv1. 
wantIdx, err := GenerateIndexFromFile(src) require.NoError(t, err) - gotIdx, err := index.ReadFrom(subject.IndexReader()) + ir, err := subject.IndexReader() + require.NoError(t, err) + gotIdx, err := index.ReadFrom(ir) require.NoError(t, err) - testutil.AssertIdenticalIndexes(t, wantIdx, gotIdx) + require.Equal(t, wantIdx, gotIdx) } func TestExtractV1(t *testing.T) { From dec4ca192e5f8cdb55c3dc42520206a070b3e885 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Wed, 6 Jul 2022 09:32:31 +0100 Subject: [PATCH 35/37] Revert changes to `insertionindex` Revert changes to serialization of `insertionindex` postponed until the streaming index work stream. --- v2/blockstore/insertionindex.go | 42 ++++++++++++++++++++++++++------- v2/go.mod | 1 + v2/go.sum | 1 + 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/v2/blockstore/insertionindex.go b/v2/blockstore/insertionindex.go index 192eb5c3..1e480b3f 100644 --- a/v2/blockstore/insertionindex.go +++ b/v2/blockstore/insertionindex.go @@ -2,6 +2,7 @@ package blockstore import ( "bytes" + "encoding/binary" "errors" "fmt" "io" @@ -11,6 +12,7 @@ import ( "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" "github.com/petar/GoLLRB/llrb" + cbor "github.com/whyrusleeping/cbor/go" ) // This index is intended to be efficient for random-access, in-memory lookups @@ -106,7 +108,37 @@ func (ii *insertionIndex) GetAll(c cid.Cid, fn func(uint64) bool) error { } func (ii *insertionIndex) Marshal(w io.Writer) (uint64, error) { - return 0, fmt.Errorf("unimplemented, index type not intended for serialization") + l := uint64(0) + if err := binary.Write(w, binary.LittleEndian, int64(ii.items.Len())); err != nil { + return l, err + } + l += 8 + + var err error + iter := func(i llrb.Item) bool { + if err = cbor.Encode(w, i.(recordDigest).Record); err != nil { + return false + } + return true + } + ii.items.AscendGreaterOrEqual(ii.items.Min(), iter) + return l, err +} + +func (ii *insertionIndex) 
Unmarshal(r io.Reader) error { + var length int64 + if err := binary.Read(r, binary.LittleEndian, &length); err != nil { + return err + } + d := cbor.NewDecoder(r) + for i := int64(0); i < length; i++ { + var rec index.Record + if err := d.Decode(&rec); err != nil { + return err + } + ii.items.InsertNoReplace(newRecordDigest(rec)) + } + return nil } func (ii *insertionIndex) ForEach(f func(multihash.Multihash, uint64) error) error { @@ -123,14 +155,6 @@ func (ii *insertionIndex) ForEach(f func(multihash.Multihash, uint64) error) err return errr } -func (ii *insertionIndex) Unmarshal(r io.Reader) error { - return fmt.Errorf("unimplemented, index type not intended for deserialization") -} - -func (ii *insertionIndex) UnmarshalLazyRead(r io.ReaderAt) (int64, error) { - return 0, fmt.Errorf("unimplemented, index type not intended for deserialization") -} - func (ii *insertionIndex) Codec() multicodec.Code { return insertionIndexCodec } diff --git a/v2/go.mod b/v2/go.mod index 3ac37f97..b2686a7c 100644 --- a/v2/go.mod +++ b/v2/go.mod @@ -18,6 +18,7 @@ require ( github.com/multiformats/go-varint v0.0.6 github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 github.com/stretchr/testify v1.7.0 + github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 golang.org/x/exp v0.0.0-20210615023648-acb5c1269671 ) diff --git a/v2/go.sum b/v2/go.sum index d81ea5eb..1b7eb9f9 100644 --- a/v2/go.sum +++ b/v2/go.sum @@ -880,6 +880,7 @@ github.com/warpfork/go-testmark v0.3.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2r github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a h1:G++j5e0OC488te356JvdhaM8YS6nMsjLAYF7JxCv07w= github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= +github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 h1:5HZfQkwe0mIfyDmc1Em5GqlNRzcdtlv4HTNmdpt7XH0= 
github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11/go.mod h1:Wlo/SzPmxVp6vXpGt/zaXhHH0fn4IxgqZc82aKg6bpQ= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 h1:WXhVOwj2USAXB5oMDwRl3piOux2XMV9TANaYxXHdkoE= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158/go.mod h1:Xj/M2wWU+QdTdRbu/L/1dIZY8/Wb2K9pAhtroQuxJJI= From 80bb0d51cfc4f85904dc8ddf30ee55dcc6dba9ae Mon Sep 17 00:00:00 2001 From: Jorropo Date: Wed, 6 Jul 2022 10:42:52 +0200 Subject: [PATCH 36/37] ci: remove the reverted FuzzIndex fuzzer --- .github/workflows/go-fuzz.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go-fuzz.yml b/.github/workflows/go-fuzz.yml index cb7b4b27..830fc9ec 100644 --- a/.github/workflows/go-fuzz.yml +++ b/.github/workflows/go-fuzz.yml @@ -26,7 +26,7 @@ jobs: strategy: fail-fast: true matrix: - target: [ "BlockReader", "Reader", "Index", "Inspect" ] + target: [ "BlockReader", "Reader", "Inspect" ] runs-on: ubuntu-latest name: Fuzz V2 ${{ matrix.target }} steps: From d68cd32008c803faafabc04cb1d8d016609fbfe8 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Wed, 6 Jul 2022 10:12:15 +0100 Subject: [PATCH 37/37] Bump version in prep for releasing go-car `v0` --- version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.json b/version.json index 1f94dbba..372b6eab 100644 --- a/version.json +++ b/version.json @@ -1,3 +1,3 @@ { - "version": "v0.3.3" + "version": "v0.4.0" }