Skip to content

Commit

Permalink
fix: don't allow go-merkledag to reorder loaded links (#675)
Browse files Browse the repository at this point in the history
* fix: don't allow go-merkledag to reorder loaded links

Ref: #673 (comment)

Ideally we can remove go-merkledag entirely from here because most (all?) of
the deal-making infrastructure uses go-codec-dagpb + go-ipld-prime traversals
and post-decode link-reordering is a subtle difference that impacts CAR
ordering for non-spec-compliant DAG-PB blocks.

* test: add test for CarOffsetWriter dag traverse order
  • Loading branch information
rvagg authored Aug 3, 2022
1 parent de35314 commit cfcd483
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 4 deletions.
13 changes: 9 additions & 4 deletions car/car_offset_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,13 @@ func (s *CarOffsetWriter) writeBlocks(ctx context.Context, w io.Writer, headerSi
return nil, fmt.Errorf("getting block %s: %w", c, err)
}

// take a copy of the links array before nd.RawData() triggers a sort
links := make([]*format.Link, len(nd.Links()))
copy(links, nd.Links())
byts := nd.RawData()

// Get the size of the block and metadata
ldsize := util.LdSize(nd.Cid().Bytes(), nd.RawData())
ldsize := util.LdSize(nd.Cid().Bytes(), byts)

// Check if the offset from which to start writing is in or before this
// block
Expand All @@ -121,7 +126,7 @@ func (s *CarOffsetWriter) writeBlocks(ctx context.Context, w io.Writer, headerSi

// Write the block data to the writer, starting at the block offset
_, err = skipWrite(w, blockWriteOffset, func(sw io.Writer) (int, error) {
return 0, util.LdWrite(sw, nd.Cid().Bytes(), nd.RawData())
return 0, util.LdWrite(sw, nd.Cid().Bytes(), byts)
})
if err != nil {
return nil, fmt.Errorf("writing CAR block %s: %w", c, err)
Expand All @@ -132,13 +137,13 @@ func (s *CarOffsetWriter) writeBlocks(ctx context.Context, w io.Writer, headerSi
s.blockInfos.Put(nd.Cid(), &BlockInfo{
offset: offset,
size: ldsize,
links: nd.Links(),
links: links,
})

offset = nextBlockOffset

// Return any links from this block to other DAG blocks
return nd.Links(), nil
return links, nil
}

seen := cid.NewSet()
Expand Down
56 changes: 56 additions & 0 deletions car/car_offset_writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"math/rand"
"testing"

blocks "github.com/ipfs/go-block-format"
"github.com/ipfs/go-blockservice"
"github.com/ipfs/go-cidutil"
"github.com/ipfs/go-datastore"
Expand All @@ -15,13 +16,68 @@ import (
chunk "github.com/ipfs/go-ipfs-chunker"
format "github.com/ipfs/go-ipld-format"
"github.com/ipfs/go-merkledag"
merkledagpb "github.com/ipfs/go-merkledag/pb"
"github.com/ipfs/go-unixfs/importer/balanced"
"github.com/ipfs/go-unixfs/importer/helpers"
"github.com/ipld/go-car"
mh "github.com/multiformats/go-multihash"
"github.com/stretchr/testify/require"
)

func TestCarOffsetWriterDagOrder(t *testing.T) {
ctx := context.Background()
ds := dss.MutexWrap(datastore.NewMapDatastore())
bs := bstore.NewBlockstore(ds)
bserv := blockservice.New(bs, nil)

// make a 3 block DAG, the links in the root aren't in DAG-PB spec sort order,
// they should be byte-order sorted by `Name` field, but we switch them here
// so we can test that traversal happens based on the links as they appear in
// the encoded bytes, which is what we get with go-codec-dagpb + go-ipld-prime
aaaa := "aaaa"
aaaaBlk := merkledag.NewRawNode([]byte(aaaa))
require.NoError(t, bserv.AddBlock(ctx, aaaaBlk))
bbbb := "bbbb"
bbbbBlk := merkledag.NewRawNode([]byte(bbbb))
require.NoError(t, bserv.AddBlock(ctx, bbbbBlk))
pbn := &merkledagpb.PBNode{
Links: []*merkledagpb.PBLink{
{Hash: bbbbBlk.Cid().Bytes(), Name: &bbbb},
{Hash: aaaaBlk.Cid().Bytes(), Name: &aaaa},
},
}
rootByts, err := pbn.Marshal()
require.NoError(t, err)
rootBlk := blocks.NewBlock(rootByts)
require.NoError(t, bserv.AddBlock(ctx, rootBlk))

// execute
fullCarCow := NewCarOffsetWriter(rootBlk.Cid(), bs, NewBlockInfoCache())
var fullBuff bytes.Buffer
require.NoError(t, fullCarCow.Write(context.Background(), &fullBuff, 0))

// decode result
reader, err := car.NewCarReader(bytes.NewReader(fullBuff.Bytes()))
require.NoError(t, err)

// header
require.Equal(t, 1, len(reader.Header.Roots))
require.Equal(t, rootBlk.Cid(), reader.Header.Roots[0])
blk, err := reader.Next()
require.NoError(t, err)

// 3 blocks, in the correct, unsorted order - root, bbbb, aaaa
require.Equal(t, blk.Cid(), rootBlk.Cid())
blk, err = reader.Next()
require.NoError(t, err)
require.Equal(t, blk.Cid(), bbbbBlk.Cid())
blk, err = reader.Next()
require.NoError(t, err)
require.Equal(t, blk.Cid(), aaaaBlk.Cid())
_, err = reader.Next()
require.Error(t, err)
}

func TestCarOffsetWriter(t *testing.T) {
ds := dss.MutexWrap(datastore.NewMapDatastore())
bs := bstore.NewBlockstore(ds)
Expand Down

0 comments on commit cfcd483

Please sign in to comment.