diff --git a/data/builder/dir_test.go b/data/builder/dir_test.go index a2b0698..0932f8f 100644 --- a/data/builder/dir_test.go +++ b/data/builder/dir_test.go @@ -3,11 +3,14 @@ package builder import ( "bytes" "fmt" + "io" "os" "path/filepath" + "strconv" "testing" "github.com/ipfs/go-cid" + ipfsutil "github.com/ipfs/go-ipfs-util" "github.com/ipfs/go-unixfsnode" dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime" @@ -19,11 +22,7 @@ func mkEntries(cnt int, ls *ipld.LinkSystem) ([]dagpb.PBLink, error) { entries := make([]dagpb.PBLink, 0, cnt) for i := 0; i < cnt; i++ { r := bytes.NewBufferString(fmt.Sprintf("%d", i)) - f, s, err := BuildUnixFSFile(r, "", ls) - if err != nil { - return nil, err - } - e, err := BuildUnixFSDirectoryEntry(fmt.Sprintf("file %d", i), int64(s), f) + e, err := mkEntry(r, fmt.Sprintf("file %d", i), ls) if err != nil { return nil, err } @@ -32,6 +31,42 @@ func mkEntries(cnt int, ls *ipld.LinkSystem) ([]dagpb.PBLink, error) { return entries, nil } +func mkEntry(r io.Reader, name string, ls *ipld.LinkSystem) (dagpb.PBLink, error) { + f, s, err := BuildUnixFSFile(r, "", ls) + if err != nil { + return nil, err + } + return BuildUnixFSDirectoryEntry(name, int64(s), f) +} + +func TestBuildUnixFSFileWrappedInDirectory_Reference(t *testing.T) { + for _, tc := range referenceTestCases { + t.Run(strconv.Itoa(tc.size), func(t *testing.T) { + buf := make([]byte, tc.size) + ipfsutil.NewSeededRand(0xdeadbeef).Read(buf) + r := bytes.NewReader(buf) + + ls := cidlink.DefaultLinkSystem() + storage := cidlink.Memory{} + ls.StorageReadOpener = storage.OpenRead + ls.StorageWriteOpener = storage.OpenWrite + + e, err := mkEntry(r, fmt.Sprintf("%d", tc.size), &ls) + require.NoError(t, err) + d, sz, err := BuildUnixFSDirectory([]dagpb.PBLink{e}, &ls) + require.NoError(t, err) + require.Equal(t, tc.wrappedExpected.String(), d.(cidlink.Link).Cid.String()) + + // check sz is the stored size of all blocks in the generated DAG + var totStored int + for _, blk := range storage.Bag { + totStored += len(blk) + } + require.Equal(t, totStored, int(sz)) + }) + } +} + func TestBuildUnixFSDirectory(t *testing.T) { ls := cidlink.DefaultLinkSystem() storage := cidlink.Memory{} diff --git a/data/builder/file.go b/data/builder/file.go index a522784..99bd5d6 100644 --- a/data/builder/file.go +++ b/data/builder/file.go @@ -9,6 +9,7 @@ import ( "github.com/ipfs/go-unixfsnode/data" dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/datamodel" cidlink "github.com/ipld/go-ipld-prime/linking/cid" basicnode "github.com/ipld/go-ipld-prime/node/basic" "github.com/multiformats/go-multicodec" @@ -18,6 +19,38 @@ import ( _ "github.com/ipld/go-ipld-prime/codec/raw" ) +type fileShardMeta struct { + link datamodel.Link + byteSize uint64 + storedSize uint64 +} + +type fileShards []fileShardMeta + +func (fs fileShards) totalByteSize() uint64 { + var total uint64 + for _, f := range fs { + total += f.byteSize + } + return total +} + +func (fs fileShards) totalStoredSize() uint64 { + var total uint64 + for _, f := range fs { + total += f.storedSize + } + return total +} + +func (fs fileShards) byteSizes() []uint64 { + sizes := make([]uint64, len(fs)) + for i, f := range fs { + sizes[i] = f.byteSize + } + return sizes +} + // BuildUnixFSFile creates a dag of ipld Nodes representing file data. 
 // This recreates the functionality previously found in
 // github.com/ipfs/go-unixfs/importer/balanced, but tailored to the
@@ -28,31 +61,29 @@ import (
 // data nodes are stored as raw bytes.
 // ref: https://github.com/ipfs/go-mfs/blob/1b1fd06cff048caabeddb02d4dbf22d2274c7971/file.go#L50
 func BuildUnixFSFile(r io.Reader, chunker string, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
-	s, err := chunk.FromString(r, chunker)
+	src, err := chunk.FromString(r, chunker)
 	if err != nil {
 		return nil, 0, err
 	}
 
-	var prev []ipld.Link
-	var prevLen []uint64
+	var prev fileShards
 	depth := 1
 	for {
-		root, size, err := fileTreeRecursive(depth, prev, prevLen, s, ls)
+		next, err := fileTreeRecursive(depth, prev, src, ls)
 		if err != nil {
 			return nil, 0, err
 		}
-		if prev != nil && prev[0] == root {
-			if root == nil {
+		if prev != nil && prev[0].link == next.link {
+			if next.link == nil {
 				node := basicnode.NewBytes([]byte{})
 				link, err := ls.Store(ipld.LinkContext{}, leafLinkProto, node)
 				return link, 0, err
 			}
-			return root, size, nil
+			return next.link, next.storedSize, nil
 		}
-		prev = []ipld.Link{root}
-		prevLen = []uint64{size}
+		prev = []fileShardMeta{next}
 		depth++
 	}
 }
@@ -75,102 +106,122 @@ var leafLinkProto = cidlink.LinkPrototype{
 	},
 }
 
-func fileTreeRecursive(depth int, children []ipld.Link, childLen []uint64, src chunk.Splitter, ls *ipld.LinkSystem) (ipld.Link, uint64, error) {
-	if depth == 1 && len(children) > 0 {
-		return nil, 0, fmt.Errorf("leaf nodes cannot have children")
-	} else if depth == 1 {
+// fileTreeRecursive packs a file into chunks recursively, returning a root for
+// this level of recursion, the number of file bytes consumed for this level of
+// recursion and the number of bytes used to store this level of recursion.
+func fileTreeRecursive(
+	depth int,
+	children fileShards,
+	src chunk.Splitter,
+	ls *ipld.LinkSystem,
+) (fileShardMeta, error) {
+	if depth == 1 {
+		// file leaf, next chunk, encode as raw bytes, store and return
+		if len(children) > 0 {
+			return fileShardMeta{}, fmt.Errorf("leaf nodes cannot have children")
+		}
 		leaf, err := src.NextBytes()
-		if err == io.EOF {
-			return nil, 0, nil
-		} else if err != nil {
-			return nil, 0, err
+		if err != nil {
+			if err == io.EOF {
+				return fileShardMeta{}, nil
+			}
+			return fileShardMeta{}, err
		}
 		node := basicnode.NewBytes(leaf)
-		return sizedStore(ls, leafLinkProto, node)
+		l, sz, err := sizedStore(ls, leafLinkProto, node)
+		if err != nil {
+			return fileShardMeta{}, err
+		}
+		return fileShardMeta{link: l, byteSize: uint64(len(leaf)), storedSize: sz}, nil
 	}
-	// depth > 1.
- totalSize := uint64(0) - blksizes := make([]uint64, 0, DefaultLinksPerBlock) + + // depth > 1 + if children == nil { - children = make([]ipld.Link, 0) - } else { - for i := range children { - blksizes = append(blksizes, childLen[i]) - totalSize += childLen[i] - } + children = make(fileShards, 0) } + + // fill up the links for this level, if we need to go beyond + // DefaultLinksPerBlock we'll end up back here making a parallel tree for len(children) < DefaultLinksPerBlock { - nxt, sz, err := fileTreeRecursive(depth-1, nil, nil, src, ls) + // descend down toward the leaves + next, err := fileTreeRecursive(depth-1, nil, src, ls) if err != nil { - return nil, 0, err - } else if nxt == nil { - // eof + return fileShardMeta{}, err + } else if next.link == nil { // eof break } - totalSize += sz - children = append(children, nxt) - childLen = append(childLen, sz) - blksizes = append(blksizes, sz) + children = append(children, next) } + if len(children) == 0 { - // empty case. - return nil, 0, nil + // empty case + return fileShardMeta{}, nil } else if len(children) == 1 { // degenerate case - return children[0], childLen[0], nil + return children[0], nil } - // make the unixfs node. + // make the unixfs node node, err := BuildUnixFS(func(b *Builder) { - FileSize(b, totalSize) - BlockSizes(b, blksizes) + FileSize(b, children.totalByteSize()) + BlockSizes(b, children.byteSizes()) }) if err != nil { - return nil, 0, err + return fileShardMeta{}, err + } + pbn, err := packFileChildren(node, children) + if err != nil { + return fileShardMeta{}, err } - // Pack into the dagpb node. + link, sz, err := sizedStore(ls, fileLinkProto, pbn) + if err != nil { + return fileShardMeta{}, err + } + return fileShardMeta{ + link: link, + byteSize: children.totalByteSize(), + storedSize: children.totalStoredSize() + sz, + }, nil +} + +func packFileChildren(node data.UnixFSData, children fileShards) (datamodel.Node, error) { dpbb := dagpb.Type.PBNode.NewBuilder() pbm, err := dpbb.BeginMap(2) if err != nil { - return nil, 0, err + return nil, err } pblb, err := pbm.AssembleEntry("Links") if err != nil { - return nil, 0, err + return nil, err } pbl, err := pblb.BeginList(int64(len(children))) if err != nil { - return nil, 0, err + return nil, err } - for i, c := range children { - pbln, err := BuildUnixFSDirectoryEntry("", int64(blksizes[i]), c) + for _, c := range children { + pbln, err := BuildUnixFSDirectoryEntry("", int64(c.storedSize), c.link) if err != nil { - return nil, 0, err + return nil, err } if err = pbl.AssembleValue().AssignNode(pbln); err != nil { - return nil, 0, err + return nil, err } } if err = pbl.Finish(); err != nil { - return nil, 0, err + return nil, err } if err = pbm.AssembleKey().AssignString("Data"); err != nil { - return nil, 0, err + return nil, err } if err = pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(node)); err != nil { - return nil, 0, err + return nil, err } if err = pbm.Finish(); err != nil { - return nil, 0, err - } - pbn := dpbb.Build() - - link, _, err := sizedStore(ls, fileLinkProto, pbn) - if err != nil { - return nil, 0, err + return nil, err } - return link, totalSize, nil + return dpbb.Build(), nil } // BuildUnixFSDirectoryEntry creates the link to a file or directory as it appears within a unixfs directory. 
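For reviewers, a minimal sketch (not part of the diff) of how the reworked builder is driven, wired to the same in-memory `cidlink.Memory` storage the tests use; the `main` wrapper and the sample input are illustrative only. With this change the second return value of `BuildUnixFSFile` is the number of bytes written to storage for the whole DAG rather than the file's byte length, which is what the updated tests assert against `storage.Bag`.

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/ipfs/go-unixfsnode/data/builder"
	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
)

func main() {
	// In-memory block store, wired the same way as in the tests above.
	ls := cidlink.DefaultLinkSystem()
	storage := cidlink.Memory{}
	ls.StorageReadOpener = storage.OpenRead
	ls.StorageWriteOpener = storage.OpenWrite

	// Build a UnixFS file DAG; "" selects the default chunker.
	root, storedSize, err := builder.BuildUnixFSFile(bytes.NewReader([]byte("hello unixfs")), "", &ls)
	if err != nil {
		panic(err)
	}

	// storedSize reports the bytes written to storage for every block in
	// the DAG, which is what the updated tests check against storage.Bag.
	fmt.Println(root.(cidlink.Link).Cid, storedSize)
}
```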
diff --git a/data/builder/file_test.go b/data/builder/file_test.go
index de3803e..2673d48 100644
--- a/data/builder/file_test.go
+++ b/data/builder/file_test.go
@@ -3,49 +3,78 @@ package builder
 import (
 	"bytes"
 	"context"
+	"strconv"
 	"testing"
 
 	"github.com/ipfs/go-cid"
-	u "github.com/ipfs/go-ipfs-util"
+	ipfsutil "github.com/ipfs/go-ipfs-util"
 	"github.com/ipfs/go-unixfsnode/file"
 	dagpb "github.com/ipld/go-codec-dagpb"
 	"github.com/ipld/go-ipld-prime"
 	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+	"github.com/stretchr/testify/require"
 )
 
-func TestBuildUnixFSFile(t *testing.T) {
-	buf := make([]byte, 10*1024*1024)
-	u.NewSeededRand(0xdeadbeef).Read(buf)
-	r := bytes.NewReader(buf)
+// referenceTestCases were generated with older IPFS libraries: both the bare form of each file
+// sharded across raw leaves with CIDv1, and the same file wrapped in a directory named after its
+// size in bytes.
+var referenceTestCases = []struct {
+	size            int
+	bareExpected    cid.Cid
+	wrappedExpected cid.Cid
+}{
+	{
+		size:            1024,
+		bareExpected:    cid.MustParse("bafkreibhn6gptq26tcez7zjklms4242pmhpiuql62ua2wlihyxdxua2nsa"),
+		wrappedExpected: cid.MustParse("bafybeig6rdrdonmqxao32uihcnnbief4qdrjg4aktfo5fmu4cdeqya3glm"),
+	},
+	{
+		size:            10 * 1024,
+		bareExpected:    cid.MustParse("bafkreicesdeiwmnqq6i44so2sebekotb5zz7ymxv7fnbnynzrtftomk5l4"),
+		wrappedExpected: cid.MustParse("bafybeihzqusxng5zb3qjtmkjizvwrv3jer2nafvcwlzhzs2p7sh7mswnsi"),
+	},
+	{
+		size:            100 * 1024,
+		bareExpected:    cid.MustParse("bafkreie72qttha6godppjndnmbyssddzh2ty2uog7cxwu3d5pzgn7nl72m"),
+		wrappedExpected: cid.MustParse("bafybeidxgheulpeflagdewrjl7oe6loqtpfxncpieu6flje5hqbmgl5q7u"),
+	},
+	{
+		size: 10 * 1024 * 1024,
+		// https://github.com/ipfs/go-unixfs/blob/a7243ebfc36eaa89d79a39d3cef3fa1e60f7e49e/importer/importer_test.go#L49C1-L49C1
+		// QmZN1qquw84zhV4j6vT56tCcmFxaDaySL1ezTXFvMdNmrK, but with --cid-version=1 all the way through the DAG
+		bareExpected:    cid.MustParse("bafybeieyxejezqto5xwcxtvh5tskowwxrn3hmbk3hcgredji3g7abtnfkq"),
+		wrappedExpected: cid.MustParse("bafybeieyal5cus7e4bazoffk5f2ltvlowjyne3z3axupo7lvvyq7dmy37m"),
+	},
+}
 
-	ls := cidlink.DefaultLinkSystem()
-	storage := cidlink.Memory{}
-	ls.StorageReadOpener = storage.OpenRead
-	ls.StorageWriteOpener = storage.OpenWrite
+func TestBuildUnixFSFile_Reference(t *testing.T) {
+	for _, tc := range referenceTestCases {
+		t.Run(strconv.Itoa(tc.size), func(t *testing.T) {
+			buf := make([]byte, tc.size)
+			ipfsutil.NewSeededRand(0xdeadbeef).Read(buf)
+			r := bytes.NewReader(buf)
 
-	f, _, err := BuildUnixFSFile(r, "", &ls)
-	if err != nil {
-		t.Fatal(err)
-	}
+			ls := cidlink.DefaultLinkSystem()
+			storage := cidlink.Memory{}
+			ls.StorageReadOpener = storage.OpenRead
+			ls.StorageWriteOpener = storage.OpenWrite
 
-	// Note: this differs from the previous
-	// go-unixfs version of this test (https://github.com/ipfs/go-unixfs/blob/master/importer/importer_test.go#L50)
-	// because this library enforces CidV1 encoding.
- expected, err := cid.Decode("bafybeieyxejezqto5xwcxtvh5tskowwxrn3hmbk3hcgredji3g7abtnfkq") - if err != nil { - t.Fatal(err) - } - if !expected.Equals(f.(cidlink.Link).Cid) { - t.Fatalf("expected CID %s, got CID %s", expected, f) - } - if _, err := storage.OpenRead(ipld.LinkContext{}, f); err != nil { - t.Fatal("expected top of file to be in store.") + f, sz, err := BuildUnixFSFile(r, "", &ls) + require.NoError(t, err) + require.Equal(t, tc.bareExpected.String(), f.(cidlink.Link).Cid.String()) + + // check sz is the stored size of all blocks in the generated DAG + var totStored int + for _, blk := range storage.Bag { + totStored += len(blk) + } + require.Equal(t, totStored, int(sz)) + }) } } func TestUnixFSFileRoundtrip(t *testing.T) { buf := make([]byte, 10*1024*1024) - u.NewSeededRand(0xdeadbeef).Read(buf) + ipfsutil.NewSeededRand(0xdeadbeef).Read(buf) r := bytes.NewReader(buf) ls := cidlink.DefaultLinkSystem() diff --git a/test/partial_file_access_test.go b/test/partial_file_access_test.go index fffe74f..5c72b6a 100644 --- a/test/partial_file_access_test.go +++ b/test/partial_file_access_test.go @@ -6,7 +6,7 @@ import ( "io" "testing" - u "github.com/ipfs/go-ipfs-util" + ipfsutil "github.com/ipfs/go-ipfs-util" "github.com/ipfs/go-unixfsnode/data/builder" "github.com/ipfs/go-unixfsnode/file" dagpb "github.com/ipld/go-codec-dagpb" @@ -21,7 +21,7 @@ import ( func TestPartialFileAccess(t *testing.T) { buf := make([]byte, 10*1024*1024) - u.NewSeededRand(0xdeadbeef).Read(buf) + ipfsutil.NewSeededRand(0xdeadbeef).Read(buf) r := bytes.NewReader(buf) ls := cidlink.DefaultLinkSystem()
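And a companion sketch, again illustrative only (the file name and contents are made up), of the wrapped-in-directory flow that the new TestBuildUnixFSFileWrappedInDirectory_Reference covers: build a file, wrap it as a named entry, build the directory, and compare the returned size with the bytes actually held by the store.

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/ipfs/go-unixfsnode/data/builder"
	dagpb "github.com/ipld/go-codec-dagpb"
	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
)

func main() {
	ls := cidlink.DefaultLinkSystem()
	storage := cidlink.Memory{}
	ls.StorageReadOpener = storage.OpenRead
	ls.StorageWriteOpener = storage.OpenWrite

	// Build the file, then wrap it as a single named directory entry.
	f, fileStored, err := builder.BuildUnixFSFile(bytes.NewReader([]byte("example body")), "", &ls)
	if err != nil {
		panic(err)
	}
	entry, err := builder.BuildUnixFSDirectoryEntry("example.txt", int64(fileStored), f)
	if err != nil {
		panic(err)
	}
	dir, dirSize, err := builder.BuildUnixFSDirectory([]dagpb.PBLink{entry}, &ls)
	if err != nil {
		panic(err)
	}

	// dirSize should match the bytes held by the store for the whole DAG,
	// mirroring the assertion in the new wrapped-directory test.
	var stored int
	for _, blk := range storage.Bag {
		stored += len(blk)
	}
	fmt.Println(dir.(cidlink.Link).Cid, dirSize, stored)
}
```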