diff --git a/README.md b/README.md index 2146b6c..f55a1b5 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,17 @@ This is an IPLD ADL that provides string based pathing for protobuf nodes. The t Note that while it works internally with go-codec-dagpb, the Reify method (used to get a UnixFSNode from a DagPB node should actually work successfully with go-ipld-prime-proto nodes) +## Usage + +The primary interaction with this package is to register an ADL on a link system. This is done via a helper method. + +```go +AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) +``` + +For link systems which have UnixFS reification registered, two ADLs will be available to the [`InterpretAs`](https://ipld.io/specs/selectors/) selector: 'unixfs' and 'unixfs-preload'. The difference between these two ADLs is that the preload variant will access all blocks within a UnixFS Object (file or directory) when that object is accessed by a selector traversal. The non-preload variant in contrast will only access the subset of blocks strictly needed for the traversal. In practice, this means the subset of a sharded directory needed to access a specific file, or the sub-range of a file directly accessed by a range selector. + + ## License Apache-2.0/MIT © Protocol Labs diff --git a/file/file.go b/file/file.go index 17a004a..d971033 100644 --- a/file/file.go +++ b/file/file.go @@ -33,6 +33,25 @@ func NewUnixFSFile(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSyst }, nil } +// NewUnixFSFileWithPreload is the same as NewUnixFSFile but it performs a full load of constituent +// blocks where the file spans multiple blocks. This is useful where a system needs to watch the +// LinkSystem for block loads to determine which blocks make up this file. +// NewUnixFSFileWithPreload is used by the "unixfs-preload" reifier. 
+func NewUnixFSFileWithPreload(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (LargeBytesNode, error) { + f, err := NewUnixFSFile(ctx, substrate, lsys) + if err != nil { + return nil, err + } + r, err := f.AsLargeBytes() + if err != nil { + return nil, err + } + if _, err := io.Copy(io.Discard, r); err != nil { + return nil, err + } + return f, nil +} + // A LargeBytesNode is an ipld.Node that can be streamed over. It is guaranteed to have a Bytes type. type LargeBytesNode interface { ipld.Node diff --git a/hamt/shardeddir.go b/hamt/shardeddir.go index ebc59c5..97a833f 100644 --- a/hamt/shardeddir.go +++ b/hamt/shardeddir.go @@ -54,6 +54,22 @@ func NewUnixFSHAMTShard(ctx context.Context, substrate dagpb.PBNode, data data.U }, nil } +// NewUnixFSHAMTShardWithPreload attempts to construct a UnixFSHAMTShard node from the base protobuf node plus +// a decoded UnixFSData structure, and then iterate through and load the full set of hamt shards. It returns a nil node and a non-nil error if the shard cannot be constructed or cannot be fully explored. +func NewUnixFSHAMTShardWithPreload(ctx context.Context, substrate dagpb.PBNode, data data.UnixFSData, lsys *ipld.LinkSystem) (ipld.Node, error) { + n, err := NewUnixFSHAMTShard(ctx, substrate, data, lsys) + if err != nil { + return nil, err + } + + traverse := n.Length() + if traverse == -1 { + return nil, fmt.Errorf("could not fully explore hamt during preload") + } + + return n, nil +} + func (n UnixFSHAMTShard) Substrate() ipld.Node { return n._substrate } diff --git a/reification.go b/reification.go index fc79291..dd293c1 100644 --- a/reification.go +++ b/reification.go @@ -15,6 +15,15 @@ import ( // Reify looks at an ipld Node and tries to interpret it as a UnixFSNode // if successful, it returns the UnixFSNode func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem) (ipld.Node, error) { + return doReify(lnkCtx, maybePBNodeRoot, lsys, true) +} + +// nonLazyReify works like Reify but will load all of a directory or file as it reaches them. 
+func nonLazyReify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem) (ipld.Node, error) { + return doReify(lnkCtx, maybePBNodeRoot, lsys, false) +} + +func doReify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem, lazy bool) (ipld.Node, error) { pbNode, ok := maybePBNodeRoot.(dagpb.PBNode) if !ok { return maybePBNodeRoot, nil @@ -28,7 +37,12 @@ func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSy // we could not decode the UnixFS data, therefore, not UnixFS return defaultReifier(lnkCtx.Ctx, pbNode, lsys) } - builder, ok := reifyFuncs[data.FieldDataType().Int()] + var builder reifyTypeFunc + if lazy { + builder, ok = lazyReifyFuncs[data.FieldDataType().Int()] + } else { + builder, ok = reifyFuncs[data.FieldDataType().Int()] + } if !ok { return nil, fmt.Errorf("no reification for this UnixFS node type") } @@ -38,6 +52,14 @@ func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSy type reifyTypeFunc func(context.Context, dagpb.PBNode, data.UnixFSData, *ipld.LinkSystem) (ipld.Node, error) var reifyFuncs = map[int64]reifyTypeFunc{ + data.Data_File: unixFSFileReifierWithPreload, + data.Data_Metadata: defaultUnixFSReifier, + data.Data_Raw: unixFSFileReifier, + data.Data_Symlink: defaultUnixFSReifier, + data.Data_Directory: directory.NewUnixFSBasicDir, + data.Data_HAMTShard: hamt.NewUnixFSHAMTShardWithPreload, +} +var lazyReifyFuncs = map[int64]reifyTypeFunc{ data.Data_File: unixFSFileReifier, data.Data_Metadata: defaultUnixFSReifier, data.Data_Raw: unixFSFileReifier, @@ -47,7 +69,7 @@ var reifyFuncs = map[int64]reifyTypeFunc{ } // treat non-unixFS nodes like directories -- allow them to lookup by link -// TODO: Make this a separate node as directors gain more functionality +// TODO: Make this a separate node as directories gain more functionality func defaultReifier(_ context.Context, substrate dagpb.PBNode, _ *ipld.LinkSystem) (ipld.Node, error) { return 
&_PathedPBNode{_substrate: substrate}, nil } @@ -56,6 +78,10 @@ func unixFSFileReifier(ctx context.Context, substrate dagpb.PBNode, _ data.UnixF return file.NewUnixFSFile(ctx, substrate, ls) } +func unixFSFileReifierWithPreload(ctx context.Context, substrate dagpb.PBNode, _ data.UnixFSData, ls *ipld.LinkSystem) (ipld.Node, error) { + return file.NewUnixFSFileWithPreload(ctx, substrate, ls) +} + func defaultUnixFSReifier(ctx context.Context, substrate dagpb.PBNode, _ data.UnixFSData, ls *ipld.LinkSystem) (ipld.Node, error) { return defaultReifier(ctx, substrate, ls) } diff --git a/signaling.go b/signaling.go index 56eb52d..056d2ab 100644 --- a/signaling.go +++ b/signaling.go @@ -15,6 +15,7 @@ func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) { lsys.KnownReifiers = make(map[string]linking.NodeReifier) } lsys.KnownReifiers["unixfs"] = Reify + lsys.KnownReifiers["unixfs-preload"] = nonLazyReify } // UnixFSPathSelector creates a selector for a file/path inside of a UnixFS directory