Skip to content

Commit

Permalink
Merge pull request #38 from ipfs/feat/preload
Browse files Browse the repository at this point in the history
add an ADL to preload hamt loading
  • Loading branch information
willscott authored Oct 15, 2022
2 parents 4d36ba6 + f9e443f commit 475ed65
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 2 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ This is an IPLD ADL that provides string based pathing for protobuf nodes. The t

Note that while it works internally with go-codec-dagpb, the Reify method (used to get a UnixFSNode from a DagPB node) should actually work successfully with go-ipld-prime-proto nodes as well.

## Usage

The primary interaction with this package is to register an ADL on a link system. This is done via a helper method.

```go
AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem)
```

For link systems which have UnixFS reification registered, two ADLs will be available to the [`InterpretAs`](https://ipld.io/specs/selectors/) selector: 'unixfs' and 'unixfs-preload'. The difference between these two ADLs is that the preload variant will access all blocks within a UnixFS Object (file or directory) when that object is accessed by a selector traversal. The non-preload variant, in contrast, will only access the subset of blocks strictly needed for the traversal. In practice, this means the subset of a sharded directory needed to access a specific file, or the sub-range of a file directly accessed by a range selector.


## License

Apache-2.0/MIT © Protocol Labs
19 changes: 19 additions & 0 deletions file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,25 @@ func NewUnixFSFile(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSyst
}, nil
}

// NewUnixFSFileWithPreload builds a file node exactly as NewUnixFSFile does and
// then reads the whole file once, discarding the bytes. For files that span
// multiple blocks this performs a full load of the constituent blocks, which is
// useful when something is watching the LinkSystem for block loads in order to
// learn which blocks make up the file.
//
// It backs the "unixfs-preload" reifier. Any error from construction, from
// obtaining the byte stream, or from reading it is returned with a nil node.
func NewUnixFSFileWithPreload(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (LargeBytesNode, error) {
	node, err := NewUnixFSFile(ctx, substrate, lsys)
	if err != nil {
		return nil, err
	}

	stream, err := node.AsLargeBytes()
	if err != nil {
		return nil, err
	}

	// Read to EOF and throw the data away; only the act of reading matters here.
	_, err = io.Copy(io.Discard, stream)
	if err != nil {
		return nil, err
	}

	return node, nil
}

// A LargeBytesNode is an ipld.Node that can be streamed over. It is guaranteed to have a Bytes type.
type LargeBytesNode interface {
ipld.Node
Expand Down
16 changes: 16 additions & 0 deletions hamt/shardeddir.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ func NewUnixFSHAMTShard(ctx context.Context, substrate dagpb.PBNode, data data.U
}, nil
}

// NewUnixFSHAMTShardWithPreload constructs a UnixFSHAMTShard node from the base
// protobuf node plus a decoded UnixFSData structure via NewUnixFSHAMTShard, and
// then asks the node for its length so that the full set of hamt shards is
// iterated and loaded.
//
// A length of -1 is reported as an error. In both error cases the value
// produced by NewUnixFSHAMTShard is still returned alongside the error.
func NewUnixFSHAMTShardWithPreload(ctx context.Context, substrate dagpb.PBNode, data data.UnixFSData, lsys *ipld.LinkSystem) (ipld.Node, error) {
	n, err := NewUnixFSHAMTShard(ctx, substrate, data, lsys)
	if err != nil {
		return n, err
	}

	// The length itself is not needed; computing it is what walks the shards.
	if n.Length() == -1 {
		return n, fmt.Errorf("could not fully explore hamt during preload")
	}
	return n, nil
}

// Substrate returns the underlying node this shard was built on top of.
func (n UnixFSHAMTShard) Substrate() ipld.Node {
	substrate := n._substrate
	return substrate
}
Expand Down
30 changes: 28 additions & 2 deletions reification.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ import (
// Reify inspects an ipld Node and tries to interpret it as a UnixFSNode,
// returning the UnixFSNode when that succeeds. It is the lazy variant: it
// delegates to doReify with lazy reification enabled.
func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem) (ipld.Node, error) {
	const lazy = true
	return doReify(lnkCtx, maybePBNodeRoot, lsys, lazy)
}

// nonLazyReify behaves like Reify, except that it loads all of a directory or
// file as it reaches them. It delegates to doReify with lazy reification
// disabled.
func nonLazyReify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem) (ipld.Node, error) {
	const lazy = false
	return doReify(lnkCtx, maybePBNodeRoot, lsys, lazy)
}

func doReify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem, lazy bool) (ipld.Node, error) {
pbNode, ok := maybePBNodeRoot.(dagpb.PBNode)
if !ok {
return maybePBNodeRoot, nil
Expand All @@ -28,7 +37,12 @@ func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSy
// we could not decode the UnixFS data, therefore, not UnixFS
return defaultReifier(lnkCtx.Ctx, pbNode, lsys)
}
builder, ok := reifyFuncs[data.FieldDataType().Int()]
var builder reifyTypeFunc
if lazy {
builder, ok = lazyReifyFuncs[data.FieldDataType().Int()]
} else {
builder, ok = reifyFuncs[data.FieldDataType().Int()]
}
if !ok {
return nil, fmt.Errorf("no reification for this UnixFS node type")
}
Expand All @@ -38,6 +52,14 @@ func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSy
// reifyTypeFunc is the signature shared by the per-type builders stored in the
// reification tables. A builder receives a context, the dag-pb node acting as
// substrate, the decoded UnixFS data for that node, and the link system, and
// returns the reified node or an error.
type reifyTypeFunc func(context.Context, dagpb.PBNode, data.UnixFSData, *ipld.LinkSystem) (ipld.Node, error)

// reifyFuncs maps each UnixFS data type to the builder used on the non-lazy
// path (doReify consults this table when lazy is false). The File and
// HAMTShard entries use the "WithPreload" builders; the remaining entries use
// the plain builders.
var reifyFuncs = map[int64]reifyTypeFunc{
data.Data_File: unixFSFileReifierWithPreload,
data.Data_Metadata: defaultUnixFSReifier,
data.Data_Raw: unixFSFileReifier,
data.Data_Symlink: defaultUnixFSReifier,
data.Data_Directory: directory.NewUnixFSBasicDir,
data.Data_HAMTShard: hamt.NewUnixFSHAMTShardWithPreload,
}
var lazyReifyFuncs = map[int64]reifyTypeFunc{
data.Data_File: unixFSFileReifier,
data.Data_Metadata: defaultUnixFSReifier,
data.Data_Raw: unixFSFileReifier,
Expand All @@ -47,7 +69,7 @@ var reifyFuncs = map[int64]reifyTypeFunc{
}

// treat non-unixFS nodes like directories -- allow them to lookup by link
// TODO: Make this a separate node as directors gain more functionality
// TODO: Make this a separate node as directories gain more functionality
func defaultReifier(_ context.Context, substrate dagpb.PBNode, _ *ipld.LinkSystem) (ipld.Node, error) {
	// The context and link system are unused; the substrate is simply wrapped.
	pathed := &_PathedPBNode{_substrate: substrate}
	return pathed, nil
}
Expand All @@ -56,6 +78,10 @@ func unixFSFileReifier(ctx context.Context, substrate dagpb.PBNode, _ data.UnixF
return file.NewUnixFSFile(ctx, substrate, ls)
}

// unixFSFileReifierWithPreload adapts file.NewUnixFSFileWithPreload to the
// reifyTypeFunc signature. The decoded UnixFS data argument is ignored.
func unixFSFileReifierWithPreload(ctx context.Context, substrate dagpb.PBNode, _ data.UnixFSData, ls *ipld.LinkSystem) (ipld.Node, error) {
	preloaded, err := file.NewUnixFSFileWithPreload(ctx, substrate, ls)
	if err != nil {
		return nil, err
	}
	return preloaded, nil
}

// defaultUnixFSReifier adapts defaultReifier to the reifyTypeFunc signature.
// The decoded UnixFS data argument is ignored.
func defaultUnixFSReifier(ctx context.Context, substrate dagpb.PBNode, _ data.UnixFSData, ls *ipld.LinkSystem) (ipld.Node, error) {
	node, err := defaultReifier(ctx, substrate, ls)
	return node, err
}
Expand Down
1 change: 1 addition & 0 deletions signaling.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) {
lsys.KnownReifiers = make(map[string]linking.NodeReifier)
}
lsys.KnownReifiers["unixfs"] = Reify
lsys.KnownReifiers["unixfs-preload"] = nonLazyReify
}

// UnixFSPathSelector creates a selector for a file/path inside of a UnixFS directory
Expand Down

0 comments on commit 475ed65

Please sign in to comment.