Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add UnixFSPathSelectorBuilder #45

Merged
merged 1 commit into from
Mar 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 92 additions & 13 deletions signaling.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,45 @@
package unixfsnode

import (
"strings"

"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/datamodel"
"github.com/ipld/go-ipld-prime/linking"
"github.com/ipld/go-ipld-prime/node/basicnode"
"github.com/ipld/go-ipld-prime/traversal/selector"
"github.com/ipld/go-ipld-prime/traversal/selector/builder"
)

// ExploreAllRecursivelySelector explores every node reachable from the point
// at which it is applied, recursing without a depth limit. It is equivalent to
// selectorparse.CommonSelector_ExploreAllRecursively, but is held as a
// SelectorSpec so that it can be passed straight to
// UnixFSPathSelectorBuilder().
var ExploreAllRecursivelySelector = specBuilder(func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec {
	limit := selector.RecursionLimitNone()
	// At each level visit all children, then recurse from each of them.
	step := ssb.ExploreAll(ssb.ExploreRecursiveEdge())
	return ssb.ExploreRecursive(limit, step)
})

// MatchUnixFSPreloadSelector matches a single node, much like
// selectorparse.CommonSelector_MatchPoint, but first interprets that node
// through the "unixfs-preload" ADL so that sharded files and directories are
// loaded as a single node. It can be used to shallow load an entire UnixFS
// directory listing, sharded or not, without loading the directory's contents.
//
// MatchUnixFSPreloadSelector is held as a SelectorSpec so that it can be
// passed straight to UnixFSPathSelectorBuilder().
var MatchUnixFSPreloadSelector = specBuilder(func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec {
	matchPoint := ssb.Matcher()
	return ssb.ExploreInterpretAs("unixfs-preload", matchPoint)
})

// MatchUnixFSSelector matches a single node, much like
// selectorparse.CommonSelector_MatchPoint, but first interprets that node
// through the "unixfs" ADL so that it is loaded as UnixFS data.
//
// In contrast to MatchUnixFSPreloadSelector, it does not preload all of the
// blocks of a sharded directory or file. Prefer MatchUnixFSPreloadSelector
// when it matters that every block making up the selected UnixFS resource is
// loaded.
var MatchUnixFSSelector = specBuilder(func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec {
	matchPoint := ssb.Matcher()
	return ssb.ExploreInterpretAs("unixfs", matchPoint)
})

func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) {
if lsys.KnownReifiers == nil {
lsys.KnownReifiers = make(map[string]linking.NodeReifier)
Expand All @@ -18,18 +48,67 @@ func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) {
lsys.KnownReifiers["unixfs-preload"] = nonLazyReify
}

// UnixFSPathSelector creates a selector for an IPLD path to a UnixFS resource,
// for use when UnixFS reification is set up on the LinkSystem being used for
// traversal.
//
// The terminus of the path is matched with MatchUnixFSSelector and the nodes
// along the path are explored but not matched. Use UnixFSPathSelectorBuilder
// for more control over the selector; this function is the same as calling
//
//	UnixFSPathSelectorBuilder(path, MatchUnixFSSelector, false)
func UnixFSPathSelector(path string) datamodel.Node {
	return UnixFSPathSelectorBuilder(path, MatchUnixFSSelector, false)
}

// UnixFSPathSelectorBuilder creates a selector for IPLD path to a UnixFS
// resource if UnixFS reification is setup on a LinkSystem being used for
// traversal.
//
// The path is interpreted according to
// github.com/ipld/go-ipld-prime/datamodel/Path rules,
// i.e.
// - leading and trailing slashes are ignored
// - redundant slashes are ignored
// - the segment `..` is a field named `..`, same with `.`
//
// targetSelector is the selector to apply to the final node in the path.
// Use ExploreAllRecursivelySelector to explore (i.e. load the blocks) all of
// the content from the terminus of the path. Use MatchUnixFSPreloadSelector to
// match the terminus of the path, but preload all blocks in sharded files and
// directories. Use MatchUnixFSSelector to match the terminus of the path, but
// not preload any blocks if the terminus is sharded. Or any other custom
// SelectorSpec can be supplied.
//
// If matchPath is false, the selector will explore, not match, so it's useful
// for traversals where block loads are important, not where the matcher visitor
// callback is important. if matchPath is true, the selector will match the
// nodes along the path while exploring them.
func UnixFSPathSelectorBuilder(path string, targetSelector builder.SelectorSpec, matchPath bool) ipld.Node {
segments := ipld.ParsePath(path)

ss := targetSelector
ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)
selectorSoFar := ssb.ExploreInterpretAs("unixfs", ssb.Matcher())
for i := len(segments) - 1; i >= 0; i-- {
selectorSoFar = ssb.ExploreInterpretAs("unixfs",
ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) {
efsb.Insert(segments[i], selectorSoFar)
}),
)

for segments.Len() > 0 {
// Wrap selector in ExploreFields as we walk back up through the path.
// We can assume each segment to be a unixfs path section, so we
// InterpretAs to make sure the node is reified through go-unixfsnode
// (if possible) and we can traverse through according to unixfs pathing
// rather than bare IPLD pathing - which also gives us the ability to
// traverse through HAMT shards.
ss = ssb.ExploreInterpretAs("unixfs", ssb.ExploreFields(
func(efsb builder.ExploreFieldsSpecBuilder) {
efsb.Insert(segments.Last().String(), ss)
},
))
if matchPath {
ss = ssb.ExploreUnion(ssb.Matcher(), ss)
}
segments = segments.Pop()
}
return selectorSoFar.Node()

return ss.Node()
}

// specBuilder runs b against a fresh SelectorSpecBuilder backed by
// basicnode.Prototype.Any and returns the SelectorSpec that b produces. It is
// used to precompile the package-level selector specs.
func specBuilder(b func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec) builder.SelectorSpec {
	ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)
	return b(ssb)
}
203 changes: 203 additions & 0 deletions signalling_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package unixfsnode_test

import (
"fmt"
"strings"
"testing"

"github.com/ipfs/go-unixfsnode"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec/dagjson"
"github.com/ipld/go-ipld-prime/traversal/selector/builder"
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
"github.com/stretchr/testify/require"
)

// Selectors are tested against JSON expected forms; this doesn't necessarily
// validate that they work as advertised. It's just a sanity check that the
// selectors are being built as expected.

// Expected DAG-JSON forms of the target selectors used by the tests below.
var (
	// explore-all-recursively, encoded from the go-ipld-prime common selector
	exploreAllJson = mustDagJson(selectorparse.CommonSelector_ExploreAllRecursively)

	// explore interpret-as (~), next (>), match (.), interpreted as unixfs-preload
	matchUnixfsPreloadJson = `{"~":{">":{".":{}},"as":"unixfs-preload"}}`

	// explore interpret-as (~), next (>), match (.), interpreted as unixfs
	matchUnixfsJson = `{"~":{">":{".":{}},"as":"unixfs"}}`
)

// TestUnixFSPathSelector checks the DAG-JSON encoding of the selectors
// produced by UnixFSPathSelector for a range of path spellings. The target of
// every expected selector is the plain "unixfs" match selector.
func TestUnixFSPathSelector(t *testing.T) {
	type pathCase struct {
		name     string
		path     string
		expected string
	}

	cases := []pathCase{
		{
			name:     "empty path",
			path:     "",
			expected: matchUnixfsJson,
		},
		{
			name:     "single field",
			path:     "/foo",
			expected: jsonFields(matchUnixfsJson, "foo"),
		},
		{
			name:     "multiple fields",
			path:     "/foo/bar",
			expected: jsonFields(matchUnixfsJson, "foo", "bar"),
		},
		{
			name:     "leading slash optional",
			path:     "foo/bar",
			expected: jsonFields(matchUnixfsJson, "foo", "bar"),
		},
		{
			name:     "trailing slash optional",
			path:     "/foo/bar/",
			expected: jsonFields(matchUnixfsJson, "foo", "bar"),
		},
		{
			// a go-ipld-prime specific thing, not clearly specified by path spec (?)
			name:     ".. is a field named ..",
			path:     "/foo/../bar/",
			expected: jsonFields(matchUnixfsJson, "foo", "..", "bar"),
		},
		{
			// a go-ipld-prime specific thing, not clearly specified by path spec
			name:     "redundant slashes ignored",
			path:     "foo///bar",
			expected: jsonFields(matchUnixfsJson, "foo", "bar"),
		},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			got := mustDagJson(unixfsnode.UnixFSPathSelector(c.path))
			require.Equal(t, c.expected, got)
		})
	}
}

// TestUnixFSPathSelectorBuilder checks the DAG-JSON encoding of the selectors
// produced by UnixFSPathSelectorBuilder across different paths, target
// selectors, and both settings of matchPath.
func TestUnixFSPathSelectorBuilder(t *testing.T) {
	type builderCase struct {
		name      string
		path      string
		target    builder.SelectorSpec
		matchPath bool
		expected  string
	}

	exploreAll := unixfsnode.ExploreAllRecursivelySelector
	preload := unixfsnode.MatchUnixFSPreloadSelector

	cases := []builderCase{
		{
			name:     "empty path",
			path:     "",
			target:   exploreAll,
			expected: exploreAllJson,
		},
		{
			name:     "empty path shallow",
			path:     "",
			target:   preload,
			expected: matchUnixfsPreloadJson,
		},
		{
			name:     "single field",
			path:     "/foo",
			target:   exploreAll,
			expected: jsonFields(exploreAllJson, "foo"),
		},
		{
			name:      "single field, match path",
			path:      "/foo",
			target:    exploreAll,
			matchPath: true,
			expected:  jsonFieldsMatchPoint(exploreAllJson, "foo"),
		},
		{
			name:     "single field shallow",
			path:     "/foo",
			target:   preload,
			expected: jsonFields(matchUnixfsPreloadJson, "foo"),
		},
		{
			name:     "multiple fields",
			path:     "/foo/bar",
			target:   exploreAll,
			expected: jsonFields(exploreAllJson, "foo", "bar"),
		},
		{
			name:      "multiple fields, match path",
			path:      "/foo/bar",
			target:    exploreAll,
			matchPath: true,
			expected:  jsonFieldsMatchPoint(exploreAllJson, "foo", "bar"),
		},
		{
			name:     "multiple fields shallow",
			path:     "/foo/bar",
			target:   preload,
			expected: jsonFields(matchUnixfsPreloadJson, "foo", "bar"),
		},
		{
			name:     "leading slash optional",
			path:     "foo/bar",
			target:   exploreAll,
			expected: jsonFields(exploreAllJson, "foo", "bar"),
		},
		{
			name:     "trailing slash optional",
			path:     "/foo/bar/",
			target:   exploreAll,
			expected: jsonFields(exploreAllJson, "foo", "bar"),
		},
		{
			// a go-ipld-prime specific thing, not clearly specified by path spec (?)
			name:     ".. is a field named ..",
			path:     "/foo/../bar/",
			target:   exploreAll,
			expected: jsonFields(exploreAllJson, "foo", "..", "bar"),
		},
		{
			// a go-ipld-prime specific thing, not clearly specified by path spec
			name:     "redundant slashes ignored",
			path:     "foo///bar",
			target:   exploreAll,
			expected: jsonFields(exploreAllJson, "foo", "bar"),
		},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			node := unixfsnode.UnixFSPathSelectorBuilder(c.path, c.target, c.matchPath)
			require.Equal(t, c.expected, mustDagJson(node))
		})
	}
}

// jsonFields returns the expected DAG-JSON for a selector that walks the given
// fields in order and applies target at the end of the path.
//
// Each field contributes one wrapping layer: explore interpret-as (~) as
// "unixfs", next (>), explore fields (f), specific field (f>), keyed by the
// field name. With no fields, target is returned unchanged.
//
// Field names are written verbatim, without any JSON escaping.
func jsonFields(target string, fields ...string) string {
	var sb strings.Builder
	for _, name := range fields {
		// Format directly into the builder rather than via an intermediate string.
		fmt.Fprintf(&sb, `{"~":{">":{"f":{"f>":{"%s":`, name)
	}
	sb.WriteString(target)
	// Close one layer per opened field.
	for range fields {
		sb.WriteString(`}}},"as":"unixfs"}}`)
	}
	return sb.String()
}

// jsonFieldsMatchPoint returns the expected DAG-JSON for a selector that walks
// the given fields in order, matching at every level on the way, and applies
// target at the end of the path.
//
// Each field contributes one wrapping layer: a union (|) of match (.) and
// explore interpret-as (~) as "unixfs", next (>), explore fields (f), specific
// field (f>), keyed by the field name. With no fields, target is returned
// unchanged.
//
// Field names are written verbatim, without any JSON escaping.
func jsonFieldsMatchPoint(target string, fields ...string) string {
	var sb strings.Builder
	for _, name := range fields {
		// Format directly into the builder rather than via an intermediate string.
		fmt.Fprintf(&sb, `{"|":[{".":{}},{"~":{">":{"f":{"f>":{"%s":`, name)
	}
	sb.WriteString(target)
	// Close one union layer per opened field.
	for range fields {
		sb.WriteString(`}}},"as":"unixfs"}}]}`)
	}
	return sb.String()
}

// mustDagJson encodes n with the DAG-JSON codec and returns the result as a
// string. It panics if encoding fails, so it is only suitable for test setup
// and assertions.
func mustDagJson(n ipld.Node) string {
	encoded, encErr := ipld.Encode(n, dagjson.Encode)
	if encErr != nil {
		panic(encErr)
	}
	return string(encoded)
}