diff --git a/.ipld b/.ipld
index caedc8d7..9582ec21 160000
--- a/.ipld
+++ b/.ipld
@@ -1 +1 @@
-Subproject commit caedc8d768e027f4722c3ac3d4f743d5ec43b5d7
+Subproject commit 9582ec2122ab82f6bdf3a5a42c4f0e27c03f0a9f
diff --git a/datamodel/path.go b/datamodel/path.go
index 9d85167f..b336b6fa 100644
--- a/datamodel/path.go
+++ b/datamodel/path.go
@@ -213,6 +213,16 @@ func (p Path) Last() PathSegment {
 	return p.segments[len(p.segments)-1]
 }
 
+// Pop returns the path that remains after dropping the final segment.
+// Popping a zero-length path yields a zero-length path.
+func (p Path) Pop() Path {
+	n := len(p.segments)
+	if n == 0 {
+		return Path{}
+	}
+	return Path{p.segments[:n-1]}
+}
+
 // Shift returns the first segment of the path together with the remaining path after that first segment.
 // If applied to a zero-length path, it returns an empty segment and the same zero-length path.
 func (p Path) Shift() (PathSegment, Path) {
diff --git a/traversal/focus.go b/traversal/focus.go
index 8814488a..bca92eeb 100644
--- a/traversal/focus.go
+++ b/traversal/focus.go
@@ -171,6 +171,8 @@ func (prog *Progress) get(n datamodel.Node, p datamodel.Path, trackProgress bool
 // a copy-on-write fashion -- and the FocusedTransform function as a whole will
 // return a new Node containing identical children except for those replaced.
 //
+// Returning nil from the TransformFn as the replacement node means "remove this".
+//
 // FocusedTransform can be used again inside the applied function!
 // This kind of composition can be useful for doing batches of updates.
 // E.g. if have a large Node graph which contains a 100-element list, and
@@ -208,6 +210,9 @@ func (prog Progress) FocusedTransform(n datamodel.Node, p datamodel.Path, fn Tra
 //
 // As implemented, this is not actually efficient if the update will be a no-op; it won't notice until it gets there.
func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembler, p datamodel.Path, fn TransformFn, createParents bool) error { + at := prog.Path + // Base case: if we've reached the end of the path, do the replacement here. + // (Note: in some cases within maps, there is another branch that is the base case, for reasons involving removes.) if p.Len() == 0 { n2, err := fn(prog, n) if err != nil { @@ -231,7 +236,7 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl if err != nil { return err } - prog.Path = prog.Path.AppendSegment(seg) + prog.Path = at.AppendSegment(seg) if err := ma.AssembleKey().AssignString(seg.String()); err != nil { return err } @@ -252,6 +257,25 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl if err != nil { return err } + // If we're approaching the end of the path, call the TransformFunc. + // We need to know if it returns nil (meaning: do a deletion) _before_ we do the AssembleKey step. + // (This results in the entire map branch having a different base case.) + var end bool + var n2 datamodel.Node + if p2.Len() == 0 { + end = true + n3, err := n.LookupBySegment(seg) + if n3 != datamodel.Absent && err != nil { // TODO badly need to simplify the standard treatment of "not found" here. Can't even fit it all in one line! See https://github.com/ipld/go-ipld-prime/issues/360. + if _, ok := err.(datamodel.ErrNotExists); !ok { + return err + } + } + prog.Path = at.AppendSegment(seg) + n2, err = fn(prog, n3) + if err != nil { + return err + } + } // Copy children over. Replace the target (preserving its current position!) while doing this, if found. // Note that we don't recurse into copying children (assuming AssignNode doesn't); this is as shallow/COW as the AssignNode implementation permits. 
var replaced bool @@ -260,16 +284,32 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl if err != nil { return err } - if err := ma.AssembleKey().AssignNode(k); err != nil { - return err - } - if asPathSegment(k).Equals(seg) { - prog.Path = prog.Path.AppendSegment(seg) - if err := prog.focusedTransform(v, ma.AssembleValue(), p2, fn, createParents); err != nil { + if asPathSegment(k).Equals(seg) { // for the segment that's either update, update within, or being removed: + if end { // the last path segment in the overall instruction gets a different case because it may need to handle deletion + if n2 == nil { + replaced = true + continue // replace with nil means delete, which means continue early here: don't even copy the key. + } + } + // as long as we're not deleting, then this key will exist in the new data. + if err := ma.AssembleKey().AssignNode(k); err != nil { return err } replaced = true - } else { + if n2 != nil { // if we already produced the replacement because we're at the end... + if err := ma.AssembleValue().AssignNode(n2); err != nil { + return err + } + } else { // ... otherwise, recurse: + prog.Path = at.AppendSegment(seg) + if err := prog.focusedTransform(v, ma.AssembleValue(), p2, fn, createParents); err != nil { + return err + } + } + } else { // for any other siblings of the target: just copy. + if err := ma.AssembleKey().AssignNode(k); err != nil { + return err + } if err := ma.AssembleValue().AssignNode(v); err != nil { return err } @@ -281,7 +321,7 @@ func (prog Progress) focusedTransform(n datamodel.Node, na datamodel.NodeAssembl // If we didn't find the target yet: append it. // If we're at the end, always do this; // if we're in the middle, only do this if createParents mode is enabled. 
-			prog.Path = prog.Path.AppendSegment(seg)
+			prog.Path = at.AppendSegment(seg)
 			if p.Len() > 1 && !createParents {
 				return fmt.Errorf("transform: parent position at %q did not exist (and createParents was false)", prog.Path)
 			}
diff --git a/traversal/patch/eval.go b/traversal/patch/eval.go
new file mode 100644
index 00000000..8c7d20d8
--- /dev/null
+++ b/traversal/patch/eval.go
@@ -0,0 +1,157 @@
+// Package patch provides an implementation of the IPLD Patch specification.
+// IPLD Patch is a system for declaratively specifying patches to a document,
+// which can then be applied to produce a new, modified document.
+//
+//
+// This package is EXPERIMENTAL; its behavior and API might change as it's still
+// in development.
+package patch
+
+import (
+	"fmt"
+
+	"github.com/ipld/go-ipld-prime/datamodel"
+	"github.com/ipld/go-ipld-prime/traversal"
+)
+
+// Op names the kind of change an Operation makes.
+type Op string
+
+// The operation names that EvalOne recognizes.
+const (
+	Op_Add     = "add"
+	Op_Remove  = "remove"
+	Op_Replace = "replace"
+	Op_Move    = "move"
+	Op_Copy    = "copy"
+	Op_Test    = "test"
+)
+
+// Operation is one step of a patch: what to do, where, and with what.
+type Operation struct {
+	Op    Op             // Always required.
+	Path  datamodel.Path // Always required.
+	Value datamodel.Node // Present on 'add', 'replace', 'test'.
+	From  datamodel.Path // Present on 'move', 'copy'.
+}
+
+// Eval applies ops to n one after another, each operating on the result of the previous one.
+// It stops at the first operation that fails and returns that error (and a nil node).
+func Eval(n datamodel.Node, ops []Operation) (datamodel.Node, error) {
+	var err error
+	for _, op := range ops {
+		n, err = EvalOne(n, op)
+		if err != nil {
+			return nil, err
+		}
+	}
+	return n, nil
+}
+
+// EvalOne applies a single operation to n and returns the resulting node.
+// An unrecognized op.Op is an error.
+// Note that a failed 'test' returns the unchanged n together with the error.
+func EvalOne(n datamodel.Node, op Operation) (datamodel.Node, error) {
+	switch op.Op {
+	case Op_Add:
+		// The behavior of the 'add' op in jsonpatch varies based on if the parent of the target path is a list.
+		// If the parent of the target path is a list, then 'add' is really more of an 'insert': it should slide the rest of the values down.
+		// There's also a special case for "-", which means "append to the end of the list".
+		// Otherwise, if the destination path exists, it's an error. (No upserting.)
+		// Handling this requires looking at the parent of the destination node, so we split this into *two* traversal.FocusedTransform calls.
+		return traversal.FocusedTransform(n, op.Path.Pop(), func(prog traversal.Progress, parent datamodel.Node) (datamodel.Node, error) {
+			if parent.Kind() == datamodel.Kind_List {
+				seg := op.Path.Last()
+				length := parent.Length()
+				// "-" means append, and so does an index exactly one past the end; anything beyond that (or negative) is rejected.
+				idx := length
+				if seg.String() != "-" {
+					var err error
+					idx, err = seg.Index()
+					if err != nil || idx < 0 || idx > length {
+						return nil, fmt.Errorf("patch-invalid-path-through-list: at %q", op.Path) // TODO error structuralization and review the code
+					}
+				}
+
+				nb := parent.Prototype().NewBuilder()
+				la, err := nb.BeginList(length + 1)
+				if err != nil {
+					return nil, err
+				}
+				for itr := parent.ListIterator(); !itr.Done(); {
+					i, v, err := itr.Next()
+					if err != nil {
+						return nil, err
+					}
+					if idx == i {
+						if err := la.AssembleValue().AssignNode(op.Value); err != nil {
+							return nil, err
+						}
+					}
+					if err := la.AssembleValue().AssignNode(v); err != nil {
+						return nil, err
+					}
+				}
+				if idx == length {
+					if err := la.AssembleValue().AssignNode(op.Value); err != nil {
+						return nil, err
+					}
+				}
+				if err := la.Finish(); err != nil {
+					return nil, err
+				}
+				return nb.Build(), nil
+			}
+			return prog.FocusedTransform(parent, datamodel.NewPath([]datamodel.PathSegment{op.Path.Last()}), func(prog traversal.Progress, point datamodel.Node) (datamodel.Node, error) {
+				if point != nil && !point.IsAbsent() {
+					return nil, fmt.Errorf("patch-target-exists: at %q", op.Path) // TODO error structuralization and review the code
+				}
+				return op.Value, nil
+			}, false)
+		}, false)
+	case Op_Remove:
+		return traversal.FocusedTransform(n, op.Path, func(_ traversal.Progress, point datamodel.Node) (datamodel.Node, error) {
+			return nil, nil // Returning a nil value here means "remove what's here".
+		}, false)
+	case Op_Replace:
+		// TODO i think you need a check that it's not landing under itself here
+		return traversal.FocusedTransform(n, op.Path, func(_ traversal.Progress, point datamodel.Node) (datamodel.Node, error) {
+			return op.Value, nil // is this right? what does FocusedTransform do re upsert?
+		}, false)
+	case Op_Move:
+		// TODO i think you need a check that it's not landing under itself here
+		source, err := traversal.Get(n, op.From)
+		if err != nil {
+			return nil, err
+		}
+		moved, err := traversal.FocusedTransform(n, op.Path, func(_ traversal.Progress, point datamodel.Node) (datamodel.Node, error) {
+			return source, nil // is this right? what does FocusedTransform do re upsert?
+		}, false)
+		if err != nil {
+			return nil, err
+		}
+		return traversal.FocusedTransform(moved, op.From, func(_ traversal.Progress, point datamodel.Node) (datamodel.Node, error) {
+			return nil, nil // Returning a nil value here means "remove what's here".
+		}, false)
+	case Op_Copy:
+		// TODO i think you need a check that it's not landing under itself here
+		source, err := traversal.Get(n, op.From)
+		if err != nil {
+			return nil, err
+		}
+		return traversal.FocusedTransform(n, op.Path, func(_ traversal.Progress, point datamodel.Node) (datamodel.Node, error) {
+			return source, nil // is this right? what does FocusedTransform do re upsert?
+		}, false)
+	case Op_Test:
+		point, err := traversal.Get(n, op.Path)
+		if err != nil {
+			return nil, err
+		}
+		if datamodel.DeepEqual(point, op.Value) {
+			return n, nil
+		}
+		return n, fmt.Errorf("test failed") // TODO real error handling and a code
+	default:
+		return nil, fmt.Errorf("misuse: invalid operation: %s", op.Op) // TODO real error handling and a code
+	}
+}
diff --git a/traversal/patch/parse.go b/traversal/patch/parse.go
new file mode 100644
index 00000000..49eadf05
--- /dev/null
+++ b/traversal/patch/parse.go
@@ -0,0 +1,69 @@
+package patch
+
+import (
+	_ "embed"
+
+	"bytes"
+	"io"
+
+	"github.com/ipld/go-ipld-prime"
+	"github.com/ipld/go-ipld-prime/codec"
+	"github.com/ipld/go-ipld-prime/node/bindnode"
+	"github.com/ipld/go-ipld-prime/schema"
+
+	"github.com/ipld/go-ipld-prime/codec/json"
+	"github.com/ipld/go-ipld-prime/datamodel"
+)
+
+//go:embed patch.ipldsch
+var embedSchema []byte
+
+var ts = func() *schema.TypeSystem {
+	ts, err := ipld.LoadSchemaBytes(embedSchema)
+	if err != nil {
+		panic(err)
+	}
+	return ts
+}()
+
+// ParseBytes is a convenience wrapper around Parse for input that is already in memory.
+func ParseBytes(b []byte, dec codec.Decoder) ([]Operation, error) {
+	return Parse(bytes.NewReader(b), dec)
+}
+
+// Parse decodes a serialized list of operations from r using dec, and converts them to Operation values.
+// A nil dec falls back to the plain JSON decoder.
+func Parse(r io.Reader, dec codec.Decoder) ([]Operation, error) {
+	if dec == nil {
+		dec = json.Decode
+	}
+	npt := bindnode.Prototype((*[]operationRaw)(nil), ts.TypeByName("OperationSequence"))
+	nb := npt.Representation().NewBuilder()
+	if err := dec(nb, r); err != nil {
+		return nil, err
+	}
+	opsRaw := bindnode.Unwrap(nb.Build()).(*[]operationRaw)
+	var ops []Operation
+	for _, opRaw := range *opsRaw {
+		// TODO check the Op string
+		op := Operation{
+			Op:    Op(opRaw.Op),
+			Path:  datamodel.ParsePath(opRaw.Path),
+			Value: opRaw.Value,
+		}
+		if opRaw.From != nil {
+			op.From = datamodel.ParsePath(*opRaw.From)
+		}
+		ops = append(ops, op)
+	}
+	return ops, nil
+}
+
+// operationRaw is roughly the same structure as Operation, but more amenable to serialization
+// (it doesn't use high level library types that don't have a data model equivalent).
+type operationRaw struct {
+	Op    string
+	Path  string
+	Value datamodel.Node
+	From  *string
+}
diff --git a/traversal/patch/patch.ipldsch b/traversal/patch/patch.ipldsch
new file mode 100644
index 00000000..037434fb
--- /dev/null
+++ b/traversal/patch/patch.ipldsch
@@ -0,0 +1,39 @@
+# Op represents the kind of operation to perform
+# The current set is based on the JSON Patch specification
+# We may end up adding more operations in the future
+type Op enum {
+	| add
+	| remove
+	| replace
+	| move
+	| copy
+	| test
+}
+
+# Operation and OperationSequence are the types that describe operations (but not what to apply them on).
+# See the Instruction type for describing both operations and what to apply them on.
+type Operation struct {
+	op Op
+	path String
+	value optional Any
+	from optional String
+}
+
+type OperationSequence [Operation]
+
+type Instruction struct {
+	startAt Link
+	operations OperationSequence
+	# future: optional field for adl signalling and/or other lenses
+}
+
+type InstructionResult union {
+	| Error "error"
+	| Link "result"
+} representation keyed
+
+type Error struct {
+	code String # enum forthcoming
+	message String
+	details {String:String}
+}
\ No newline at end of file
diff --git a/traversal/patch/patch_test.go b/traversal/patch/patch_test.go
new file mode 100644
index 00000000..ea74cadb
--- /dev/null
+++ b/traversal/patch/patch_test.go
@@ -0,0 +1,102 @@
+package patch
+
+import (
+	"bytes"
+	"encoding/json"
+	"os"
+	"strings"
+	"testing"
+
+	qt "github.com/frankban/quicktest"
+	"github.com/warpfork/go-testmark"
+
+	"github.com/ipld/go-ipld-prime"
+	"github.com/ipld/go-ipld-prime/codec"
+	"github.com/ipld/go-ipld-prime/codec/dagjson"
+)
+
+// TestSpecFixtures runs the patch fixtures found under the .ipld specs directory.
+func TestSpecFixtures(t *testing.T) {
+	dir := "../../.ipld/specs/patch/fixtures/"
+	testOneSpecFixtureFile(t, dir+"fixtures-1.md")
+}
+
+// testOneSpecFixtureFile evaluates every scenario found in one testmark fixture file.
+// The whole file is skipped if it does not exist (e.g. the submodule was not cloned).
+func testOneSpecFixtureFile(t *testing.T, filename string) {
+	doc, err := testmark.ReadFile(filename)
+	if os.IsNotExist(err) {
+		t.Skipf("not running spec suite: %s (did you clone the submodule with the data?)", err)
+	}
+	if err != nil {
+		t.Fatalf("spec file parse failed?!: %s", err)
+	}
+
+	// Data hunks in this spec file are in "directories", one per test scenario.
+	doc.BuildDirIndex()
+
+	for _, dir := range doc.DirEnt.ChildrenList {
+		t.Run(dir.Name, func(t *testing.T) {
+			// Grab all the data hunks.
+			// Each "directory" contains three pieces of data:
+			// - `initial` -- this is the "block". It's arbitrary example data. They're all in json (or dag-json) format, for simplicity.
+			// - `patch` -- this is a list of patch ops. Again, as json.
+			// - `result` -- this is the expected result object. Again, as json.
+			// A missing hunk is reported as a test failure rather than a nil dereference.
+			hunkBody := func(name string) []byte {
+				child := dir.Children[name]
+				if child == nil || child.Hunk == nil {
+					t.Fatalf("fixture %q is missing its %q hunk", dir.Name, name)
+				}
+				return child.Hunk.Body
+			}
+			initialBlob := hunkBody("initial")
+			patchBlob := hunkBody("patch")
+			resultBlob := hunkBody("result")
+
+			// Parse everything.
+			initial, err := ipld.Decode(initialBlob, dagjson.Decode)
+			if err != nil {
+				t.Fatalf("failed to parse fixture data: %s", err)
+			}
+			ops, err := ParseBytes(patchBlob, dagjson.Decode)
+			if err != nil {
+				t.Fatalf("failed to parse fixture patch: %s", err)
+			}
+			// We don't actually keep the decoded result object. We're just gonna serialize the result and textually diff that instead.
+			_, err = ipld.Decode(resultBlob, dagjson.Decode)
+			if err != nil {
+				t.Fatalf("failed to parse fixture data: %s", err)
+			}
+
+			// Do the thing!
+			actualResult, err := Eval(initial, ops)
+			if strings.HasSuffix(dir.Name, "-fail") {
+				if err == nil {
+					t.Fatalf("patch was expected to fail")
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("patch did not apply: %s", err)
+			}
+
+			// Serialize (and pretty print) result, so that we can diff it.
+			actualResultBlob, err := ipld.Encode(actualResult, dagjson.EncodeOptions{
+				EncodeLinks: true,
+				EncodeBytes: true,
+				MapSortMode: codec.MapSortMode_None,
+			}.Encode)
+			if err != nil {
+				t.Fatalf("failed to reserialize result: %s", err)
+			}
+			var actualResultBlobPretty bytes.Buffer
+			if err := json.Indent(&actualResultBlobPretty, actualResultBlob, "", "\t"); err != nil {
+				t.Fatalf("failed to indent result: %s", err)
+			}
+
+			// Diff!
+			qt.Assert(t, actualResultBlobPretty.String()+"\n", qt.Equals, string(resultBlob))
+		})
+	}
+}