Skip to content

Commit

Permalink
feat(DeepDiff): move diff functions into methods of a config struct
Browse files Browse the repository at this point in the history
we need to add stuff like context and configurable options to diffing, making now a great time to move to a better foundation for diff methods.

this more closely resembles the diffmatchpatch api: github.com/sergi/go-diff/diffmatchpatch

BREAKING CHANGE:
the api for accessing diff functions has moved into methods of DeepDiff, and those methods now take a context argument
  • Loading branch information
b5 committed Nov 26, 2019
1 parent a10dd33 commit 6bf7c9a
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 71 deletions.
99 changes: 62 additions & 37 deletions deepdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package deepdiff

import (
"bytes"
"context"
"encoding/hex"
"hash"
"hash/fnv"
Expand All @@ -11,46 +12,70 @@ import (
"sync"
)

// Diff computes a slice of deltas that define an edit script for turning the
// value at d1 into d2
// currently Diff will never return an error, error returns are reserved for
// future use. specifically: bailing before delta calculation based on a
// configurable threshold
func Diff(d1, d2 interface{}, opts ...DiffOption) ([]*Delta, error) {
cfg := &DiffConfig{}
// Config holds the configuration parameters for calculating diffs. Callers
// adjust it through DiffOption functions.
type Config struct {
// MoveDeltas, if true, makes Diff calculate "moves" that describe changing
// the parent of a subtree
MoveDeltas bool
}

// DiffOption is a function that adjusts a Config. Zero or more DiffOptions
// can be passed to NewDeepDiff
type DiffOption func(cfg *Config)

// DeepDiff is a configured differ; its methods (Diff, StatDiff, Stat) perform
// diffs using the settings captured at construction
type DeepDiff struct {
// changes is initialized to false by NewDeepDiff
// NOTE(review): no other use is visible in this view — confirm its purpose
changes bool
// moveDeltas mirrors Config.MoveDeltas and is copied back into the Config
// handed to each diff run
moveDeltas bool
}

// NewDeepDiff creates a deepdiff struct
func NewDeepDiff(opts ...DiffOption) *DeepDiff {
cfg := &Config{}
for _, opt := range opts {
opt(cfg)
}

deepdiff := &diff{cfg: cfg, d1: d1, d2: d2}
return deepdiff.diff(), nil
return &DeepDiff{
changes: false,
moveDeltas: cfg.MoveDeltas,
}
}

// DiffConfig are any possible configuration parameters for calculating diffs
type DiffConfig struct {
// If true Diff will calculate "moves" that describe changing the parent of
// a subtree
MoveDeltas bool
// Provide a non-nil stats pointer & diff will populate it with data from
// the diff process
Stats *Stats
// Diff computes a slice of deltas that define an edit script for turning a
// into b. ctx is forwarded to the underlying diff run.
// Currently Diff will never return an error; error returns are reserved for
// future use, specifically: bailing before delta calculation based on a
// configurable threshold
func (dd *DeepDiff) Diff(ctx context.Context, a, b interface{}) ([]*Delta, error) {
deepdiff := &diff{cfg: dd.config(), d1: a, d2: b}
return deepdiff.diff(ctx), nil
}

// DiffOption is a function that adjust a config, zero or more DiffOptions
// can be passed to the Diff function
type DiffOption func(cfg *DiffConfig)
// StatDiff calculates a diff script and diff stats in a single run, returning
// the deltas that turn a into b along with the Stats collected while diffing.
// The returned error is currently always nil
func (dd *DeepDiff) StatDiff(ctx context.Context, a, b interface{}) ([]*Delta, *Stats, error) {
deepdiff := &diff{cfg: dd.config(), d1: a, d2: b, stats: &Stats{}}
return deepdiff.diff(ctx), deepdiff.stats, nil
}

// Stat calculates the diff Stats between two documents. It runs the same
// diff process as StatDiff, discarding the resulting deltas. The returned
// error is currently always nil
func (dd *DeepDiff) Stat(ctx context.Context, a, b interface{}) (*Stats, error) {
deepdiff := &diff{cfg: dd.config(), d1: a, d2: b, stats: &Stats{}}
deepdiff.diff(ctx)
return deepdiff.stats, nil
}

// OptionSetStats will set the passed-in stats pointer when Diff is called
func OptionSetStats(st *Stats) DiffOption {
return func(cfg *DiffConfig) {
cfg.Stats = st
// config builds a new Config from the settings stored on dd, so each diff run
// gets its own Config value
func (dd *DeepDiff) config() *Config {
return &Config{
MoveDeltas: dd.moveDeltas,
}
}

// diff is a state machine for calculating an edit script that transitions between
// two state trees
// diff is a state machine for calculating an edit script that transitions
// between two state trees
type diff struct {
cfg *DiffConfig
cfg *Config
stats *Stats
d1, d2 interface{}
t1, t2 node
t1Nodes map[string][]node
Expand Down Expand Up @@ -80,8 +105,8 @@ type diff struct {
// correspond to inserted nodes.
// 6. consider each matching node and decide if the node is at its right
// place, or whether it has been moved.
func (d *diff) diff() []*Delta {
d.t1, d.t2, d.t1Nodes = d.prepTrees()
func (d *diff) diff(ctx context.Context) []*Delta {
d.t1, d.t2, d.t1Nodes = d.prepTrees(ctx)
d.queueMatch(d.t1Nodes, d.t2)
d.optimize(d.t1, d.t2)
// TODO (b5): a second optimize pass seems to help greatly on larger diffs, which
Expand Down Expand Up @@ -419,31 +444,31 @@ func (d *diff) calcDeltas(t1, t2 node) (dts []*Delta) {
return cleaned
}

if d.cfg.Stats != nil {
if d.stats != nil {
for _, delta := range dts {
switch delta.Type {
case DTInsert:
if n := nodeAtPath(t2, delta.Path); n != nil {
if cmp, ok := n.(compound); ok {
d.cfg.Stats.Inserts += cmp.DescendantsCount()
d.stats.Inserts += cmp.DescendantsCount()
}
}
d.cfg.Stats.Inserts++
d.stats.Inserts++
case DTUpdate:
d.cfg.Stats.Updates++
d.stats.Updates++
case DTDelete:
if n := nodeAtPath(t2, delta.Path); n != nil {
if cmp, ok := n.(compound); ok {
d.cfg.Stats.Deletes += cmp.DescendantsCount()
d.stats.Deletes += cmp.DescendantsCount()
}
}
d.cfg.Stats.Deletes++
d.stats.Deletes++
case DTMove:
if n := nodeAtPath(t2, delta.Path); n != nil {
if cmp, ok := n.(compound); ok {
d.cfg.Stats.Moves += cmp.DescendantsCount()
d.stats.Moves += cmp.DescendantsCount()
}
d.cfg.Stats.Moves++
d.stats.Moves++
}
}
}
Expand Down
41 changes: 31 additions & 10 deletions deepdiff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package deepdiff

import (
"bytes"
"context"
"encoding/json"
"fmt"
"github.com/google/go-cmp/cmp"
"io/ioutil"
"os"
"reflect"
Expand All @@ -21,6 +23,8 @@ func RunTestCases(t *testing.T, cases []TestCase, opts ...DiffOption) {
var (
src interface{}
dst interface{}
dd = NewDeepDiff(opts...)
ctx = context.Background()
)

for i, c := range cases {
Expand All @@ -31,7 +35,7 @@ func RunTestCases(t *testing.T, cases []TestCase, opts ...DiffOption) {
t.Fatal(err)
}

diff, err := Diff(src, dst, opts...)
diff, err := dd.Diff(ctx, src, dst)
if err != nil {
t.Fatalf("%d, %s Diff error: %s", i, c.description, err)
}
Expand Down Expand Up @@ -181,7 +185,7 @@ func TestMoveDiffs(t *testing.T) {
},
},
}
RunTestCases(t, cases, func(o *DiffConfig) {
RunTestCases(t, cases, func(o *Config) {
o.MoveDeltas = true
})
}
Expand Down Expand Up @@ -274,8 +278,8 @@ func TestDiffDotGraph(t *testing.T) {
panic(err)
}

d := &diff{cfg: &DiffConfig{}, d1: a, d2: b}
d.t1, d.t2, d.t1Nodes = d.prepTrees()
d := &diff{cfg: &Config{}, d1: a, d2: b}
d.t1, d.t2, d.t1Nodes = d.prepTrees(context.Background())
d.queueMatch(d.t1Nodes, d.t2)
d.optimize(d.t1, d.t2)

Expand Down Expand Up @@ -348,7 +352,7 @@ func TestDiffIntData(t *testing.T) {
[]interface{}{int64(10), int64(8), int64(9)},
}

diff, err := Diff(leftData, rightData)
diff, err := NewDeepDiff().Diff(context.Background(), leftData, rightData)
if err != nil {
t.Fatalf("Diff error: %s", err)
}
Expand Down Expand Up @@ -385,8 +389,7 @@ func TestDiffStats(t *testing.T) {
"b": []interface{}{},
}

stat := Stats{}
diff, err := Diff(leftData, rightData, OptionSetStats(&stat))
diff, stat, err := NewDeepDiff().StatDiff(context.Background(), leftData, rightData)
if err != nil {
t.Fatalf("Diff error: %s", err)
}
Expand All @@ -406,6 +409,17 @@ func TestDiffStats(t *testing.T) {
if err := CompareDiffs(expect, diff); err != nil {
t.Errorf("Compare result mismatch: %s", err)
}

expectStat := &Stats{
Left: 11,
Right: 3,
LeftWeight: 107,
RightWeight: 13,
Deletes: 2,
}
if diff := cmp.Diff(expectStat, stat); diff != "" {
t.Errorf("result mismatch. (-want +got):\n%s", diff)
}
}

func BenchmarkDiff1(b *testing.B) {
Expand All @@ -427,6 +441,8 @@ func BenchmarkDiff1(b *testing.B) {

var (
src, dst interface{}
ctx = context.Background()
dd = NewDeepDiff()
)
if err := json.Unmarshal([]byte(srcData), &src); err != nil {
b.Fatal(err)
Expand All @@ -436,16 +452,18 @@ func BenchmarkDiff1(b *testing.B) {
}

for n := 0; n < b.N; n++ {
Diff(src, dst)
dd.Diff(ctx, src, dst)
}
}

func BenchmarkDiffDatasets(b *testing.B) {
var (
diff = NewDeepDiff()
data1 = []byte(`{"body":[["a","b","c","d"],["1","2","3","4"],["e","f","g","h"]],"bodyPath":"/ipfs/QmP2tdkqc4RhSDGv1KSWoJw1pwzNu6HzMcYZaVFkLN9PMc","commit":{"author":{"id":"QmSyDX5LYTiwQi861F5NAwdHrrnd1iRGsoEvCyzQMUyZ4W"},"path":"/ipfs/QmbwJNx88xNknXYewLCVBVJqbZ5oaiffr4WYDoCJAuCZ93","qri":"cm:0","signature":"TUREFCfoKEf5J189c0jdKfleRYsGZm8Q6sm6g6lJctXGDDM8BGdpSVjMltGTmmrtN6qtQJKRail5ceG325Rb8hLYoMe4926gXZNWBlMfD0yBHSjo81LsE25UqVeloU2W19Z1MNOrLTDPDRBoM0g3vyJLykGQ0UPRqpUvXNod0E5ONZOKGrQpByp113h12yiAjsiCBR6sAfIScNpcyjzkiDhBCCbMy9cGfMVK8q7wNCmcC41zguGhvv1biDoE+MEVDc1QPN1dYeEaDsvaRu5jWSv44zhVdC3lZtlT8R9qArk8OQVW798ctQ6NJ5kCiZ3C6Z19VPrptr85oknoNNaYxA==","timestamp":"2019-02-04T14:26:43.158109Z","title":"created dataset"},"name":"test_1","path":"/ipfs/QmeSYBYd3LVsFPRp1jiXgT8q22Md3R7swUzd9yt7MPVUcj/dataset.json","peername":"b5","qri":"ds:0","structure":{"depth":2,"errCount":0,"format":"json","qri":"st:0","schema":{"type":"array"}}}`)
data2 = []byte(`{"body":[["a","b","c","d"],["1","2","3","4"],["e","f","g","h"]],"bodyPath":"/ipfs/QmP2tdkqc4RhSDGv1KSWoJw1pwzNu6HzMcYZaVFkLN9PMc","commit":{"author":{"id":"QmSyDX5LYTiwQi861F5NAwdHrrnd1iRGsoEvCyzQMUyZ4W"},"path":"/ipfs/QmVZrXZ2d6DF11BL7QLJ8AYFYaNiLgAWVEshZ3HB5ogZJS","qri":"cm:0","signature":"CppvSyFkaLNIY3lIOGxq7ybA18ZzJbgrF7XrIgrxi7pwKB3RGjriaCqaqTGNMTkdJCATN/qs/Yq4IIbpHlapIiwfzVHFUO8m0a2+wW0DHI+y1HYsRvhg3+LFIGHtm4M+hqcDZg9EbNk8weZI+Q+FPKk6VjPKpGtO+JHV+nEFovFPjS4XMMoyuJ96KiAEeZISuF4dN2CDSV+WC93sMhdPPAQJJZjZX+3cc/fOaghOkuhedXaA0poTVJQ05aAp94DyljEnysuS7I+jfNrsE/6XhtazZnOSYX7e0r1PJwD7OdoZYRH73HnDk+Q9wg6RrpU7EehF39o4UywyNGAI5yJkxg==","timestamp":"2019-02-11T17:50:20.501283Z","title":"forced update"},"name":"test_1","path":"/ipfs/QmaAuKZezio5knAFXU4krPcZfBWHnHDWWKEX32Ne9v6niQ/dataset.json","peername":"b5","previousPath":"/ipfs/QmeSYBYd3LVsFPRp1jiXgT8q22Md3R7swUzd9yt7MPVUcj","qri":"ds:0","structure":{"depth":2,"errCount":0,"format":"json","qri":"st:0","schema":{"type":"array"}}}`)
t1 interface{}
t2 interface{}
ctx = context.Background()
)
if err := json.Unmarshal(data1, &t1); err != nil {
b.Fatal(err)
Expand All @@ -454,11 +472,14 @@ func BenchmarkDiffDatasets(b *testing.B) {
b.Fatal(err)
}
for i := 0; i < b.N; i++ {
Diff(t1, t2)
diff.Diff(ctx, t1, t2)
}
}

func BenchmarkDiff5MB(b *testing.B) {
diff := NewDeepDiff()
ctx := context.Background()

f1, err := os.Open("testdata/airport_codes.json")
if err != nil {
b.Fatal(err)
Expand All @@ -476,6 +497,6 @@ func BenchmarkDiff5MB(b *testing.B) {
b.Fatal(err)
}
for i := 0; i < b.N; i++ {
Diff(t1, t2)
diff.Diff(ctx, t1, t2)
}
}
15 changes: 11 additions & 4 deletions examples_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
package deepdiff

import (
"context"
"encoding/json"
"fmt"
)

func ExampleDiffJSON() {
// we'll use the background as our execution context
ctx := context.Background()

// start with two slightly different json documents
aJSON := []byte(`{
"a": 100,
Expand Down Expand Up @@ -46,10 +50,13 @@ func ExampleDiffJSON() {
panic(err)
}

// Diff will use default configuration to produce a slice of Deltas
// that describe the structured changes. by default Diff will not calculate
// moves, only inserts, deletes, and updates
diffs, err := Diff(a, b)
// create a differ, using the default configuration
dd := NewDeepDiff()

// Diff will produce a slice of Deltas that describe the structured changes.
// by default Diff will not calculate moves, only inserts, deletes, and
// updates
diffs, err := dd.Diff(ctx, a, b)
if err != nil {
panic(err)
}
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
module github.com/qri-io/deepdiff

go 1.12

require github.com/google/go-cmp v0.3.1
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
22 changes: 8 additions & 14 deletions stats_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package deepdiff

import (
"context"
"encoding/json"
"reflect"
"testing"

"github.com/google/go-cmp/cmp"
)

func TestCalcStats(t *testing.T) {
Expand All @@ -28,21 +30,13 @@ func TestCalcStats(t *testing.T) {
Deletes: 1,
Moves: 0,
}
stats := &Stats{}
Diff(a, b, OptionSetStats(stats))

if expect.NodeChange() != stats.NodeChange() {
t.Errorf("wrong node change. want: %d. got: %d", expect.NodeChange(), stats.NodeChange())
}

if expect.PctWeightChange() != stats.PctWeightChange() {
t.Errorf("wrong percentage of node change. want: %f. got: %f", expect.PctWeightChange(), stats.PctWeightChange())
got, err := NewDeepDiff().Stat(context.Background(), a, b)
if err != nil {
t.Fatal(err)
}

if !reflect.DeepEqual(expect, stats) {
t.Errorf("response mismatch")
t.Logf("want: %v", expect)
t.Logf("got: %v", stats)
if diff := cmp.Diff(expect, got); diff != "" {
t.Errorf("result mismatch. (-want +got):\n%s", diff)
}

}
Loading

0 comments on commit 6bf7c9a

Please sign in to comment.