Skip to content

Commit

Permalink
Fix non-determinism in diffing algorithm (#247)
Browse files Browse the repository at this point in the history
A previous attempt to add non-determinism to the diffing algorithm
unfortunately broke the algorithm for half the cases.

This change modifies the algorithm to truly switch between starting
with a forward search versus a reverse search.
The main for-loop of Difference would switch repeatedly between
performing a forward search, then a reverse search, and vice-versa.
Since we can't jump into the middle of a for-loop to start with the
reverse search first, we use a series of labels and goto statements
to accomplish the same effect.

Fixes #238
  • Loading branch information
dsnet authored Nov 24, 2020
1 parent ade6b74 commit 449e17c
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 35 deletions.
48 changes: 27 additions & 21 deletions cmp/internal/diff/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (r Result) Similar() bool {
return r.NumSame+1 >= r.NumDiff
}

var randInt = rand.New(rand.NewSource(time.Now().Unix())).Intn(2)
var randBool = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) == 0

// Difference reports whether two lists of lengths nx and ny are equal
// given the definition of equality provided as f.
Expand Down Expand Up @@ -168,17 +168,6 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// A vertical edge is equivalent to inserting a symbol from list Y.
// A diagonal edge is equivalent to a matching symbol between both X and Y.

// To ensure flexibility in changing the algorithm in the future,
// introduce some degree of deliberate instability.
// This is achieved by fiddling the zigzag iterator to start searching
// the graph starting from the bottom-right versus than the top-left.
// The result may differ depending on the starting search location,
// but still produces a valid edit script.
zigzagInit := randInt // either 0 or 1
if flags.Deterministic {
zigzagInit = 0
}

// Invariants:
// • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
// • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
Expand All @@ -197,6 +186,11 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// approximately the square-root of the search budget.
searchBudget := 4 * (nx + ny) // O(n)

// Running the tests with the "cmp_debug" build tag prints a visualization
// of the algorithm running in real-time. This is educational for
// understanding how the algorithm works. See debug_enable.go.
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)

// The algorithm below is a greedy, meet-in-the-middle algorithm for
// computing sub-optimal edit-scripts between two lists.
//
Expand All @@ -214,22 +208,28 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// frontier towards the opposite corner.
// • This algorithm terminates when either the X coordinates or the
// Y coordinates of the forward and reverse frontier points ever intersect.
//

// This algorithm is correct even if searching only in the forward direction
// or in the reverse direction. We do both because it is commonly observed
// that two lists commonly differ because elements were added to the front
// or end of the other list.
//
// Running the tests with the "cmp_debug" build tag prints a visualization
// of the algorithm running in real-time. This is educational for
// understanding how the algorithm works. See debug_enable.go.
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
for {
// Non-deterministically start with either the forward or reverse direction
// to introduce some deliberate instability so that we have the flexibility
// to change this algorithm in the future.
if flags.Deterministic || randBool {
goto forwardSearch
} else {
goto reverseSearch
}

forwardSearch:
{
// Forward search from the beginning.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
goto finishSearch
}
for stop1, stop2, i := false, false, zigzagInit; !(stop1 && stop2) && searchBudget > 0; i++ {
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
z := zigzag(i)
p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
Expand Down Expand Up @@ -262,10 +262,14 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
} else {
fwdFrontier.Y++
}
goto reverseSearch
}

reverseSearch:
{
// Reverse search from the end.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
goto finishSearch
}
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
Expand Down Expand Up @@ -300,8 +304,10 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
} else {
revFrontier.Y--
}
goto forwardSearch
}

finishSearch:
// Join the forward and reverse paths and then append the reverse path.
fwdPath.connect(revPath.point, f)
for i := len(revPath.es) - 1; i >= 0; i-- {
Expand Down
27 changes: 13 additions & 14 deletions cmp/internal/diff/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,15 @@ import (
"strings"
"testing"
"unicode"

"github.com/google/go-cmp/cmp/internal/flags"
)

func init() {
flags.Deterministic = true
}

func TestDifference(t *testing.T) {
tests := []struct {
// Before passing x and y to Difference, we strip all spaces so that
// they can be used by the test author to indicate a missing symbol
// in one of the lists.
x, y string
want string
want string // '|' separated list of possible outputs
}{{
x: "",
y: "",
Expand All @@ -36,7 +30,7 @@ func TestDifference(t *testing.T) {
}, {
x: "##",
y: "# ",
want: ".X",
want: ".X|X.",
}, {
x: "a#",
y: "A ",
Expand All @@ -48,7 +42,7 @@ func TestDifference(t *testing.T) {
}, {
x: "# ",
y: "##",
want: ".Y",
want: ".Y|Y.",
}, {
x: " #",
y: "@#",
Expand Down Expand Up @@ -148,7 +142,7 @@ func TestDifference(t *testing.T) {
}, {
x: "ABCAB BA ",
y: " C BABAC",
want: "XX.X.Y..Y",
want: "XX.X.Y..Y|XX.Y.X..Y",
}, {
x: "# #### ###",
y: "#y####yy###",
Expand All @@ -164,7 +158,7 @@ func TestDifference(t *testing.T) {
}, {
x: "0 12z3x 456789 x x 0",
y: "0y12Z3 y456789y y y0",
want: ".Y..M.XY......YXYXY.",
want: ".Y..M.XY......YXYXY.|.Y..M.XY......XYXYY.",
}, {
x: "0 2 4 6 8 ..................abXXcdEXF.ghXi",
y: " 1 3 5 7 9..................AB CDE F.GH I",
Expand Down Expand Up @@ -216,7 +210,7 @@ func TestDifference(t *testing.T) {
}, {
x: "0123456789 ",
y: " 5678901234",
want: "XXXXX.....YYYYY",
want: "XXXXX.....YYYYY|YYYYY.....XXXXX",
}, {
x: "0123456789 ",
y: " 4567890123",
Expand Down Expand Up @@ -252,9 +246,14 @@ func TestDifference(t *testing.T) {
x := strings.Replace(tt.x, " ", "", -1)
y := strings.Replace(tt.y, " ", "", -1)
es := testStrings(t, x, y)
if got := es.String(); got != tt.want {
t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, tt.want)
var want string
got := es.String()
for _, want = range strings.Split(tt.want, "|") {
if got == want {
return
}
}
t.Errorf("Difference(%s, %s):\ngot %s\nwant %s", x, y, got, want)
})
}
}
Expand Down

0 comments on commit 449e17c

Please sign in to comment.