ast: Improve set implementation to memoize groundness

Previously, sets did not memoize the groundness check, so each call to `IsGround` would iterate until it found a non-ground term. In the worst case, this would check the entire set. Because we call `IsGround` as part of the inner loop of the `topdown.evalTerm#enumerate` code path for sets, this introduced a cross-product (iterating a set of n elements cost on the order of n² groundness checks)!

This commit improves the set implementation to maintain a bit that indicates whether the set is ground. When new elements are added to the set, the bit is updated accordingly.

This change provides a significant speedup for large sets.

name                   old time/op  new time/op  delta
SetIteration/10-20     28.6µs ± 1%  27.3µs ± 1%   -4.65%  (p=0.008 n=5+5)
SetIteration/100-20     263µs ± 1%   152µs ± 1%  -42.20%  (p=0.008 n=5+5)
SetIteration/1000-20   12.5ms ± 0%   1.4ms ± 1%  -88.67%  (p=0.008 n=5+5)
SetIteration/10000-20   1.12s ± 0%   0.02s ± 1%  -98.63%  (p=0.008 n=5+5)

Signed-off-by: Torin Sandall <torinsandall@gmail.com>
open-policy-agent · Dec 2, 2020 · 7fb944a · 7fb944a
1 parent afdb035
commit 7fb944a
Show file tree

Hide file tree

Showing 2 changed files with 81 additions and 9 deletions.
diff --git a/ast/term.go b/ast/term.go
@@ -1260,9 +1260,10 @@ func newset(n int) *set {
 		keys = make([]*Term, 0, n)
 	}
 	return &set{
-		elems: make(map[int]*Term, n),
-		keys:  keys,
-		hash:  0,
+		elems:  make(map[int]*Term, n),
+		keys:   keys,
+		hash:   0,
+		ground: true,
 	}
 }
 
@@ -1275,9 +1276,10 @@ func SetTerm(t ...*Term) *Term {
 }
 
 type set struct {
-	elems map[int]*Term
-	keys  []*Term
-	hash  int
+	elems  map[int]*Term
+	keys   []*Term
+	hash   int
+	ground bool
 }
 
 // Copy returns a deep copy of s.
@@ -1287,14 +1289,13 @@ func (s *set) Copy() Set {
 		cpy.Add(x.Copy())
 	})
 	cpy.hash = s.hash
+	cpy.ground = s.ground
 	return cpy
 }
 
 // IsGround returns true if all terms in s are ground.
 func (s *set) IsGround() bool {
-	return !s.Until(func(x *Term) bool {
-		return !x.IsGround()
-	})
+	return s.ground
 }
 
 // Hash returns a hash code for s.
@@ -1550,6 +1551,7 @@ func (s *set) insert(x *Term) {
 	s.elems[hash] = x
 	s.keys = append(s.keys, x)
 	s.hash = 0
+	s.ground = s.ground && x.IsGround()
 }
 
 func (s *set) get(x *Term) *Term {

diff --git a/topdown/topdown_bench_test.go b/topdown/topdown_bench_test.go
@@ -19,6 +19,76 @@ import (
 	"github.com/open-policy-agent/opa/util"
 )
 
+func BenchmarkArrayIteration(b *testing.B) {
+	sizes := []int{10, 100, 1000, 10000}
+	for _, n := range sizes {
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			benchmarkIteration(b, getArrayIterationBenchmarkModule(n))
+		})
+	}
+}
+
+func BenchmarkSetIteration(b *testing.B) {
+	sizes := []int{10, 100, 1000, 10000}
+	for _, n := range sizes {
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			benchmarkIteration(b, getSetIterationBenchmarkModule(n))
+		})
+	}
+}
+
+func BenchmarkObjectIteration(b *testing.B) {
+	sizes := []int{10, 100, 1000, 10000}
+	for _, n := range sizes {
+		b.Run(fmt.Sprint(n), func(b *testing.B) {
+			benchmarkIteration(b, getObjectIterationBenchmarkModule(n))
+		})
+	}
+}
+
+func benchmarkIteration(b *testing.B, module string) {
+	ctx := context.Background()
+	query := ast.MustParseBody("data.test.main")
+	compiler := ast.MustCompileModules(map[string]string{
+		"test.rego": module,
+	})
+
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+
+		q := NewQuery(query).WithCompiler(compiler)
+		_, err := q.Run(ctx)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+func getArrayIterationBenchmarkModule(n int) string {
+	return fmt.Sprintf(`package test
+
+	fixture = [ x | x := numbers.range(1, %d)[_] ]
+
+	main { fixture[i] }`, n)
+}
+
+func getSetIterationBenchmarkModule(n int) string {
+	return fmt.Sprintf(`package test
+
+	fixture = { x | x := numbers.range(1, %d)[_] }
+
+	main { fixture[i] }`, n)
+}
+
+func getObjectIterationBenchmarkModule(n int) string {
+	return fmt.Sprintf(`package test
+
+	fixture = { x: x | x := numbers.range(1, %d)[_] }
+
+	main { fixture[i] }`, n)
+}
+
 func BenchmarkLargeJSON(b *testing.B) {
 	data := generateLargeJSONBenchmarkData()
 	ctx := context.Background()