topdown: Fix bug in set/object comprehension indexing
In 00a71ef we added support for
comprehension indexing; however, there was a bug in the implementation
for sets and objects: the child query was closing over the
parent--we only had test coverage against arrays, so this went
uncaught. This meant that sets and objects would still see O(N^2) runtime.

This commit resolves the issue and includes a new instrumentation
counter that should help identify this kind of problem (if the skip and
miss counts differ, something is wrong). Rather than erroring in this
case, we just lazily compute the comprehension.

Signed-off-by: Torin Sandall <torinsandall@gmail.com>
tsandall committed May 15, 2020
1 parent b5c1587 commit 4dd119b
Showing 3 changed files with 73 additions and 26 deletions.
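
For context on the O(N^2) claim in the commit message, below is a minimal sketch in plain Go, with made-up names rather than OPA's internal types, of what the comprehension index amounts to for a rule like the benchmark's ks := {k | some k; v == data.items[k]}. The index is built once, without reference to the outer binding of v, and each iteration of the rule body then does a single lookup; if the child query instead closes over the parent, the comprehension is effectively re-evaluated for every v, which is the O(N^2) behaviour the fix removes for sets and objects.

package main

import "fmt"

// Toy stand-in for data.items in the benchmark below.
var items = []string{"a", "b", "c", "a"}

// buildIndex plays the role of the cached comprehension: it is evaluated once,
// independently of any outer binding, and maps each value to the keys at which
// it occurs.
func buildIndex(items []string) map[string][]int {
	idx := make(map[string][]int, len(items))
	for k, v := range items {
		idx[v] = append(idx[v], k)
	}
	return idx
}

func main() {
	idx := buildIndex(items) // built once: O(N)
	for _, v := range items { // outer rule body: v := data.items[_]
		ks := idx[v] // constant-time lookup for: ks := {k | some k; v == data.items[k]}
		fmt.Println(v, ks)
	}
	// If the index had to be rebuilt inside the loop -- which is what the
	// closure bug caused for sets and objects -- total work would be O(N^2).
}
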
6 changes: 4 additions & 2 deletions topdown/eval.go
@@ -859,6 +859,8 @@ func (e *eval) biunifyComprehension(a, b *ast.Term, b1, b2 *bindings, swap bool,
return err
} else if value != nil {
return e.biunify(value, b, b1, b2, iter)
} else {
e.instr.counterIncr(evalOpComprehensionCacheMiss)
}

switch a := a.Value.(type) {
@@ -932,7 +934,7 @@ func (e *eval) buildComprehensionCacheArray(x *ast.ArrayComprehension, keys []*a
}

func (e *eval) buildComprehensionCacheSet(x *ast.SetComprehension, keys []*ast.Term) (*comprehensionCacheElem, error) {
child := e.closure(x.Body)
child := e.child(x.Body)
node := newComprehensionCacheElem()
return node, child.Run(func(child *eval) error {
values := make([]*ast.Term, len(keys))
@@ -952,7 +954,7 @@ func (e *eval) buildComprehensionCacheSet(x *ast.SetComprehension, keys []*ast.T
}

func (e *eval) buildComprehensionCacheObject(x *ast.ObjectComprehension, keys []*ast.Term) (*comprehensionCacheElem, error) {
child := e.closure(x.Body)
child := e.child(x.Body)
node := newComprehensionCacheElem()
return node, child.Run(func(child *eval) error {
values := make([]*ast.Term, len(keys))
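
The first hunk above is where the new counter fires: when the precomputed index has no entry for the value, the evaluator records a miss and falls through to ordinary, lazy evaluation of the comprehension instead of returning an error. Here is a toy sketch of that lookup-or-fall-back shape, using made-up types rather than OPA's internals:

package main

import "fmt"

// comprehensionCache is a stand-in for the precomputed index; the real element
// type in eval.go works over ast terms, not strings.
type comprehensionCache struct {
	index  map[string][]int
	hits   int
	misses int
}

// lookup returns the cached keys for v when present; otherwise it records a
// miss and computes the result lazily, mirroring the fall-through to the
// switch statement in the hunk above.
func (c *comprehensionCache) lookup(v string, compute func(string) []int) []int {
	if ks, ok := c.index[v]; ok {
		c.hits++
		return ks
	}
	c.misses++ // eval_op_comprehension_cache_miss
	return compute(v)
}

func main() {
	items := []string{"a", "b", "a"}
	keysOf := func(v string) []int { // lazy evaluation of the comprehension body
		var ks []int
		for k, x := range items {
			if x == v {
				ks = append(ks, k)
			}
		}
		return ks
	}
	c := &comprehensionCache{index: map[string][]int{"a": {0, 2}}} // "b" deliberately absent
	for _, v := range items {
		fmt.Println(v, c.lookup(v, keysOf))
	}
	fmt.Println("hits:", c.hits, "misses:", c.misses) // a nonzero miss count flags an incomplete index
}
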
1 change: 1 addition & 0 deletions topdown/instrumentation.go
@@ -18,6 +18,7 @@ const (
evalOpComprehensionCacheSkip = "eval_op_comprehension_cache_skip"
evalOpComprehensionCacheBuild = "eval_op_comprehension_cache_build"
evalOpComprehensionCacheHit = "eval_op_comprehension_cache_hit"
evalOpComprehensionCacheMiss = "eval_op_comprehension_cache_miss"
partialOpSaveUnify = "partial_op_save_unify"
partialOpSaveSetContains = "partial_op_save_set_contains"
partialOpSaveSetContainsRec = "partial_op_save_set_contains_rec"
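
To read the new counter from outside the package, a caller can attach instrumentation backed by a metrics store and look the counters up by their string names (the Go constants above are unexported). The sketch below is adapted from the benchmark later in this diff; the data and the test.rego module are made up for illustration, and exact constructor names may differ slightly across OPA versions.

package main

import (
	"context"
	"fmt"

	"github.com/open-policy-agent/opa/ast"
	"github.com/open-policy-agent/opa/metrics"
	"github.com/open-policy-agent/opa/storage"
	"github.com/open-policy-agent/opa/storage/inmem"
	"github.com/open-policy-agent/opa/topdown"
)

func main() {
	ctx := context.Background()

	// Made-up data and module mirroring the "sets" benchmark case below.
	store := inmem.NewFromObject(map[string]interface{}{
		"items": []interface{}{"a", "b", "a"},
	})
	compiler := ast.MustCompileModules(map[string]string{
		"test.rego": `
			package test

			p {
				v := data.items[_]
				ks := {k | some k; v == data.items[k]}
			}
		`,
	})

	m := metrics.New()
	instr := topdown.NewInstrumentation(m)

	err := storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error {
		query, err := compiler.QueryCompiler().Compile(ast.MustParseBody("data.test.p = true"))
		if err != nil {
			return err
		}
		q := topdown.NewQuery(query).
			WithCompiler(compiler).
			WithStore(store).
			WithTransaction(txn).
			WithInstrumentation(instr)
		_, err = q.Run(ctx)
		return err
	})
	if err != nil {
		panic(err)
	}

	// Per the commit message, these two counts should not diverge; a difference
	// indicates a problem with the comprehension cache.
	fmt.Println("skip:", m.Counter("eval_op_comprehension_cache_skip").Value())
	fmt.Println("miss:", m.Counter("eval_op_comprehension_cache_miss").Value())
}
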
92 changes: 68 additions & 24 deletions topdown/topdown_bench_test.go
@@ -13,6 +13,7 @@ import (
"text/template"

"github.com/open-policy-agent/opa/ast"
"github.com/open-policy-agent/opa/metrics"
"github.com/open-policy-agent/opa/storage"
"github.com/open-policy-agent/opa/storage/inmem"
"github.com/open-policy-agent/opa/util"
@@ -622,41 +623,84 @@ func genWalkBenchmarkData(n int) map[string]interface{} {

func BenchmarkComprehensionIndexing(b *testing.B) {
ctx := context.Background()
sizes := []int{10, 100, 1000}
for _, n := range sizes {
b.Run(fmt.Sprint(n), func(b *testing.B) {
data := genComprehensionIndexingData(n)
store := inmem.NewFromObject(data)
compiler := ast.MustCompileModules(map[string]string{
"test.rego": `
cases := []struct {
note string
module string
query string
}{
{
note: "arrays",
module: `
package test
p {
bench_array {
v := data.items[_]
ks := [k | some k; v == data.items[k]]
}
`,
})
query, err := compiler.QueryCompiler().Compile(ast.MustParseBody(`data.test.p = true`))
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
err = storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error {
q := NewQuery(query).WithStore(store).WithCompiler(compiler).WithTransaction(txn)
rs, err := q.Run(ctx)
if err != nil || len(rs) != 1 {
b.Fatal("Unexpected result:", rs, "err:", err)
}
return nil
query: `data.test.bench_array = true`,
},
{
note: "sets",
module: `
package test
bench_set {
v := data.items[_]
ks := {k | some k; v == data.items[k]}
}
`,
query: `data.test.bench_set = true`,
},
{
note: "objects",
module: `
package test
bench_object {
v := data.items[_]
ks := {k: 1 | some k; v == data.items[k]}
}
`,
query: `data.test.bench_object = true`,
},
}

sizes := []int{10, 100, 1000}
for _, tc := range cases {
for _, n := range sizes {
b.Run(fmt.Sprintf("%v_%v", tc.note, n), func(b *testing.B) {
data := genComprehensionIndexingData(n)
store := inmem.NewFromObject(data)
compiler := ast.MustCompileModules(map[string]string{
"test.rego": tc.module,
})
query, err := compiler.QueryCompiler().Compile(ast.MustParseBody(tc.query))
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
err = storage.Txn(ctx, store, storage.TransactionParams{}, func(txn storage.Transaction) error {
m := metrics.New()
instr := NewInstrumentation(m)
q := NewQuery(query).WithStore(store).WithCompiler(compiler).WithTransaction(txn).WithInstrumentation(instr)
rs, err := q.Run(ctx)
if m.Counter(evalOpComprehensionCacheMiss).Value().(uint64) > 0 {
b.Fatal("expected zero cache misses")
}
if err != nil || len(rs) != 1 {
b.Fatal("Unexpected result:", rs, "err:", err)
}
return nil
})
if err != nil {
b.Fatal(err)
}

}
})
}
})
}
}
}
