Skip to content

Commit

Permalink
Merge #68007
Browse files Browse the repository at this point in the history
68007: opt: improve CanBeCompositeSensitive r=RaduBerinde a=RaduBerinde

The current implementation of CanBeCompositeSensitive can be quadratic
in the expression size because each node builds the entire set of
outer columns for that subtree.

This change reworks the code and defines the logical property that we
were approximating by checking the outer cols directly. This also
reveals a bug in the previous version which did not detect relational
subtrees when there are no outer columns.

Release note: None

Co-authored-by: Radu Berinde <radu@cockroachlabs.com>
  • Loading branch information
craig[bot] and RaduBerinde committed Jul 27, 2021
2 parents c2db7f8 + 6c45df7 commit 186313a
Show file tree
Hide file tree
Showing 10 changed files with 117 additions and 195 deletions.
94 changes: 54 additions & 40 deletions pkg/sql/opt/memo/logical_props_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2602,55 +2602,69 @@ func deriveWithUses(r opt.Expr) props.WithUsesMap {
// An example of a composite-sensitive expression is `d::string`, where d is a
// DECIMAL.
//
// A formal definition:
// Let (c1,c2,...) be the outer columns of the scalar expression. Let
// f(x1,x2,...) be the result of the scalar expression for the given outer
// column values. The expression is composite-insensitive if, for any two
// sets of values (x1,x2,...) and (y1,y2,...):
// (x1=y1 AND x2=y2 AND ...) => f(x1,x2,...) = f(y1,y2,...)
//
// Note that this doesn't mean that the final results are always *identical*,
// just that they are logically equal.
//
// This property is used to determine when a scalar expression can be copied,
// with outer column variable references changed to refer to other columns that
// are known to be equal to the original columns.
func CanBeCompositeSensitive(md *opt.Metadata, e opt.Expr) bool {
outerCols := getOuterCols(e)
var compositeOuterCols opt.ColSet
outerCols.ForEach(func(col opt.ColumnID) {
if colinfo.HasCompositeKeyEncoding(md.ColumnMeta(col).Type) {
compositeOuterCols.Add(col)
}
})
if compositeOuterCols.Empty() {
// Fast path: none of the outer columns are composite.
return false
}

var canBeSensitive func(e opt.Expr) bool
canBeSensitive = func(e opt.Expr) bool {
// check is a recursive function which returns the following:
// - isCompositeInsensitive as defined above.
// - isCompositeIdentical is a stronger property, which says that for equal
// outer column values, the expression results are always *identical* (not
// just logically equal).
//
// A composite-insensitive expression with a non-composite result type is by
// definition also composite-identical.
//
// Any purely scalar expression which depends only on non-composite outer
// columns is composite-identical.
var check func(e opt.Expr) (isCompositeInsensitive, isCompositeIdentical bool)
check = func(e opt.Expr) (isCompositeInsensitive, isCompositeIdentical bool) {
if _, ok := e.(RelExpr); ok {
// Not a purely scalar expression.
return true
return false, false
}
if v, ok := e.(*VariableExpr); ok {
// Outer column references are our base case. They are always
// composite-insensitive. If they are not of composite type, they are also
// composite-identical.
return true, !colinfo.HasCompositeKeyEncoding(v.Typ)
}
if !getOuterCols(e).Intersects(compositeOuterCols) {
// None of the outer columns of this sub-expression are composite.
return false
}
// Check the inputs to the operator. Together, the following conditions are
// sufficient to prove that this expression is not sensitive:
// 1. None of the inputs are sensitive to composite outer columns.
// Otherwise, the operator can receive different inputs for logically
// equal outer values and thus produce different outputs.
// 2. The operator is marked as being always insensitive, or none of the
// input data types are composite.
checkTypes := !opt.IsCompositeInsensitiveOp(e)

allChildrenCompositeIdentical := true
for i, n := 0, e.ChildCount(); i < n; i++ {
if canBeSensitive(e.Child(i)) {
// Condition 1 not satisfied.
return true
}
if checkTypes {
// Note that the canBeSensitive() call above always returns true for
// relational expressions, so we are sure that the child is scalar.
if child := e.Child(i).(opt.ScalarExpr); colinfo.HasCompositeKeyEncoding(child.DataType()) {
// Condition 2 not satisfied.
return true
}
childCompositeInsensitive, childCompositeIdentical := check(e.Child(i))
if !childCompositeInsensitive {
// One of our inputs is composite-sensitive; all bets are off.
return false, false
}
allChildrenCompositeIdentical = allChildrenCompositeIdentical && childCompositeIdentical
}

if allChildrenCompositeIdentical {
// It doesn't matter what this operator does - its inputs are always
// identical so the output will be the same.
return true, true
}
return false

if opt.IsCompositeInsensitiveOp(e) {
// The operator is known to be composite-insensitive. If its result is a
// non-composite type, it is also composite-identical.
return true, !colinfo.HasCompositeKeyEncoding(e.(opt.ScalarExpr).DataType())
}

return false, false
}
return canBeSensitive(e)

isCompositeInsensitive, _ := check(e)
return !isCompositeInsensitive
}
2 changes: 0 additions & 2 deletions pkg/sql/opt/memo/testdata/logprops/join
Original file line number Diff line number Diff line change
Expand Up @@ -1605,7 +1605,6 @@ SELECT (SELECT m FROM
with &1
├── columns: m:29(int)
├── volatile, mutations
├── fd: ()-->(29)
├── prune: (29)
├── project
│ ├── columns: uv.u:6(int!null) uv.v:7(int!null)
Expand Down Expand Up @@ -1637,7 +1636,6 @@ with &1
│ └── function: unique_rowid [type=int]
└── project
├── columns: m:29(int)
├── fd: ()-->(29)
├── prune: (29)
├── scan uv
└── projections
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/opt/memo/testdata/logprops/project
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ project
└── project
├── columns: exists:18(bool)
├── outer: (1)
├── fd: ()-->(18)
├── prune: (18)
├── scan xysd
│ ├── columns: x:6(int!null) y:7(int) s:8(string) d:9(decimal!null) xysd.crdb_internal_mvcc_timestamp:10(decimal) xysd.tableoid:11(oid)
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/norm/testdata/rules/decorrelate
Original file line number Diff line number Diff line change
Expand Up @@ -4241,7 +4241,7 @@ FROM a
----
project
├── columns: a:29!null x:30 y:31 b:32 exists:33 count:34!null
├── fd: ()-->(29,31-33)
├── fd: ()-->(29)
├── group-by
│ ├── columns: k:1!null xy.x:8 count_rows:28!null
│ ├── grouping columns: k:1!null
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/opt/norm/testdata/rules/inline
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,6 @@ SELECT EXISTS(SELECT * FROM xy WHERE x=1 OR x=2), expr*2 AS r FROM (SELECT k+1 A
project
├── columns: exists:13 r:14!null
├── immutable
├── fd: ()-->(13)
├── scan a
│ ├── columns: k:1!null
│ └── key: (1)
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/opt/norm/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -1433,7 +1433,6 @@ SELECT (SELECT i_name FROM item LIMIT 1)
----
project
├── columns: i_name:57
├── fd: ()-->(57)
├── inner-join (hash)
│ ├── columns: h_data:9!null ol_o_id:12!null ol_d_id:13!null ol_w_id:14!null ol_number:15!null ol_dist_info:21!null true_agg:48
│ ├── fd: (12-15)-->(21,48), (9)==(21), (21)==(9)
Expand Down
1 change: 0 additions & 1 deletion pkg/sql/opt/norm/testdata/rules/project
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,6 @@ SELECT (SELECT x FROM (VALUES (1), (2)) f(x)) FROM (VALUES (2), (3))
project
├── columns: x:3
├── cardinality: [2 - 2]
├── fd: ()-->(3)
├── values
│ ├── cardinality: [2 - 2]
│ ├── ()
Expand Down
5 changes: 2 additions & 3 deletions pkg/sql/opt/norm/testdata/rules/scalar
Original file line number Diff line number Diff line change
Expand Up @@ -1691,14 +1691,14 @@ project
│ │ ├── columns: c.k:1!null b.k:8!null array:22 canary:25!null
│ │ ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one)
│ │ ├── key: (8)
│ │ ├── fd: ()-->(22,25), (1)==(8), (8)==(1)
│ │ ├── fd: ()-->(25), (8)-->(22), (1)==(8), (8)==(1)
│ │ ├── scan a [as=c]
│ │ │ ├── columns: c.k:1!null
│ │ │ └── key: (1)
│ │ ├── project
│ │ │ ├── columns: canary:25!null array:22 b.k:8!null
│ │ │ ├── key: (8)
│ │ │ ├── fd: ()-->(22,25)
│ │ │ ├── fd: ()-->(25), (8)-->(22)
│ │ │ ├── scan a [as=b]
│ │ │ │ ├── columns: b.k:8!null
│ │ │ │ └── key: (8)
Expand Down Expand Up @@ -1796,7 +1796,6 @@ SELECT ARRAY(SELECT k FROM a) FROM a
----
project
├── columns: array:15
├── fd: ()-->(15)
├── scan a
└── projections
└── array-flatten [as=array:15, subquery]
Expand Down
Loading

0 comments on commit 186313a

Please sign in to comment.