Skip to content

Commit

Permalink
opt: add With operator
Browse files Browse the repository at this point in the history
This commit introduces a With operator to opt, which allows us to
reference CTEs multiple times.

This enables some missing functionality, where we can reference CTEs
multiple times, or not at all, even if they contain mutations. We lose
some optimizations because CTEs now present an optimization fence.

This might be fixable with a rule something like:
```
(With
  $value:*
  $input:(WithRef) & (References $input $value)
)
=>
$value
```
subject to certain side-effect restrictions, along with some rule props
to get the Withs down further in the tree.

Additionally, as they are now, WithExprs present another optimization
fence, since we don't have any rules for pushing them further down the
tree. Ideally, we would get all WithExprs that *can* be inlined down to
their point of use, and all those that cannot up to the very root of the
tree. This is future work.

Fixes cockroachdb#24307.
Fixes cockroachdb#21084.

Release note (sql change): Common Table Expressions (CTEs) may now be
referenced from multiple locations in a query.
  • Loading branch information
Justin Jaffray committed Jul 15, 2019
1 parent 69fc2b9 commit ff73a3d
Show file tree
Hide file tree
Showing 36 changed files with 2,026 additions and 588 deletions.
12 changes: 11 additions & 1 deletion pkg/sql/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ type bufferNode struct {
// processors, but this node is local.
bufferedRows *rowcontainer.RowContainer
passThruNextRowIdx int

// label is a string used to describe the node in an EXPLAIN plan.
label string
}

func (n *bufferNode) startExec(params runParams) error {
Expand Down Expand Up @@ -64,7 +67,11 @@ func (n *bufferNode) Values() tree.Datums {

func (n *bufferNode) Close(ctx context.Context) {
n.plan.Close(ctx)
n.bufferedRows.Close(ctx)
// It's valid to be Closed without startExec having been called, in which
// case n.bufferedRows will be nil.
if n.bufferedRows != nil {
n.bufferedRows.Close(ctx)
}
}

// scanBufferNode behaves like an iterator into the bufferNode it is
Expand All @@ -74,6 +81,9 @@ type scanBufferNode struct {
buffer *bufferNode

nextRowIdx int

// label is a string used to describe the node in an EXPLAIN plan.
label string
}

func (n *scanBufferNode) startExec(runParams) error {
Expand Down
53 changes: 15 additions & 38 deletions pkg/sql/logictest/testdata/logic_test/with
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
# LogicTest: local local-opt fakedist fakedist-opt fakedist-metadata

query error unsupported multiple use of CTE clause "a"
WITH a AS (SELECT 1) SELECT * FROM a CROSS JOIN a

statement ok
CREATE TABLE x(a) AS SELECT generate_series(1, 3)

Expand Down Expand Up @@ -144,41 +141,6 @@ WITH t AS (
)
SELECT * FROM t

# Regression test for #24307 until CockroachDB learns how to execute
# side effects no matter what.
query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
INSERT INTO x(a) VALUES(0) RETURNING a
)
SELECT 1

query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
SELECT * FROM (
WITH b AS (INSERT INTO x(a) VALUES(0) RETURNING a)
TABLE b
)
)
SELECT 1

query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
DELETE FROM x RETURNING a
)
SELECT 1

query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
UPSERT INTO x(a) VALUES(0) RETURNING a
)
SELECT 1

query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
UPDATE x SET a = 0 RETURNING a
)
SELECT 1

# however if there are no side effects, no errors are required.
query I
WITH t AS (SELECT 1) SELECT 2
Expand Down Expand Up @@ -237,3 +199,18 @@ query I
((WITH lim(x) AS (SELECT 1) SELECT 123) LIMIT (SELECT x FROM lim))
----
123

# CTE with an ORDER BY.

statement ok
CREATE TABLE ab (a INT PRIMARY KEY, b INT)

statement ok
INSERT INTO ab VALUES (1, 2), (3, 4), (5, 6)

query I
WITH a AS (SELECT a FROM ab ORDER BY b) SELECT * FROM a
----
1
3
5
42 changes: 42 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/with-hp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# LogicTest: local fakedist

statement ok
CREATE TABLE x(a) AS SELECT generate_series(1, 3)

statement ok
CREATE TABLE y(a) AS SELECT generate_series(2, 4)

# Regression test for #24307 until CockroachDB learns how to execute
# side effects no matter what.
query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
INSERT INTO x(a) VALUES(0) RETURNING a
)
SELECT 1

query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
SELECT * FROM (
WITH b AS (INSERT INTO x(a) VALUES(0) RETURNING a)
TABLE b
)
)
SELECT 1

query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
DELETE FROM x RETURNING a
)
SELECT 1

query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
UPSERT INTO x(a) VALUES(0) RETURNING a
)
SELECT 1

query error unimplemented: common table expression "t" with side effects was not used in query
WITH t AS (
UPDATE x SET a = 0 RETURNING a
)
SELECT 1
70 changes: 70 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/with-opt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# LogicTest: local-opt fakedist-opt

statement ok
CREATE TABLE x(a) AS SELECT generate_series(1, 3)

statement ok
CREATE TABLE y(b) AS SELECT generate_series(2, 4)

# Referencing a CTE multiple times.
query II
WITH t AS (SELECT b FROM y) SELECT * FROM t JOIN t AS q ON true
----
2 2
2 3
2 4
3 2
3 3
3 4
4 2
4 3
4 4

query II
WITH
one AS (SELECT a AS u FROM x),
two AS (SELECT b AS v FROM (SELECT b FROM y UNION ALL SELECT u FROM one))
SELECT
*
FROM
one JOIN two ON u = v
----
1 1
2 2
3 3
2 2
3 3

# Mutation CTEs that aren't referenced elsewhere in the query.
statement ok
CREATE TABLE z (c INT PRIMARY KEY);

query I
WITH foo AS (INSERT INTO z VALUES (10) RETURNING 1) SELECT 2
----
2

query I
SELECT * FROM z
----
10

query I
WITH foo AS (UPDATE z SET c = 20 RETURNING 1) SELECT 3
----
3

query I
SELECT * FROM z
----
20

query I
WITH foo AS (DELETE FROM z RETURNING 1) SELECT 4
----
4

query I
SELECT count(*) FROM z
----
0
8 changes: 8 additions & 0 deletions pkg/sql/opt/bench/stub_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,11 @@ func (f *stubFactory) ConstructAlterTableSplit(
) (exec.Node, error) {
return struct{}{}, nil
}

func (f *stubFactory) ConstructBuffer(value exec.Node, label string) (exec.Node, error) {
return struct{}{}, nil
}

func (f *stubFactory) ConstructScanBuffer(ref exec.Node, label string) (exec.Node, error) {
return struct{}{}, nil
}
16 changes: 16 additions & 0 deletions pkg/sql/opt/exec/execbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,22 @@ type Builder struct {
// each relational subexpression when evalCtx.SessionData.SaveTablesPrefix is
// non-empty.
nameGen *memo.ExprNameGenerator

// withExprs is the set of With expressions which may be referenced elsewhere
// in the query.
// TODO(justin): set this up so that we can look them up by index lookups
// rather than scans.
withExprs []builtWithExpr
}

// builtWithExpr is metadata regarding a With expression which has already been
// added to the set of subqueries for the query.
type builtWithExpr struct {
id opt.WithID
// outputCols maps the output ColumnIDs of the With expression to the ordinal
// positions they are output to. See execPlan.outputCols for more details.
outputCols opt.ColMap
bufferNode exec.Node
}

// New constructs an instance of the execution node builder using the
Expand Down
98 changes: 98 additions & 0 deletions pkg/sql/opt/exec/execbuilder/relational.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
package execbuilder

import (
"bytes"
"fmt"

"github.com/cockroachdb/cockroach/pkg/server/telemetry"
Expand Down Expand Up @@ -235,6 +236,12 @@ func (b *Builder) buildRelational(e memo.RelExpr) (execPlan, error) {
case *memo.CreateTableExpr:
ep, err = b.buildCreateTable(t)

case *memo.WithExpr:
ep, err = b.buildWith(t)

case *memo.WithScanExpr:
ep, err = b.buildWithScan(t)

case *memo.ExplainExpr:
ep, err = b.buildExplain(t)

Expand Down Expand Up @@ -1301,6 +1308,97 @@ func (b *Builder) buildMax1Row(max1Row *memo.Max1RowExpr) (execPlan, error) {

}

func (b *Builder) buildWith(with *memo.WithExpr) (execPlan, error) {
value, err := b.buildRelational(with.Binding)
if err != nil {
return execPlan{}, err
}

var label bytes.Buffer
fmt.Fprintf(&label, "buffer %d", with.ID)
if with.Name != "" {
fmt.Fprintf(&label, " (%s)", with.Name)
}

buffer, err := b.factory.ConstructBuffer(value.root, label.String())
if err != nil {
return execPlan{}, err
}

// TODO(justin): if the binding here has a spoolNode at its root, we can
// remove it, since subquery execution also guarantees complete execution.

// Add the buffer as a subquery so it gets executed ahead of time, and is
// available to be referenced by other queries.
b.subqueries = append(b.subqueries, exec.Subquery{
ExprNode: &tree.Subquery{
Select: with.OriginalExpr.Select,
},
// TODO(justin): this is wasteful: both the subquery and the bufferNode
// will buffer up all the results. This should be fixed by either making
// the buffer point directly to the subquery results or adding a new
// subquery mode that reads and discards all rows. This could possibly also
// be fixed by ensuring that bufferNode exhausts its input (and forcing it
// to behave like a spoolNode) and using the EXISTS mode.
Mode: exec.SubqueryAllRows,
Root: buffer,
})

b.withExprs = append(b.withExprs, builtWithExpr{
id: with.ID,
outputCols: value.outputCols,
bufferNode: buffer,
})

return b.buildRelational(with.Input)
}

func (b *Builder) buildWithScan(withScan *memo.WithScanExpr) (execPlan, error) {
id := withScan.ID
var e *builtWithExpr
for i := range b.withExprs {
if b.withExprs[i].id == id {
e = &b.withExprs[i]
break
}
}
if e == nil {
panic(errors.AssertionFailedf("couldn't find With expression with ID %d", id))
}

var label bytes.Buffer
fmt.Fprintf(&label, "buffer %d", withScan.ID)
if withScan.Name != "" {
fmt.Fprintf(&label, " (%s)", withScan.Name)
}

node, err := b.factory.ConstructScanBuffer(e.bufferNode, label.String())
if err != nil {
return execPlan{}, err
}

// The ColumnIDs from the With expression need to get remapped according to
// the mapping in the withScan to get the actual colMap for this expression.
var outputCols opt.ColMap

referencedExpr := b.mem.GetExpr(withScan.ID)
if !referencedExpr.Relational().OutputCols.Equals(withScan.InCols.ToSet()) {
panic(errors.AssertionFailedf(
"columns being output from WITH do not match expected columns",
))
}

for i := range withScan.InCols {
idx, _ := e.outputCols.Get(int(withScan.InCols[i]))
outputCols.Set(int(withScan.OutCols[i]), idx)
}

return execPlan{
root: node,
outputCols: outputCols,
}, nil
}

func (b *Builder) buildProjectSet(projectSet *memo.ProjectSetExpr) (execPlan, error) {
input, err := b.buildRelational(projectSet.Input)
if err != nil {
Expand Down
Loading

0 comments on commit ff73a3d

Please sign in to comment.