From 201ee03769f47cb73ec42e96797ef9956af48c7d Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 16 Oct 2020 16:35:25 -0700 Subject: [PATCH 1/2] xform: organize rules by the type of expressions they match This commit organizes exploration rules by the type of expressions they match. `GenerateZigzagJoins` and `GenerateInvertedZigzagJoins` have been moved to `select.opt`. All other rules were correctly organized. Release note: None --- pkg/sql/opt/norm/testdata/rules/combo | 18 ++++---- .../opttester/testdata/explore-trace | 20 ++++----- .../testutils/opttester/testdata/opt-steps | 12 +++--- pkg/sql/opt/xform/rules/join.opt | 42 ------------------- pkg/sql/opt/xform/rules/select.opt | 42 +++++++++++++++++++ 5 files changed, 67 insertions(+), 67 deletions(-) diff --git a/pkg/sql/opt/norm/testdata/rules/combo b/pkg/sql/opt/norm/testdata/rules/combo index 8ccbc6d3244a..6185736cca60 100644 --- a/pkg/sql/opt/norm/testdata/rules/combo +++ b/pkg/sql/opt/norm/testdata/rules/combo @@ -297,15 +297,15 @@ PruneJoinRightCols GenerateIndexScans (no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- -GenerateZigzagJoins (no changes) --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- GeneratePartialIndexScans (no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- GenerateConstrainedScans (no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- +GenerateZigzagJoins (no changes) +-------------------------------------------------------------------------------- 
+-------------------------------------------------------------------------------- GenerateIndexScans (no changes) -------------------------------------------------------------------------------- ================================================================================ @@ -583,9 +583,6 @@ GenerateIndexScans ├── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] └── f:3 > 100.0 [outer=(3), constraints=(/3: [/100.00000000000001 - ]; tight)] -------------------------------------------------------------------------------- -GenerateZigzagJoins (no changes) --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- GeneratePartialIndexScans (no changes) -------------------------------------------------------------------------------- ================================================================================ @@ -610,6 +607,9 @@ GenerateConstrainedScans - ├── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] - └── f:3 > 100.0 [outer=(3), constraints=(/3: [/100.00000000000001 - ]; tight)] + └── fd: ()-->(4), (1)-->(3), (3)-->(1) +-------------------------------------------------------------------------------- +GenerateZigzagJoins (no changes) +-------------------------------------------------------------------------------- ================================================================================ Final best expression Cost: 14.10 @@ -2872,15 +2872,15 @@ GenerateIndexScans (no changes) GenerateIndexScans (no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- -GenerateZigzagJoins (no changes) --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- GeneratePartialIndexScans 
(no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- GenerateConstrainedScans (no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- +GenerateZigzagJoins (no changes) +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- ReorderJoins (no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- diff --git a/pkg/sql/opt/testutils/opttester/testdata/explore-trace b/pkg/sql/opt/testutils/opttester/testdata/explore-trace index 1ba3c18a7d6f..d1944b4741cf 100644 --- a/pkg/sql/opt/testutils/opttester/testdata/explore-trace +++ b/pkg/sql/opt/testutils/opttester/testdata/explore-trace @@ -38,7 +38,7 @@ New expression 1 of 1: └── const: 1 [type=int] ================================================================================ -GenerateZigzagJoins +GeneratePartialIndexScans ================================================================================ Source expression: select @@ -57,7 +57,7 @@ Source expression: No new expressions. ================================================================================ -GeneratePartialIndexScans +GenerateConstrainedScans ================================================================================ Source expression: select @@ -73,10 +73,15 @@ Source expression: ├── variable: b:2 [type=int] └── const: 1 [type=int] -No new expressions. 
+New expression 1 of 1: + scan ab@secondary + ├── columns: a:1(int!null) b:2(int!null) + ├── constraint: /2/1: [/1 - /1] + ├── key: (1) + └── fd: ()-->(2) ================================================================================ -GenerateConstrainedScans +GenerateZigzagJoins ================================================================================ Source expression: select @@ -92,12 +97,7 @@ Source expression: ├── variable: b:2 [type=int] └── const: 1 [type=int] -New expression 1 of 1: - scan ab@secondary - ├── columns: a:1(int!null) b:2(int!null) - ├── constraint: /2/1: [/1 - /1] - ├── key: (1) - └── fd: ()-->(2) +No new expressions. ---- ---- diff --git a/pkg/sql/opt/testutils/opttester/testdata/opt-steps b/pkg/sql/opt/testutils/opttester/testdata/opt-steps index 0b0f782dae89..ef34845f6d5e 100644 --- a/pkg/sql/opt/testutils/opttester/testdata/opt-steps +++ b/pkg/sql/opt/testutils/opttester/testdata/opt-steps @@ -143,9 +143,6 @@ GenerateIndexScans (higher cost) ├── variable: b:2 [type=int] └── const: 1 [type=int] -------------------------------------------------------------------------------- -GenerateZigzagJoins (no changes) --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- GeneratePartialIndexScans (no changes) -------------------------------------------------------------------------------- ================================================================================ @@ -167,6 +164,9 @@ GenerateConstrainedScans - ├── variable: b:2 [type=int] - └── const: 1 [type=int] + └── fd: ()-->(2) +-------------------------------------------------------------------------------- +GenerateZigzagJoins (no changes) +-------------------------------------------------------------------------------- ================================================================================ Final best expression Cost: 14.41 @@ -714,9 +714,6 @@ GenerateIndexScans 
(higher cost) ├── variable: k:1 [type=int] └── const: 1 [type=int] -------------------------------------------------------------------------------- -GenerateZigzagJoins (no changes) --------------------------------------------------------------------------------- --------------------------------------------------------------------------------- GeneratePartialIndexScans (no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- @@ -746,6 +743,9 @@ FoldComparison - └── const: 1 [type=int] + ├── constraint: /2/1/3: [/true/1 - /true/1] + └── fd: ()-->(1) +-------------------------------------------------------------------------------- +GenerateZigzagJoins (no changes) +-------------------------------------------------------------------------------- ================================================================================ Final best expression Cost: 14.51 diff --git a/pkg/sql/opt/xform/rules/join.opt b/pkg/sql/opt/xform/rules/join.opt index 338ced49c271..97e41f404e21 100644 --- a/pkg/sql/opt/xform/rules/join.opt +++ b/pkg/sql/opt/xform/rules/join.opt @@ -234,48 +234,6 @@ $private ) -# GenerateZigzagJoins creates ZigzagJoin operators for all index pairs (of the -# Scan table) where the prefix column(s) of both indexes is/are fixed to -# constant values in the filters. See comments in GenerateZigzagJoin and -# distsqlrun/zigzagjoiner.go for more details on when a zigzag join can be -# planned. -# -# Zigzag joins are prohibited when the source Scan operator has been configured -# with a row-level locking mode. This is mostly out of convenience so that these -# row-level locking modes don't need to added to the ZigzagJoin operator. There -# doesn't seem to be a strong reason to support this, but if one comes up, it -# should be possible to lift this restriction. 
-[GenerateZigzagJoins, Explore] -(Select - (Scan $scan:*) & (IsCanonicalScan $scan) & ^(IsLocking $scan) - $filters:* -) -=> -(GenerateZigzagJoins $scan $filters) - -# GenerateInvertedIndexZigzagJoins creates ZigzagJoin operators for inverted -# indexes that can be constrained with two or more distinct constant values. -# Inverted indexes contain one row for each path-to-leaf in a JSON value, so one -# row in the primary index could generate multiple inverted index keys. This -# property can be exploited by zigzag joining on the same inverted index, fixed -# at any two of the JSON paths we are querying for. -# -# Zigzag joins are prohibited when the source Scan operator has been configured -# with a row-level locking mode. This is mostly out of convenience so that these -# row-level locking modes don't need to added to the ZigzagJoin operator. There -# doesn't seem to be a strong reason to support this, but if one comes up, it -# should be possible to lift this restriction. -[GenerateInvertedIndexZigzagJoins, Explore] -(Select - (Scan $scan:*) & - (IsCanonicalScan $scan) & - ^(IsLocking $scan) & - (HasInvertedIndexes $scan) - $filters:* -) -=> -(GenerateInvertedIndexZigzagJoins $scan $filters) - # GenerateLookupJoinWithFilter creates a LookupJoin alternative for a Join which # has a Select->Scan combination as its right input. The filter can get merged # with the ON condition (this is correct for inner, left, and semi/anti join). diff --git a/pkg/sql/opt/xform/rules/select.opt b/pkg/sql/opt/xform/rules/select.opt index 27b17da359fa..5ee25162a7a6 100644 --- a/pkg/sql/opt/xform/rules/select.opt +++ b/pkg/sql/opt/xform/rules/select.opt @@ -53,6 +53,48 @@ => (GenerateInvertedIndexScans $scanPrivate $filters) +# GenerateZigzagJoins creates ZigzagJoin operators for all index pairs (of the +# Scan table) where the prefix column(s) of both indexes is/are fixed to +# constant values in the filters. 
See comments in GenerateZigzagJoin and +# distsqlrun/zigzagjoiner.go for more details on when a zigzag join can be +# planned. +# +# Zigzag joins are prohibited when the source Scan operator has been configured +# with a row-level locking mode. This is mostly out of convenience so that these +# row-level locking modes don't need to be added to the ZigzagJoin operator. There +# doesn't seem to be a strong reason to support this, but if one comes up, it +# should be possible to lift this restriction. +[GenerateZigzagJoins, Explore] +(Select + (Scan $scan:*) & (IsCanonicalScan $scan) & ^(IsLocking $scan) + $filters:* +) +=> +(GenerateZigzagJoins $scan $filters) + +# GenerateInvertedIndexZigzagJoins creates ZigzagJoin operators for inverted +# indexes that can be constrained with two or more distinct constant values. +# Inverted indexes contain one row for each path-to-leaf in a JSON value, so one +# row in the primary index could generate multiple inverted index keys. This +# property can be exploited by zigzag joining on the same inverted index, fixed +# at any two of the JSON paths we are querying for. +# +# Zigzag joins are prohibited when the source Scan operator has been configured +# with a row-level locking mode. This is mostly out of convenience so that these +# row-level locking modes don't need to be added to the ZigzagJoin operator. There +# doesn't seem to be a strong reason to support this, but if one comes up, it +# should be possible to lift this restriction. +[GenerateInvertedIndexZigzagJoins, Explore] +(Select + (Scan $scan:*) & + (IsCanonicalScan $scan) & + ^(IsLocking $scan) & + (HasInvertedIndexes $scan) + $filters:* +) +=> +(GenerateInvertedIndexZigzagJoins $scan $filters) + # SplitDisjunction splits disjunctions (Or expressions) into a Union of two # Select expressions, the first containing the left sub-expression of the Or # expression and the second containing the right sub-expression.
All other From 652ec80512a006c6f4ac37255649e1c6cf626c77 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 16 Oct 2020 17:11:50 -0700 Subject: [PATCH 2/2] xform: reorganize tests to match the organization of the rules This commit moves exploration tests around (making some minor necessary changes along the way) so that tests for rules in `foo.opt` can be found in `testdata/foo`. Most of the changes were related to zigzag join rules which moved from `join` to `select`, and some tests for `GenerateConstrainedScans` that have lived in `scan` for years. The goal of this change is to make it easier to find tests for a specific rule and to set a precedent for future tests. Release note: None --- pkg/sql/opt/xform/testdata/rules/join | 1870 ++++++----------------- pkg/sql/opt/xform/testdata/rules/limit | 6 +- pkg/sql/opt/xform/testdata/rules/scan | 370 ----- pkg/sql/opt/xform/testdata/rules/select | 1509 ++++++++++++++++-- 4 files changed, 1891 insertions(+), 1864 deletions(-) diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 0395f8e412c8..fc346e296a8a 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -57,16 +57,6 @@ CREATE TABLE zz ( ) ---- -exec-ddl -CREATE TABLE zz_redundant ( - a INT8 PRIMARY KEY, - b INT8 NULL, - c INT8 NULL, - INDEX idx_u (b ASC, c ASC), - INDEX idx_v (b ASC, c ASC) -) ----- - exec-ddl CREATE TABLE large (m INT, n INT) ---- @@ -1631,6 +1621,228 @@ left-join (hash) └── filters └── a:1 = z:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)] +# ----------------------------------------------------- +# CommuteSemiJoin +# ----------------------------------------------------- + +exec-ddl +CREATE TABLE def (d INT, e INT, f INT, PRIMARY KEY (d, e)); +---- + +exec-ddl +ALTER TABLE abc INJECT STATISTICS '[ + { + "columns": ["a"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 100, + "distinct_count": 100 + }, + { +
"columns": ["b"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 100, + "distinct_count": 100 + } +]' +---- + +exec-ddl +ALTER TABLE def INJECT STATISTICS '[ + { + "columns": ["d"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 10000, + "distinct_count": 10000 + }, + { + "columns": ["e"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 10000, + "distinct_count": 10000 + } +]' +---- + +# Test the CommuteSemiJoinRule creates an appropriate inner join. +opt +SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=f) +---- +semi-join (hash) + ├── columns: a:1 b:2 c:3 + ├── scan abc + │ └── columns: a:1 b:2 c:3 + ├── scan def + │ └── columns: f:8 + └── filters + └── a:1 = f:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)] + +# Test that we don't commute a SemiJoin when the On conditions are not +# equalities. For example, in this test we have a Lt condition. +opt expect-not=CommuteSemiJoin +SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a < e) +---- +semi-join (cross) + ├── columns: a:1 b:2 c:3 + ├── scan abc + │ └── columns: a:1 b:2 c:3 + ├── scan def + │ └── columns: e:7!null + └── filters + └── a:1 < e:7 [outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ])] + +# Test that we don't commute a SemiJoin when the On conditions are not +# equalities. For example, in this test we have an Or condition. 
+opt expect-not=CommuteSemiJoin +SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d OR c=e) +---- +semi-join (cross) + ├── columns: a:1 b:2 c:3 + ├── scan abc + │ └── columns: a:1 b:2 c:3 + ├── scan def + │ ├── columns: d:6!null e:7!null + │ └── key: (6,7) + └── filters + └── (a:1 = d:6) OR (c:3 = e:7) [outer=(1,3,6,7)] + +opt disable=CommuteSemiJoin format=show-all +SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d AND c=e) +---- +semi-join (lookup def) + ├── columns: a:1(int) b:2(int) c:3(int) + ├── key columns: [1 3] = [6 7] + ├── lookup columns are key + ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0] + ├── cost: 511.050394 + ├── prune: (2) + ├── interesting orderings: (+1,+2) (+2,+3) + ├── scan t.public.abc + │ ├── columns: t.public.abc.a:1(int) t.public.abc.b:2(int) t.public.abc.c:3(int) + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=1] + │ ├── cost: 111.02 + │ ├── prune: (1-3) + │ ├── interesting orderings: (+1,+2) (+2,+3) + │ └── unfiltered-cols: (1-5) + └── filters (true) + +# TODO(rytaft): See stats/join tests. Since we don't collect the stats properly +# for SemiJoins, we prefer the InnerJoin plan over the SemiJoin one more times +# than necessary. 
+opt format=show-all +SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d AND c=e) +---- +semi-join (lookup def) + ├── columns: a:1(int) b:2(int) c:3(int) + ├── key columns: [1 3] = [6 7] + ├── lookup columns are key + ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0] + ├── cost: 511.050394 + ├── prune: (2) + ├── interesting orderings: (+1,+2) (+2,+3) + ├── scan t.public.abc + │ ├── columns: t.public.abc.a:1(int) t.public.abc.b:2(int) t.public.abc.c:3(int) + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=1] + │ ├── cost: 111.02 + │ ├── prune: (1-3) + │ ├── interesting orderings: (+1,+2) (+2,+3) + │ └── unfiltered-cols: (1-5) + └── filters (true) + +exec-ddl +CREATE TABLE customers (id INT PRIMARY KEY, name STRING) +---- + +exec-ddl +CREATE TABLE orders (id INT PRIMARY KEY, cust_id INT REFERENCES customers (id), order_date DATE, INDEX (order_date) STORING (cust_id)) +---- + +exec-ddl +ALTER TABLE customers INJECT STATISTICS '[ + { + "columns": ["id"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 100000, + "distinct_count": 100000 + } +]' +---- + +exec-ddl +ALTER TABLE orders INJECT STATISTICS '[ + { + "columns": ["id"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000000, + "distinct_count": 1000000 + }, + { + "columns": ["cust_id"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000000, + "distinct_count": 10000000 + }, + { + "columns": ["order_date"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000000, + "distinct_count": 1000000 + } +]' +---- + +opt disable=CommuteSemiJoin +SELECT * +FROM customers c +WHERE EXISTS(SELECT * FROM orders o WHERE o.cust_id=c.id AND o.order_date='2019-01-01') +---- +semi-join (merge) + ├── columns: id:1!null name:2 + ├── left ordering: +1 + ├── right ordering: +5 + ├── key: (1) + ├── fd: (1)-->(2) + ├── scan c + │ ├── columns: c.id:1!null name:2 + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── 
ordering: +1 + ├── sort + │ ├── columns: cust_id:5 order_date:6!null + │ ├── fd: ()-->(6) + │ ├── ordering: +5 opt(6) [actual: +5] + │ └── scan o@secondary + │ ├── columns: cust_id:5 order_date:6!null + │ ├── constraint: /6/4: [/'2019-01-01' - /'2019-01-01'] + │ └── fd: ()-->(6) + └── filters (true) + +# The CommuteSemiJoin rule allows a much better plan because we can use +# a lookup join. +opt +SELECT * +FROM customers c +WHERE EXISTS(SELECT * FROM orders o WHERE o.cust_id=c.id AND o.order_date='2019-01-01') +---- +project + ├── columns: id:1!null name:2 + ├── key: (1) + ├── fd: (1)-->(2) + └── inner-join (lookup customers) + ├── columns: c.id:1!null name:2 cust_id:5!null + ├── key columns: [5] = [1] + ├── lookup columns are key + ├── key: (5) + ├── fd: (1)-->(2), (1)==(5), (5)==(1) + ├── distinct-on + │ ├── columns: cust_id:5 + │ ├── grouping columns: cust_id:5 + │ ├── key: (5) + │ └── scan o@secondary + │ ├── columns: cust_id:5 order_date:6!null + │ ├── constraint: /6/4: [/'2019-01-01' - /'2019-01-01'] + │ └── fd: ()-->(6) + └── filters (true) + # -------------------------------------------------- # GenerateMergeJoins # -------------------------------------------------- @@ -1658,14 +1870,14 @@ memo (optimized, ~12KB, required=[presentation: a:1,b:2,c:3,x:6,y:7,z:8]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) (merge-join G2 G3 G5 inner-join,+1,+6) (lookup-join G2 G5 xyz@xy,keyCols=[1],outCols=(1-3,6-8)) (merge-join G3 G2 G5 inner-join,+6,+1) (lookup-join G3 G5 abc@ab,keyCols=[6],outCols=(1-3,6-8)) │ └── [presentation: a:1,b:2,c:3,x:6,y:7,z:8] │ ├── best: (merge-join G2="[ordering: +1]" G3="[ordering: +6]" G5 inner-join,+1,+6) - │ └── cost: 2178.05 + │ └── cost: 1197.05 ├── G2: (scan abc,cols=(1-3)) (scan abc@ab,cols=(1-3)) (scan abc@bc,cols=(1-3)) │ ├── [ordering: +1] │ │ ├── best: (scan abc@ab,cols=(1-3)) - │ │ └── cost: 1074.02 + │ │ └── cost: 111.02 │ └── [] │ ├── best: (scan abc,cols=(1-3)) - │ └── cost: 1074.02 + │ └── cost: 111.02 ├── G3: (scan 
xyz,cols=(6-8)) (scan xyz@xy,cols=(6-8)) (scan xyz@yz,cols=(6-8)) │ ├── [ordering: +6] │ │ ├── best: (scan xyz@xy,cols=(6-8)) @@ -1687,11 +1899,11 @@ memo (optimized, ~8KB, required=[presentation: a:1,b:2,c:3,x:6,y:7,z:8]) ├── G1: (inner-join G2 G3 G4) │ └── [presentation: a:1,b:2,c:3,x:6,y:7,z:8] │ ├── best: (inner-join G2 G3 G4) - │ └── cost: 2188.06 + │ └── cost: 1204.81 ├── G2: (scan abc,cols=(1-3)) (scan abc@ab,cols=(1-3)) (scan abc@bc,cols=(1-3)) │ └── [] │ ├── best: (scan abc,cols=(1-3)) - │ └── cost: 1074.02 + │ └── cost: 111.02 ├── G3: (scan xyz,cols=(6-8)) (scan xyz@xy,cols=(6-8)) (scan xyz@yz,cols=(6-8)) │ └── [] │ ├── best: (scan xyz,cols=(6-8)) @@ -1741,14 +1953,14 @@ SELECT * FROM abc JOIN xyz ON a=x AND b=y WHERE b=1 AND y=1 inner-join (hash) ├── columns: a:1!null b:2!null c:3 x:6!null y:7!null z:8 ├── fd: ()-->(2,7), (1)==(6), (6)==(1), (2)==(7), (7)==(2) - ├── scan abc@bc - │ ├── columns: a:1 b:2!null c:3 - │ ├── constraint: /2/3/4: [/1 - /1] - │ └── fd: ()-->(2) ├── scan xyz@yz │ ├── columns: x:6 y:7!null z:8 │ ├── constraint: /7/8/9: [/1 - /1] │ └── fd: ()-->(7) + ├── scan abc@bc + │ ├── columns: a:1 b:2!null c:3 + │ ├── constraint: /2/3/4: [/1 - /1] + │ └── fd: ()-->(2) └── filters ├── a:1 = x:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] └── b:2 = y:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] @@ -1968,11 +2180,11 @@ memo (optimized, ~13KB, required=[presentation: a:1,b:2,c:3,x:6,y:7,z:8]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) │ └── [presentation: a:1,b:2,c:3,x:6,y:7,z:8] │ ├── best: (inner-join G3 G2 G4) - │ └── cost: 2169.68 + │ └── cost: 1208.58 ├── G2: (select G5 G6) (select G7 G6) (select G8 G6) │ └── [] - │ ├── best: (select G8 G6) - │ └── cost: 1073.23 + │ ├── best: (select G7 G6) + │ └── cost: 112.03 ├── G3: (scan xyz,cols=(6-8)) (scan xyz@xy,cols=(6-8)) (scan xyz@yz,cols=(6-8)) │ └── [] │ ├── best: (scan xyz,cols=(6-8)) @@ -1981,16 +2193,16 @@ 
memo (optimized, ~13KB, required=[presentation: a:1,b:2,c:3,x:6,y:7,z:8]) ├── G5: (scan abc,cols=(1-3)) (scan abc@ab,cols=(1-3)) (scan abc@bc,cols=(1-3)) │ └── [] │ ├── best: (scan abc,cols=(1-3)) - │ └── cost: 1074.02 + │ └── cost: 111.02 ├── G6: (filters G9) ├── G7: (scan abc@ab,cols=(1-3),constrained) │ └── [] │ ├── best: (scan abc@ab,cols=(1-3),constrained) - │ └── cost: 1074.01 + │ └── cost: 111.01 ├── G8: (scan abc@bc,cols=(1-3),constrained) │ └── [] │ ├── best: (scan abc@bc,cols=(1-3),constrained) - │ └── cost: 1063.31 + │ └── cost: 111.01 ├── G9: (eq G10 G11) ├── G10: (variable a) └── G11: (variable b) @@ -2005,12 +2217,12 @@ SELECT * FROM abc JOIN kfloat ON a=k memo (optimized, ~10KB, required=[presentation: a:1,b:2,c:3,k:6]) ├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4) │ └── [presentation: a:1,b:2,c:3,k:6] - │ ├── best: (inner-join G2 G3 G4) - │ └── cost: 12128.06 + │ ├── best: (inner-join G3 G2 G4) + │ └── cost: 2149.31 ├── G2: (scan abc,cols=(1-3)) (scan abc@ab,cols=(1-3)) (scan abc@bc,cols=(1-3)) │ └── [] │ ├── best: (scan abc,cols=(1-3)) - │ └── cost: 1074.02 + │ └── cost: 111.02 ├── G3: (scan kfloat,cols=(6)) │ └── [] │ ├── best: (scan kfloat,cols=(6)) @@ -4459,889 +4671,283 @@ project └── filters └── st_covers(c.geom:10, n.geom:16) [outer=(10,16), immutable, constraints=(/10: (/NULL - ]; /16: (/NULL - ])] -# -------------------------------------------------- -# GenerateZigzagJoins -# -------------------------------------------------- - -# Simple zigzag case - where all requested columns are in the indexes being -# joined. 
-opt -SELECT q,r FROM pqr WHERE q = 1 AND r = 2 ----- -inner-join (zigzag pqr@q pqr@r) - ├── columns: q:2!null r:3!null - ├── eq columns: [1] = [1] - ├── left fixed columns: [2] = [1] - ├── right fixed columns: [3] = [2] - ├── fd: ()-->(2,3) - └── filters - ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] - └── r:3 = 2 [outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] - -opt -SELECT q,r FROM pqr WHERE q = 1 AND r IS NULL ----- -inner-join (zigzag pqr@q pqr@r) - ├── columns: q:2!null r:3 - ├── eq columns: [1] = [1] - ├── left fixed columns: [2] = [1] - ├── right fixed columns: [3] = [NULL] - ├── fd: ()-->(2,3) - └── filters - ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] - └── r:3 IS NULL [outer=(3), constraints=(/3: [/NULL - /NULL]; tight), fd=()-->(3)] +# ----------------------------------------------------- +# ConvertSemiToInnerJoin +# ----------------------------------------------------- -memo -SELECT q,r FROM pqr WHERE q = 1 AND r = 2 ----- -memo (optimized, ~13KB, required=[presentation: q:2,r:3]) - ├── G1: (select G2 G3) (zigzag-join G3 pqr@q pqr@r) (select G4 G5) (select G6 G7) (select G8 G7) - │ └── [presentation: q:2,r:3] - │ ├── best: (zigzag-join G3 pqr@q pqr@r) - │ └── cost: 1.93 - ├── G2: (scan pqr,cols=(2,3)) - │ └── [] - │ ├── best: (scan pqr,cols=(2,3)) - │ └── cost: 1074.02 - ├── G3: (filters G9 G10) - ├── G4: (index-join G11 pqr,cols=(2,3)) - │ └── [] - │ ├── best: (index-join G11 pqr,cols=(2,3)) - │ └── cost: 75.12 - ├── G5: (filters G10) - ├── G6: (index-join G12 pqr,cols=(2,3)) - │ └── [] - │ ├── best: (index-join G12 pqr,cols=(2,3)) - │ └── cost: 75.12 - ├── G7: (filters G9) - ├── G8: (index-join G13 pqr,cols=(2,3)) - │ └── [] - │ ├── best: (index-join G13 pqr,cols=(2,3)) - │ └── cost: 75.22 - ├── G9: (eq G14 G15) - ├── G10: (eq G16 G17) - ├── G11: (scan pqr@q,cols=(1,2),constrained) - │ └── [] - │ ├── best: (scan pqr@q,cols=(1,2),constrained) - │ └── cost: 14.41 - ├── G12: (scan 
pqr@r,cols=(1,3),constrained) - │ └── [] - │ ├── best: (scan pqr@r,cols=(1,3),constrained) - │ └── cost: 14.41 - ├── G13: (scan pqr@rs,cols=(1,3),constrained) - │ └── [] - │ ├── best: (scan pqr@rs,cols=(1,3),constrained) - │ └── cost: 14.51 - ├── G14: (variable q) - ├── G15: (const 1) - ├── G16: (variable r) - └── G17: (const 2) - -# Case where the fixed columns are extracted from a complicated expression. -opt -SELECT q,r FROM pqr WHERE q = 1 AND ((r < 1 AND r > 1) OR (r >= 2 AND r <= 2)) ----- -inner-join (zigzag pqr@q pqr@r) - ├── columns: q:2!null r:3!null - ├── eq columns: [1] = [1] - ├── left fixed columns: [2] = [1] - ├── right fixed columns: [3] = [2] - ├── fd: ()-->(2,3) - └── filters - ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] - └── ((r:3 < 1) AND (r:3 > 1)) OR ((r:3 >= 2) AND (r:3 <= 2)) [outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] - -# Nested zigzag case - zigzag join needs to be wrapped in a lookup join to -# satisfy required columns. 
-opt -SELECT q,r,s FROM pqr WHERE q = 1 AND r = 2 ----- -inner-join (lookup pqr) - ├── columns: q:2!null r:3!null s:4 - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── fd: ()-->(2,3) - ├── inner-join (zigzag pqr@q pqr@r) - │ ├── columns: p:1!null q:2!null r:3!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [2] = [1] - │ ├── right fixed columns: [3] = [2] - │ ├── fd: ()-->(2,3) - │ └── filters - │ ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] - │ └── r:3 = 2 [outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] - └── filters (true) - -memo -SELECT q,r,s FROM pqr WHERE q = 1 AND r = 2 ----- -memo (optimized, ~15KB, required=[presentation: q:2,r:3,s:4]) - ├── G1: (select G2 G3) (lookup-join G4 G5 pqr,keyCols=[1],outCols=(2-4)) (select G6 G7) (select G8 G9) (select G10 G9) - │ └── [presentation: q:2,r:3,s:4] - │ ├── best: (lookup-join G4 G5 pqr,keyCols=[1],outCols=(2-4)) - │ └── cost: 7.48 - ├── G2: (scan pqr,cols=(2-4)) - │ └── [] - │ ├── best: (scan pqr,cols=(2-4)) - │ └── cost: 1084.02 - ├── G3: (filters G11 G12) - ├── G4: (zigzag-join G3 pqr@q pqr@r) - │ └── [] - │ ├── best: (zigzag-join G3 pqr@q pqr@r) - │ └── cost: 1.94 - ├── G5: (filters) - ├── G6: (index-join G13 pqr,cols=(2-4)) - │ └── [] - │ ├── best: (index-join G13 pqr,cols=(2-4)) - │ └── cost: 75.22 - ├── G7: (filters G12) - ├── G8: (index-join G14 pqr,cols=(2-4)) - │ └── [] - │ ├── best: (index-join G14 pqr,cols=(2-4)) - │ └── cost: 75.22 - ├── G9: (filters G11) - ├── G10: (index-join G15 pqr,cols=(2-4)) - │ └── [] - │ ├── best: (index-join G15 pqr,cols=(2-4)) - │ └── cost: 75.32 - ├── G11: (eq G16 G17) - ├── G12: (eq G18 G19) - ├── G13: (scan pqr@q,cols=(1,2),constrained) - │ └── [] - │ ├── best: (scan pqr@q,cols=(1,2),constrained) - │ └── cost: 14.41 - ├── G14: (scan pqr@r,cols=(1,3),constrained) - │ └── [] - │ ├── best: (scan pqr@r,cols=(1,3),constrained) - │ └── cost: 14.41 - ├── G15: (scan pqr@rs,cols=(1,3,4),constrained) - │ └── [] - │ 
├── best: (scan pqr@rs,cols=(1,3,4),constrained) - │ └── cost: 14.61 - ├── G16: (variable q) - ├── G17: (const 1) - ├── G18: (variable r) - └── G19: (const 2) - -# Zigzag with fixed columns of different types. -opt -SELECT q,s FROM pqr WHERE q = 1 AND s = 'foo' ----- -inner-join (zigzag pqr@q pqr@s) - ├── columns: q:2!null s:4!null - ├── eq columns: [1] = [1] - ├── left fixed columns: [2] = [1] - ├── right fixed columns: [4] = ['foo'] - ├── fd: ()-->(2,4) - └── filters - ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] - └── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] - -memo -SELECT q,s FROM pqr WHERE q = 1 AND s = 'foo' ----- -memo (optimized, ~11KB, required=[presentation: q:2,s:4]) - ├── G1: (select G2 G3) (zigzag-join G3 pqr@q pqr@s) (select G4 G5) (select G6 G7) - │ └── [presentation: q:2,s:4] - │ ├── best: (zigzag-join G3 pqr@q pqr@s) - │ └── cost: 1.94 - ├── G2: (scan pqr,cols=(2,4)) - │ └── [] - │ ├── best: (scan pqr,cols=(2,4)) - │ └── cost: 1074.02 - ├── G3: (filters G8 G9) - ├── G4: (index-join G10 pqr,cols=(2,4)) - │ └── [] - │ ├── best: (index-join G10 pqr,cols=(2,4)) - │ └── cost: 75.12 - ├── G5: (filters G9) - ├── G6: (index-join G11 pqr,cols=(2,4)) - │ └── [] - │ ├── best: (index-join G11 pqr,cols=(2,4)) - │ └── cost: 75.22 - ├── G7: (filters G8) - ├── G8: (eq G12 G13) - ├── G9: (eq G14 G15) - ├── G10: (scan pqr@q,cols=(1,2),constrained) - │ └── [] - │ ├── best: (scan pqr@q,cols=(1,2),constrained) - │ └── cost: 14.41 - ├── G11: (scan pqr@s,cols=(1,4),constrained) - │ └── [] - │ ├── best: (scan pqr@s,cols=(1,4),constrained) - │ └── cost: 14.51 - ├── G12: (variable q) - ├── G13: (const 1) - ├── G14: (variable s) - └── G15: (const 'foo') - -# Zigzag with implicit equality column in addition to primary key: -# indexes on (r,s) and (t,s) should be chosen even though s is not being fixed -# in the ON clause. 
-opt -SELECT r,t FROM pqr WHERE r = 1 AND t = 'foo' ----- -inner-join (zigzag pqr@rs pqr@ts) - ├── columns: r:3!null t:5!null - ├── eq columns: [4 1] = [4 1] - ├── left fixed columns: [3] = [1] - ├── right fixed columns: [5] = ['foo'] - ├── fd: ()-->(3,5) - └── filters - ├── r:3 = 1 [outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)] - └── t:5 = 'foo' [outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)] - -memo -SELECT r,t FROM pqr WHERE r = 1 AND t = 'foo' ----- -memo (optimized, ~13KB, required=[presentation: r:3,t:5]) - ├── G1: (select G2 G3) (zigzag-join G3 pqr@rs pqr@ts) (select G4 G5) (select G6 G5) (select G7 G8) - │ └── [presentation: r:3,t:5] - │ ├── best: (zigzag-join G3 pqr@rs pqr@ts) - │ └── cost: 1.95 - ├── G2: (scan pqr,cols=(3,5)) - │ └── [] - │ ├── best: (scan pqr,cols=(3,5)) - │ └── cost: 1074.02 - ├── G3: (filters G9 G10) - ├── G4: (index-join G11 pqr,cols=(3,5)) - │ └── [] - │ ├── best: (index-join G11 pqr,cols=(3,5)) - │ └── cost: 75.12 - ├── G5: (filters G10) - ├── G6: (index-join G12 pqr,cols=(3,5)) - │ └── [] - │ ├── best: (index-join G12 pqr,cols=(3,5)) - │ └── cost: 75.22 - ├── G7: (index-join G13 pqr,cols=(3,5)) - │ └── [] - │ ├── best: (index-join G13 pqr,cols=(3,5)) - │ └── cost: 75.22 - ├── G8: (filters G9) - ├── G9: (eq G14 G15) - ├── G10: (eq G16 G17) - ├── G11: (scan pqr@r,cols=(1,3),constrained) - │ └── [] - │ ├── best: (scan pqr@r,cols=(1,3),constrained) - │ └── cost: 14.41 - ├── G12: (scan pqr@rs,cols=(1,3),constrained) - │ └── [] - │ ├── best: (scan pqr@rs,cols=(1,3),constrained) - │ └── cost: 14.51 - ├── G13: (scan pqr@ts,cols=(1,5),constrained) - │ └── [] - │ ├── best: (scan pqr@ts,cols=(1,5),constrained) - │ └── cost: 14.51 - ├── G14: (variable r) - ├── G15: (const 1) - ├── G16: (variable t) - └── G17: (const 'foo') - -# Zigzag with choice between indexes for multiple equality predicates. 
-opt -SELECT p,q,r,s FROM pqr WHERE q = 1 AND r = 1 AND s = 'foo' ----- -inner-join (zigzag pqr@q pqr@s) - ├── columns: p:1!null q:2!null r:3!null s:4!null - ├── eq columns: [1] = [1] - ├── left fixed columns: [2] = [1] - ├── right fixed columns: [4] = ['foo'] - ├── key: (1) - ├── fd: ()-->(2-4) - └── filters - ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] - ├── r:3 = 1 [outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)] - └── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] - -# Tests for zigzag joins over partial indexes. - -exec-ddl -CREATE TABLE zz_partial ( - k INT PRIMARY KEY, - i INT, - j INT, - b1 BOOL, - b2 BOOL, - s STRING -) ----- - -exec-ddl -CREATE INDEX i ON zz_partial (i) WHERE b1 ----- - -exec-ddl -CREATE INDEX j ON zz_partial (j) WHERE b2 ----- - -# Generate a zigzag join over two partial indexes. -opt expect=GenerateZigzagJoins -SELECT k FROM zz_partial WHERE i = 10 AND b1 AND j = 20 AND b2 +# This rule applies when the On conditions are not equalities. For example, +# in this test we have a Lt condition. It allows us to use a lookup join even +# though the index is not covering. 
+opt expect=ConvertSemiToInnerJoin +SELECT * from pqr WHERE EXISTS (SELECT * FROM zz WHERE a = 0 AND q = b AND r < c) ---- project - ├── columns: k:1!null + ├── columns: p:1!null q:2 r:3 s:4 t:5 ├── key: (1) - └── inner-join (lookup zz_partial) - ├── columns: k:1!null i:2!null j:3!null b1:4!null b2:5!null - ├── key columns: [1] = [1] - ├── lookup columns are key + ├── fd: (1)-->(2-5) + └── project + ├── columns: p:1!null q:2!null r:3!null s:4 t:5 ├── key: (1) - ├── fd: ()-->(2-5) - ├── inner-join (zigzag zz_partial@i zz_partial@j) - │ ├── columns: k:1!null i:2!null j:3!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [2] = [10] - │ ├── right fixed columns: [3] = [20] - │ ├── fd: ()-->(2,3) - │ └── filters - │ ├── i:2 = 10 [outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)] - │ └── j:3 = 20 [outer=(3), constraints=(/3: [/20 - /20]; tight), fd=()-->(3)] - └── filters (true) - -# Don't generate a zigzag join when the first index predicate is not implied. -opt expect-not=GenerateZigzagJoins format=hide-all -SELECT k FROM zz_partial WHERE i = 10 AND j = 20 AND b2 ----- -project - └── select - ├── index-join zz_partial - │ └── select - │ ├── scan zz_partial@j,partial - │ └── filters - │ └── j = 20 - └── filters - └── i = 10 - -# Don't generate a zigzag join when the second index predicate is not implied. 
-opt expect-not=GenerateZigzagJoins format=hide-all -SELECT k FROM zz_partial WHERE i = 10 AND b1 AND j = 20 ----- -project - └── select - ├── index-join zz_partial - │ └── select - │ ├── scan zz_partial@i,partial - │ └── filters - │ └── i = 10 - └── filters - └── j = 20 - -exec-ddl -DROP INDEX j ----- - -exec-ddl -CREATE INDEX j ON zz_partial (j) ----- + ├── fd: ()-->(2), (1)-->(3-5) + └── inner-join (lookup pqr) + ├── columns: p:1!null q:2!null r:3!null s:4 t:5 a:7!null b:8!null c:9!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── key: (1) + ├── fd: ()-->(2,7-9), (1)-->(3-5), (2)==(8), (8)==(2) + ├── inner-join (lookup pqr@q) + │ ├── columns: p:1!null q:2!null a:7!null b:8!null c:9 + │ ├── key columns: [8] = [2] + │ ├── key: (1) + │ ├── fd: ()-->(2,7-9), (2)==(8), (8)==(2) + │ ├── scan zz + │ │ ├── columns: a:7!null b:8 c:9 + │ │ ├── constraint: /7: [/0 - /0] + │ │ ├── cardinality: [0 - 1] + │ │ ├── key: () + │ │ └── fd: ()-->(7-9) + │ └── filters (true) + └── filters + └── r:3 < c:9 [outer=(3,9), constraints=(/3: (/NULL - ]; /9: (/NULL - ])] -# Generate a zigzag join over one partial and one non-partial index. -opt expect=GenerateZigzagJoins -SELECT k FROM zz_partial WHERE i = 10 AND b1 AND j = 20 +# In this test we have an Or condition. 
+opt expect=ConvertSemiToInnerJoin +SELECT * from pqr WHERE EXISTS (SELECT * FROM zz WHERE a = 0 AND q = b AND (p = a OR r = c)) ---- project - ├── columns: k:1!null + ├── columns: p:1!null q:2 r:3 s:4 t:5 ├── key: (1) - └── inner-join (lookup zz_partial) - ├── columns: k:1!null i:2!null j:3!null b1:4!null - ├── key columns: [1] = [1] - ├── lookup columns are key + ├── fd: (1)-->(2-5) + └── project + ├── columns: p:1!null q:2!null r:3 s:4 t:5 ├── key: (1) - ├── fd: ()-->(2-4) - ├── inner-join (zigzag zz_partial@i zz_partial@j) - │ ├── columns: k:1!null i:2!null j:3!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [2] = [10] - │ ├── right fixed columns: [3] = [20] - │ ├── fd: ()-->(2,3) - │ └── filters - │ ├── i:2 = 10 [outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)] - │ └── j:3 = 20 [outer=(3), constraints=(/3: [/20 - /20]; tight), fd=()-->(3)] - └── filters (true) - -# Don't generate a zigzag join when the partial index predicate is not implied. -opt expect-not=GenerateZigzagJoins format=hide-all -SELECT k FROM zz_partial WHERE i = 10 AND j = 20 ----- -project - └── select - ├── index-join zz_partial - │ └── scan zz_partial@j - │ └── constraint: /3/1: [/20 - /20] - └── filters - └── i = 10 - -exec-ddl -DROP INDEX i ----- - -exec-ddl -DROP INDEX j ----- - -exec-ddl -CREATE INDEX i ON zz_partial (i) WHERE i = 10 ----- - -exec-ddl -CREATE INDEX j ON zz_partial (j) ----- + ├── fd: ()-->(2), (1)-->(3-5) + └── inner-join (lookup pqr) + ├── columns: p:1!null q:2!null r:3 s:4 t:5 a:7!null b:8!null c:9 + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── key: (1) + ├── fd: ()-->(2,7-9), (1)-->(3-5), (2)==(8), (8)==(2) + ├── inner-join (lookup pqr@q) + │ ├── columns: p:1!null q:2!null a:7!null b:8!null c:9 + │ ├── key columns: [8] = [2] + │ ├── key: (1) + │ ├── fd: ()-->(2,7-9), (2)==(8), (8)==(2) + │ ├── scan zz + │ │ ├── columns: a:7!null b:8 c:9 + │ │ ├── constraint: /7: [/0 - /0] + │ │ ├── cardinality: [0 - 1] + │ │ ├── key: () + │ 
│ └── fd: ()-->(7-9) + │ └── filters (true) + └── filters + └── (p:1 = 0) OR (r:3 = c:9) [outer=(1,3,9)] -# Don't generate a zigzag join when the expression that fixes the left columns -# is removed during partial index implication of the left index. -opt expect-not=GenerateZigzagJoins format=hide-all -SELECT k FROM zz_partial WHERE i = 10 AND j = 20 +# In this case we need to add the key back to zz since it was pruned during +# normalization. +opt expect=ConvertSemiToInnerJoin +SELECT b, c from zz WHERE EXISTS (SELECT * FROM pqr WHERE p = 0 AND q = b AND (p = c OR r = c)) ---- project - └── select - ├── index-join zz_partial - │ └── scan zz_partial@i,partial - └── filters - └── j = 20 + ├── columns: b:2 c:3 + ├── lax-key: (2,3) + ├── fd: (3)~~>(2) + └── project + ├── columns: a:1!null b:2!null c:3!null + ├── key: (1) + ├── fd: ()-->(2), (1)-->(3), (3)-->(1) + └── inner-join (lookup zz) + ├── columns: a:1!null b:2!null c:3!null p:5!null q:6!null r:7 + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── key: (1) + ├── fd: ()-->(2,5-7), (1)-->(3), (3)-->(1), (2)==(6), (6)==(2) + ├── inner-join (lookup zz@idx_b) + │ ├── columns: a:1!null b:2!null p:5!null q:6!null r:7 + │ ├── key columns: [6] = [2] + │ ├── key: (1) + │ ├── fd: ()-->(2,5-7), (2)==(6), (6)==(2) + │ ├── scan pqr + │ │ ├── columns: p:5!null q:6 r:7 + │ │ ├── constraint: /5: [/0 - /0] + │ │ ├── cardinality: [0 - 1] + │ │ ├── key: () + │ │ └── fd: ()-->(5-7) + │ └── filters (true) + └── filters + └── (c:3 = 0) OR (r:7 = c:3) [outer=(3,7), constraints=(/3: (/NULL - ])] -exec-ddl -DROP INDEX i ----- +# -------------------------------------------------- +# PushJoinIntoIndexJoin +# -------------------------------------------------- -exec-ddl -DROP INDEX j +opt expect=PushJoinIntoIndexJoin +SELECT * FROM abc INNER JOIN (SELECT * FROM pqr ORDER BY q LIMIT 5) ON a=q ---- +inner-join (lookup pqr) + ├── columns: a:1!null b:2 c:3 p:6!null q:7!null r:8 s:9 t:10 + ├── key columns: [6] = [6] + ├── lookup 
columns are key + ├── fd: (6)-->(7-10), (1)==(7), (7)==(1) + ├── inner-join (lookup abc@ab) + │ ├── columns: a:1!null b:2 c:3 p:6!null q:7!null + │ ├── key columns: [7] = [1] + │ ├── fd: (6)-->(7), (1)==(7), (7)==(1) + │ ├── scan pqr@q + │ │ ├── columns: p:6!null q:7 + │ │ ├── limit: 5 + │ │ ├── key: (6) + │ │ └── fd: (6)-->(7) + │ └── filters (true) + └── filters (true) -exec-ddl -CREATE INDEX i ON zz_partial (i) +# Cross join case. The plan produced by PushJoinIntoIndexJoin isn't chosen +# because the cross join doesn't filter rows. +opt expect=PushJoinIntoIndexJoin +SELECT * FROM abc CROSS JOIN (SELECT * FROM pqr ORDER BY q LIMIT 5) ---- +inner-join (cross) + ├── columns: a:1 b:2 c:3 p:6!null q:7 r:8 s:9 t:10 + ├── fd: (6)-->(7-10) + ├── scan abc + │ └── columns: a:1 b:2 c:3 + ├── index-join pqr + │ ├── columns: p:6!null q:7 r:8 s:9 t:10 + │ ├── cardinality: [0 - 5] + │ ├── key: (6) + │ ├── fd: (6)-->(7-10) + │ └── scan pqr@q + │ ├── columns: p:6!null q:7 + │ ├── limit: 5 + │ ├── key: (6) + │ └── fd: (6)-->(7) + └── filters (true) -exec-ddl -CREATE INDEX j ON zz_partial (j) WHERE j = 20 +# No-op case because the index join is the right input of the LeftJoin. +opt expect-not=PushJoinIntoIndexJoin +SELECT * FROM abc LEFT JOIN (SELECT * FROM pqr ORDER BY q LIMIT 5) ON a=q ---- +left-join (merge) + ├── columns: a:1 b:2 c:3 p:6 q:7 r:8 s:9 t:10 + ├── left ordering: +1 + ├── right ordering: +7 + ├── fd: (6)-->(7-10) + ├── scan abc@ab + │ ├── columns: a:1 b:2 c:3 + │ └── ordering: +1 + ├── index-join pqr + │ ├── columns: p:6!null q:7 r:8 s:9 t:10 + │ ├── cardinality: [0 - 5] + │ ├── key: (6) + │ ├── fd: (6)-->(7-10) + │ ├── ordering: +7 + │ └── scan pqr@q + │ ├── columns: p:6!null q:7 + │ ├── limit: 5 + │ ├── key: (6) + │ ├── fd: (6)-->(7) + │ └── ordering: +7 + └── filters (true) -# Don't generate a zigzag join when the expression that fixes the right columns -# is removed during partial index implication of the right index. 
-opt expect-not=GenerateZigzagJoins format=hide-all -SELECT k FROM zz_partial WHERE i = 10 AND j = 20 ----- -project - └── select - ├── index-join zz_partial - │ └── scan zz_partial@j,partial - └── filters - └── i = 10 - -exec-ddl -DROP INDEX i ----- - -exec-ddl -DROP INDEX j ----- - -exec-ddl -CREATE INDEX zz_partial_s ON zz_partial (s) ----- - -exec-ddl -CREATE INDEX j ON zz_partial (j) WHERE s = 'foo' ----- - -# Don't generate a zigzag join when the expression that fixes the left columns -# is removed during partial index implication of the right index. -opt expect-not=GenerateZigzagJoins format=hide-all -SELECT k FROM zz_partial WHERE s = 'foo' AND j = 20 ----- -project - └── scan zz_partial@j,partial - └── constraint: /3/1: [/20 - /20] - -exec-ddl -DROP INDEX zz_partial_s ----- - -exec-ddl -DROP INDEX j ----- - -exec-ddl -CREATE INDEX i ON zz_partial (i) WHERE s = 'foo' ----- - -exec-ddl -CREATE INDEX zz_partial_s ON zz_partial (s) ----- - -# Don't generate a zigzag join when the expression that fixes the right columns -# is removed during partial index implication of the left index. -opt expect-not=GenerateZigzagJoins format=hide-all -SELECT k FROM zz_partial WHERE i = 10 AND s = 'foo' ----- -project - └── scan zz_partial@i,partial - └── constraint: /2/1: [/10 - /10] - -exec-ddl -DROP INDEX i ----- - -exec-ddl -DROP INDEX zz_partial_s ----- - -exec-ddl -CREATE INDEX i ON zz_partial (i) ----- - -exec-ddl -CREATE INDEX b1 ON zz_partial (b1) WHERE s = 'foo' ----- - -exec-ddl -CREATE INDEX j ON zz_partial (j) ----- - -# The filters should be reset during each iteration over the left and right -# indexes if they are reduced while proving partial index implication. In this -# test, (s = 'foo') must be applied after the zigzag join. 
-opt expect=GenerateZigzagJoins -SELECT k FROM zz_partial WHERE i = 10 AND j = 20 AND b1 AND s = 'foo' ----- -project - ├── columns: k:1!null - ├── key: (1) - └── inner-join (lookup zz_partial) - ├── columns: k:1!null i:2!null j:3!null b1:4!null s:6!null - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── key: (1) - ├── fd: ()-->(2-4,6) - ├── inner-join (zigzag zz_partial@i zz_partial@j) - │ ├── columns: k:1!null i:2!null j:3!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [2] = [10] - │ ├── right fixed columns: [3] = [20] - │ ├── fd: ()-->(2,3) - │ └── filters - │ ├── i:2 = 10 [outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)] - │ └── j:3 = 20 [outer=(3), constraints=(/3: [/20 - /20]; tight), fd=()-->(3)] - └── filters - ├── b1:4 [outer=(4), constraints=(/4: [/true - /true]; tight), fd=()-->(4)] - └── s:6 = 'foo' [outer=(6), constraints=(/6: [/'foo' - /'foo']; tight), fd=()-->(6)] - -# Don't generate a zigzag which has the PK as its equality columns against -# nullable unique indexes where the primary key is not part of the indexed -# columns. - -# Regression test for #36051: prior to fixing this, we would try to use the PK -# as the equality column here, but it's not actually part of the key so we -# can't zigzag on it. 
-opt -SELECT * FROM zz WHERE b IS NULL AND c = 2 ----- -select - ├── columns: a:1!null b:2 c:3!null - ├── cardinality: [0 - 1] - ├── key: () - ├── fd: ()-->(1-3) - ├── index-join zz - │ ├── columns: a:1!null b:2 c:3 - │ ├── cardinality: [0 - 1] - │ ├── key: () - │ ├── fd: ()-->(1-3) - │ └── scan zz@idx_c - │ ├── columns: a:1!null c:3!null - │ ├── constraint: /3: [/2 - /2] - │ ├── cardinality: [0 - 1] - │ ├── key: () - │ └── fd: ()-->(1,3) - └── filters - └── b:2 IS NULL [outer=(2), constraints=(/2: [/NULL - /NULL]; tight), fd=()-->(2)] - -memo -SELECT p,q,r,s FROM pqr WHERE q = 1 AND r = 1 AND s = 'foo' ----- -memo (optimized, ~31KB, required=[presentation: p:1,q:2,r:3,s:4]) - ├── G1: (select G2 G3) (lookup-join G4 G5 pqr,keyCols=[1],outCols=(1-4)) (zigzag-join G3 pqr@q pqr@s) (zigzag-join G3 pqr@q pqr@rs) (lookup-join G6 G7 pqr,keyCols=[1],outCols=(1-4)) (select G8 G9) (select G10 G11) (select G12 G7) (select G13 G7) - │ └── [presentation: p:1,q:2,r:3,s:4] - │ ├── best: (zigzag-join G3 pqr@q pqr@s) - │ └── cost: 1.95 - ├── G2: (scan pqr,cols=(1-4)) - │ └── [] - │ ├── best: (scan pqr,cols=(1-4)) - │ └── cost: 1094.02 - ├── G3: (filters G14 G15 G16) - ├── G4: (zigzag-join G17 pqr@q pqr@r) - │ └── [] - │ ├── best: (zigzag-join G17 pqr@q pqr@r) - │ └── cost: 1.94 - ├── G5: (filters G16) - ├── G6: (zigzag-join G9 pqr@r pqr@s) - │ └── [] - │ ├── best: (zigzag-join G9 pqr@r pqr@s) - │ └── cost: 1.95 - ├── G7: (filters G14) - ├── G8: (index-join G18 pqr,cols=(1-4)) - │ └── [] - │ ├── best: (index-join G18 pqr,cols=(1-4)) - │ └── cost: 75.22 - ├── G9: (filters G15 G16) - ├── G10: (index-join G19 pqr,cols=(1-4)) - │ └── [] - │ ├── best: (index-join G19 pqr,cols=(1-4)) - │ └── cost: 75.22 - ├── G11: (filters G14 G16) - ├── G12: (index-join G20 pqr,cols=(1-4)) - │ └── [] - │ ├── best: (index-join G20 pqr,cols=(1-4)) - │ └── cost: 21.76 - ├── G13: (index-join G21 pqr,cols=(1-4)) - │ └── [] - │ ├── best: (index-join G21 pqr,cols=(1-4)) - │ └── cost: 10.51 - ├── G14: (eq G22 
G23) - ├── G15: (eq G24 G23) - ├── G16: (eq G25 G26) - ├── G17: (filters G14 G15) - ├── G18: (scan pqr@q,cols=(1,2),constrained) - │ └── [] - │ ├── best: (scan pqr@q,cols=(1,2),constrained) - │ └── cost: 14.41 - ├── G19: (scan pqr@r,cols=(1,3),constrained) - │ └── [] - │ ├── best: (scan pqr@r,cols=(1,3),constrained) - │ └── cost: 14.41 - ├── G20: (select G27 G28) - │ └── [] - │ ├── best: (select G27 G28) - │ └── cost: 14.73 - ├── G21: (scan pqr@rs,cols=(1,3,4),constrained) - │ └── [] - │ ├── best: (scan pqr@rs,cols=(1,3,4),constrained) - │ └── cost: 4.98 - ├── G22: (variable q) - ├── G23: (const 1) - ├── G24: (variable r) - ├── G25: (variable s) - ├── G26: (const 'foo') - ├── G27: (scan pqr@s,cols=(1,3,4),constrained) - │ └── [] - │ ├── best: (scan pqr@s,cols=(1,3,4),constrained) - │ └── cost: 14.61 - └── G28: (filters G15) - -# Zigzag joins cannot be planned for indexes where equality columns do not -# immediately follow fixed columns. Here, the only index on t is (t,s,p) and -# s is not a fixed or equal column, so a zigzag join shouldn't be planned. -opt -SELECT q,t FROM pqr WHERE q = 1 AND t = 'foo' +# No-op case because the InnerJoin has join hints. 
+opt expect-not=PushJoinIntoIndexJoin +SELECT * FROM (SELECT * FROM pqr ORDER BY q LIMIT 5) INNER HASH JOIN abc ON a=q ---- -select - ├── columns: q:2!null t:5!null - ├── fd: ()-->(2,5) +inner-join (hash) + ├── columns: p:1!null q:2!null r:3 s:4 t:5 a:7!null b:8 c:9 + ├── flags: force hash join (store right side) + ├── fd: (1)-->(2-5), (2)==(7), (7)==(2) ├── index-join pqr - │ ├── columns: q:2 t:5 - │ ├── fd: ()-->(2) + │ ├── columns: p:1!null q:2 r:3 s:4 t:5 + │ ├── cardinality: [0 - 5] + │ ├── key: (1) + │ ├── fd: (1)-->(2-5) │ └── scan pqr@q - │ ├── columns: p:1!null q:2!null - │ ├── constraint: /2/1: [/1 - /1] + │ ├── columns: p:1!null q:2 + │ ├── limit: 5 │ ├── key: (1) - │ └── fd: ()-->(2) + │ └── fd: (1)-->(2) + ├── scan abc + │ └── columns: a:7 b:8 c:9 └── filters - └── t:5 = 'foo' [outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)] - -memo -SELECT q,t FROM pqr WHERE q = 1 AND t = 'foo' ----- -memo (optimized, ~9KB, required=[presentation: q:2,t:5]) - ├── G1: (select G2 G3) (select G4 G5) (select G6 G7) - │ └── [presentation: q:2,t:5] - │ ├── best: (select G4 G5) - │ └── cost: 75.24 - ├── G2: (scan pqr,cols=(2,5)) - │ └── [] - │ ├── best: (scan pqr,cols=(2,5)) - │ └── cost: 1074.02 - ├── G3: (filters G8 G9) - ├── G4: (index-join G10 pqr,cols=(2,5)) - │ └── [] - │ ├── best: (index-join G10 pqr,cols=(2,5)) - │ └── cost: 75.12 - ├── G5: (filters G9) - ├── G6: (index-join G11 pqr,cols=(2,5)) - │ └── [] - │ ├── best: (index-join G11 pqr,cols=(2,5)) - │ └── cost: 75.22 - ├── G7: (filters G8) - ├── G8: (eq G12 G13) - ├── G9: (eq G14 G15) - ├── G10: (scan pqr@q,cols=(1,2),constrained) - │ └── [] - │ ├── best: (scan pqr@q,cols=(1,2),constrained) - │ └── cost: 14.41 - ├── G11: (scan pqr@ts,cols=(1,5),constrained) - │ └── [] - │ ├── best: (scan pqr@ts,cols=(1,5),constrained) - │ └── cost: 14.51 - ├── G12: (variable q) - ├── G13: (const 1) - ├── G14: (variable t) - └── G15: (const 'foo') - -# Don't zigzag on two identical indexes. 
-memo -SELECT c FROM zz_redundant WHERE b = 1 ----- -memo (optimized, ~6KB, required=[presentation: c:3]) - ├── G1: (project G2 G3 c) - │ └── [presentation: c:3] - │ ├── best: (project G2 G3 c) - │ └── cost: 14.62 - ├── G2: (select G4 G5) (scan zz_redundant@idx_u,cols=(2,3),constrained) (scan zz_redundant@idx_v,cols=(2,3),constrained) - │ └── [] - │ ├── best: (scan zz_redundant@idx_u,cols=(2,3),constrained) - │ └── cost: 14.51 - ├── G3: (projections) - ├── G4: (scan zz_redundant,cols=(2,3)) (scan zz_redundant@idx_u,cols=(2,3)) (scan zz_redundant@idx_v,cols=(2,3)) - │ └── [] - │ ├── best: (scan zz_redundant,cols=(2,3)) - │ └── cost: 1054.02 - ├── G5: (filters G6) - ├── G6: (eq G7 G8) - ├── G7: (variable b) - └── G8: (const 1) + └── a:7 = q:2 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] -# GenerateZigzagJoins is disabled in the presence of a row-level locking clause. -opt -SELECT q,r FROM pqr WHERE q = 1 AND r = 2 FOR UPDATE +# No-op case because the ON condition references a column that doesn't come from +# the input of the index join or the right side of the InnerJoin. 
+opt expect-not=PushJoinIntoIndexJoin +SELECT * FROM (SELECT * FROM pqr ORDER BY q LIMIT 5) INNER JOIN abc ON a=r ---- -select - ├── columns: q:2!null r:3!null - ├── volatile - ├── fd: ()-->(2,3) +inner-join (lookup abc@ab) + ├── columns: p:1!null q:2 r:3!null s:4 t:5 a:7!null b:8 c:9 + ├── key columns: [3] = [7] + ├── fd: (1)-->(2-5), (3)==(7), (7)==(3) ├── index-join pqr - │ ├── columns: q:2 r:3 - │ ├── volatile - │ ├── fd: ()-->(2) + │ ├── columns: p:1!null q:2 r:3 s:4 t:5 + │ ├── cardinality: [0 - 5] + │ ├── key: (1) + │ ├── fd: (1)-->(2-5) │ └── scan pqr@q - │ ├── columns: p:1!null q:2!null - │ ├── constraint: /2/1: [/1 - /1] - │ ├── locking: for-update - │ ├── volatile + │ ├── columns: p:1!null q:2 + │ ├── limit: 5 │ ├── key: (1) - │ └── fd: ()-->(2) - └── filters - └── r:3 = 2 [outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] - -# -------------------------------------------------- -# GenerateInvertedIndexZigzagJoins -# -------------------------------------------------- + │ └── fd: (1)-->(2) + └── filters (true) -exec-ddl -CREATE TABLE t5 ( - a INT PRIMARY KEY, - b JSONB, - c INT, - INVERTED INDEX b_idx(b) +# No-op case because the right side of the InnerJoin has outer columns. +opt expect-not=PushJoinIntoIndexJoin disable=(TryDecorrelateProject) +SELECT * +FROM stu +INNER JOIN LATERAL ( + SELECT * + FROM (SELECT * FROM pqr ORDER BY q LIMIT 5) + INNER JOIN (SELECT *, a*s FROM abc) + ON a=q ) +ON True ---- - -# One path. Should generate a scan constrained on the inverted index. 
-opt -SELECT b,a FROM t5 WHERE b @> '{"a":1}' ----- -index-join t5 - ├── columns: b:2!null a:1!null - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2) - └── scan t5@b_idx - ├── columns: a:1!null - ├── constraint: /2/1: [/'{"a": 1}' - /'{"a": 1}'] - └── key: (1) - -opt -SELECT b,a FROM t5 WHERE b @> '{"a":[[{"b":{"c":[{"d":"e"}]}}]]}' ----- -index-join t5 - ├── columns: b:2!null a:1!null - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2) - └── scan t5@b_idx - ├── columns: a:1!null - ├── constraint: /2/1: [/'{"a": [[{"b": {"c": [{"d": "e"}]}}]]}' - /'{"a": [[{"b": {"c": [{"d": "e"}]}}]]}'] - └── key: (1) - -# Two paths. Should generate a zigzag join. -opt -SELECT b,a FROM t5 WHERE b @> '{"a":1, "c":2}' ----- -inner-join (lookup t5) - ├── columns: b:2!null a:1!null - ├── key columns: [1] = [1] - ├── lookup columns are key +inner-join-apply + ├── columns: s:1!null t:2!null u:3!null p:5!null q:6!null r:7 s:8 t:9 a:11!null b:12 c:13 "?column?":16 ├── immutable - ├── key: (1) - ├── fd: (1)-->(2) - ├── inner-join (zigzag t5@b_idx t5@b_idx) - │ ├── columns: a:1!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [2] = ['{"a": 1}'] - │ ├── right fixed columns: [2] = ['{"c": 2}'] + ├── fd: (1-3,5)-->(6-9), (1-3,11)-->(16), (6)==(11), (11)==(6) + ├── scan stu + │ ├── columns: stu.s:1!null stu.t:2!null u:3!null + │ └── key: (1-3) + ├── inner-join (merge) + │ ├── columns: p:5!null q:6!null r:7 pqr.s:8 pqr.t:9 a:11!null b:12 c:13 "?column?":16 + │ ├── left ordering: +6 + │ ├── right ordering: +11 + │ ├── outer: (1) + │ ├── immutable + │ ├── fd: (5)-->(6-9), (11)-->(16), (6)==(11), (11)==(6) + │ ├── index-join pqr + │ │ ├── columns: p:5!null q:6 r:7 pqr.s:8 pqr.t:9 + │ │ ├── cardinality: [0 - 5] + │ │ ├── key: (5) + │ │ ├── fd: (5)-->(6-9) + │ │ ├── ordering: +6 + │ │ └── scan pqr@q + │ │ ├── columns: p:5!null q:6 + │ │ ├── limit: 5 + │ │ ├── key: (5) + │ │ ├── fd: (5)-->(6) + │ │ └── ordering: +6 + │ ├── project + │ │ ├── columns: "?column?":16 a:11 b:12 c:13 + │ │ ├── 
outer: (1) + │ │ ├── immutable + │ │ ├── fd: (11)-->(16) + │ │ ├── ordering: +11 + │ │ ├── scan abc@ab + │ │ │ ├── columns: a:11 b:12 c:13 + │ │ │ └── ordering: +11 + │ │ └── projections + │ │ └── a:11 * stu.s:1 [as="?column?":16, outer=(1,11), immutable] │ └── filters (true) - └── filters - └── b:2 @> '{"a": 1, "c": 2}' [outer=(2), immutable, constraints=(/2: (/NULL - ])] - -memo -SELECT a FROM t5 WHERE b @> '{"a":1, "c":2}' ----- -memo (optimized, ~14KB, required=[presentation: a:1]) - ├── G1: (project G2 G3 a) - │ └── [presentation: a:1] - │ ├── best: (project G2 G3 a) - │ └── cost: 100.53 - ├── G2: (select G4 G5) (lookup-join G6 G5 t5,keyCols=[1],outCols=(1,2)) (select G7 G5) - │ └── [] - │ ├── best: (lookup-join G6 G5 t5,keyCols=[1],outCols=(1,2)) - │ └── cost: 100.40 - ├── G3: (projections) - ├── G4: (scan t5,cols=(1,2)) - │ └── [] - │ ├── best: (scan t5,cols=(1,2)) - │ └── cost: 1054.02 - ├── G5: (filters G8) - ├── G6: (zigzag-join G9 t5@b_idx t5@b_idx) - │ └── [] - │ ├── best: (zigzag-join G9 t5@b_idx t5@b_idx) - │ └── cost: 25.69 - ├── G7: (index-join G10 t5,cols=(1,2)) - │ └── [] - │ ├── best: (index-join G10 t5,cols=(1,2)) - │ └── cost: 782.82 - ├── G8: (contains G11 G12) - ├── G9: (filters) - ├── G10: (scan t5@b_idx,cols=(1),constrained) - │ └── [] - │ ├── best: (scan t5@b_idx,cols=(1),constrained) - │ └── cost: 117.31 - ├── G11: (variable b) - └── G12: (const '{"a": 1, "c": 2}') + └── filters (true) -# Three or more paths. Should generate zigzag joins. 
-opt -SELECT b,a FROM t5 WHERE b @> '{"a":[{"b":"c", "d":3}, 5]}' ----- -inner-join (lookup t5) - ├── columns: b:2!null a:1!null - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2) - ├── inner-join (zigzag t5@b_idx t5@b_idx) - │ ├── columns: a:1!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [2] = ['{"a": [{"b": "c"}]}'] - │ ├── right fixed columns: [2] = ['{"a": [{"d": 3}]}'] - │ └── filters (true) - └── filters - └── b:2 @> '{"a": [{"b": "c", "d": 3}, 5]}' [outer=(2), immutable, constraints=(/2: (/NULL - ])] +# -------------------------------------------------- +# Misc +# -------------------------------------------------- # Regression test for issue where zero-column expressions could exist multiple # times in the tree, causing collisions. @@ -5617,519 +5223,3 @@ union-all │ └── filters (true) └── projections └── 1 [as="?column?":6] - -exec-ddl -CREATE TABLE def (d INT, e INT, f INT, PRIMARY KEY (d, e)); ----- - -exec-ddl -ALTER TABLE abc INJECT STATISTICS '[ - { - "columns": ["a"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 100, - "distinct_count": 100 - }, - { - "columns": ["b"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 100, - "distinct_count": 100 - } -]' ----- - -exec-ddl -ALTER TABLE def INJECT STATISTICS '[ - { - "columns": ["d"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 10000, - "distinct_count": 10000 - }, - { - "columns": ["e"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 10000, - "distinct_count": 10000 - } -]' ----- - -# Test the CommuteSemiJoinRule creates an appropriate inner join. 
-opt -SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=f) ----- -semi-join (hash) - ├── columns: a:1 b:2 c:3 - ├── scan abc - │ └── columns: a:1 b:2 c:3 - ├── scan def - │ └── columns: f:8 - └── filters - └── a:1 = f:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)] - -# Test that we don't commute a SemiJoin when the On conditions are not -# equalities. For example, in this test we have a Lt condition. -opt expect-not=CommuteSemiJoin -SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a < e) ----- -semi-join (cross) - ├── columns: a:1 b:2 c:3 - ├── scan abc - │ └── columns: a:1 b:2 c:3 - ├── scan def - │ └── columns: e:7!null - └── filters - └── a:1 < e:7 [outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ])] - -# Test that we don't commute a SemiJoin when the On conditions are not -# equalities. For example, in this test we have an Or condition. -opt expect-not=CommuteSemiJoin -SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d OR c=e) ----- -semi-join (cross) - ├── columns: a:1 b:2 c:3 - ├── scan abc - │ └── columns: a:1 b:2 c:3 - ├── scan def - │ ├── columns: d:6!null e:7!null - │ └── key: (6,7) - └── filters - └── (a:1 = d:6) OR (c:3 = e:7) [outer=(1,3,6,7)] - -opt disable=CommuteSemiJoin format=show-all -SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d AND c=e) ----- -semi-join (lookup def) - ├── columns: a:1(int) b:2(int) c:3(int) - ├── key columns: [1 3] = [6 7] - ├── lookup columns are key - ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0] - ├── cost: 511.050394 - ├── prune: (2) - ├── interesting orderings: (+1,+2) (+2,+3) - ├── scan t.public.abc - │ ├── columns: t.public.abc.a:1(int) t.public.abc.b:2(int) t.public.abc.c:3(int) - │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=1] - │ ├── cost: 111.02 - │ ├── prune: (1-3) - │ ├── interesting orderings: (+1,+2) (+2,+3) - │ └── unfiltered-cols: (1-5) - └── filters (true) - -# 
TODO(rytaft): See stats/join tests. Since we don't collect the stats properly -# for SemiJoins, we prefer the InnerJoin plan over the SemiJoin one more times -# than necessary. -opt format=show-all -SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d AND c=e) ----- -semi-join (lookup def) - ├── columns: a:1(int) b:2(int) c:3(int) - ├── key columns: [1 3] = [6 7] - ├── lookup columns are key - ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=0] - ├── cost: 511.050394 - ├── prune: (2) - ├── interesting orderings: (+1,+2) (+2,+3) - ├── scan t.public.abc - │ ├── columns: t.public.abc.a:1(int) t.public.abc.b:2(int) t.public.abc.c:3(int) - │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(3)=10, null(3)=1] - │ ├── cost: 111.02 - │ ├── prune: (1-3) - │ ├── interesting orderings: (+1,+2) (+2,+3) - │ └── unfiltered-cols: (1-5) - └── filters (true) - -exec-ddl -CREATE TABLE customers (id INT PRIMARY KEY, name STRING) ----- - -exec-ddl -CREATE TABLE orders (id INT PRIMARY KEY, cust_id INT REFERENCES customers (id), order_date DATE, INDEX (order_date) STORING (cust_id)) ----- - -exec-ddl -ALTER TABLE customers INJECT STATISTICS '[ - { - "columns": ["id"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 100000, - "distinct_count": 100000 - } -]' ----- - -exec-ddl -ALTER TABLE orders INJECT STATISTICS '[ - { - "columns": ["id"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 1000000, - "distinct_count": 1000000 - }, - { - "columns": ["cust_id"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 1000000, - "distinct_count": 10000000 - }, - { - "columns": ["order_date"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 1000000, - "distinct_count": 1000000 - } -]' ----- - -opt disable=CommuteSemiJoin -SELECT * -FROM customers c -WHERE EXISTS(SELECT * FROM orders o WHERE o.cust_id=c.id AND o.order_date='2019-01-01') ----- -semi-join (merge) - ├── columns: id:1!null name:2 - 
├── left ordering: +1 - ├── right ordering: +5 - ├── key: (1) - ├── fd: (1)-->(2) - ├── scan c - │ ├── columns: c.id:1!null name:2 - │ ├── key: (1) - │ ├── fd: (1)-->(2) - │ └── ordering: +1 - ├── sort - │ ├── columns: cust_id:5 order_date:6!null - │ ├── fd: ()-->(6) - │ ├── ordering: +5 opt(6) [actual: +5] - │ └── scan o@secondary - │ ├── columns: cust_id:5 order_date:6!null - │ ├── constraint: /6/4: [/'2019-01-01' - /'2019-01-01'] - │ └── fd: ()-->(6) - └── filters (true) - -# The CommuteSemiJoin rule allows a much better plan because we can use -# a lookup join. -opt -SELECT * -FROM customers c -WHERE EXISTS(SELECT * FROM orders o WHERE o.cust_id=c.id AND o.order_date='2019-01-01') ----- -project - ├── columns: id:1!null name:2 - ├── key: (1) - ├── fd: (1)-->(2) - └── inner-join (lookup customers) - ├── columns: c.id:1!null name:2 cust_id:5!null - ├── key columns: [5] = [1] - ├── lookup columns are key - ├── key: (5) - ├── fd: (1)-->(2), (1)==(5), (5)==(1) - ├── distinct-on - │ ├── columns: cust_id:5 - │ ├── grouping columns: cust_id:5 - │ ├── key: (5) - │ └── scan o@secondary - │ ├── columns: cust_id:5 order_date:6!null - │ ├── constraint: /6/4: [/'2019-01-01' - /'2019-01-01'] - │ └── fd: ()-->(6) - └── filters (true) - -# GenerateInvertedIndexZigzagJoins is disabled in the presence of a row-level -# locking clause. 
-opt -SELECT b,a FROM t5 WHERE b @> '{"a":1, "c":2}' FOR UPDATE ----- -select - ├── columns: b:2!null a:1!null - ├── volatile - ├── key: (1) - ├── fd: (1)-->(2) - ├── index-join t5 - │ ├── columns: a:1!null b:2 - │ ├── volatile - │ ├── key: (1) - │ ├── fd: (1)-->(2) - │ └── scan t5@b_idx - │ ├── columns: a:1!null - │ ├── constraint: /2/1: [/'{"a": 1}' - /'{"a": 1}'] - │ ├── locking: for-update - │ ├── volatile - │ └── key: (1) - └── filters - └── b:2 @> '{"a": 1, "c": 2}' [outer=(2), immutable, constraints=(/2: (/NULL - ])] - -# ----------------------------------------------------- -# ConvertSemiToInnerJoin -# ----------------------------------------------------- - -# This rule applies when the On conditions are not equalities. For example, -# in this test we have a Lt condition. It allows us to use a lookup join even -# though the index is not covering. -opt expect=ConvertSemiToInnerJoin -SELECT * from pqr WHERE EXISTS (SELECT * FROM zz WHERE a = 0 AND q = b AND r < c) ----- -project - ├── columns: p:1!null q:2 r:3 s:4 t:5 - ├── key: (1) - ├── fd: (1)-->(2-5) - └── project - ├── columns: p:1!null q:2!null r:3!null s:4 t:5 - ├── key: (1) - ├── fd: ()-->(2), (1)-->(3-5) - └── inner-join (lookup pqr) - ├── columns: p:1!null q:2!null r:3!null s:4 t:5 a:7!null b:8!null c:9!null - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── key: (1) - ├── fd: ()-->(2,7-9), (1)-->(3-5), (2)==(8), (8)==(2) - ├── inner-join (lookup pqr@q) - │ ├── columns: p:1!null q:2!null a:7!null b:8!null c:9 - │ ├── key columns: [8] = [2] - │ ├── key: (1) - │ ├── fd: ()-->(2,7-9), (2)==(8), (8)==(2) - │ ├── scan zz - │ │ ├── columns: a:7!null b:8 c:9 - │ │ ├── constraint: /7: [/0 - /0] - │ │ ├── cardinality: [0 - 1] - │ │ ├── key: () - │ │ └── fd: ()-->(7-9) - │ └── filters (true) - └── filters - └── r:3 < c:9 [outer=(3,9), constraints=(/3: (/NULL - ]; /9: (/NULL - ])] - -# In this test we have an Or condition. 
-opt expect=ConvertSemiToInnerJoin -SELECT * from pqr WHERE EXISTS (SELECT * FROM zz WHERE a = 0 AND q = b AND (p = a OR r = c)) ----- -project - ├── columns: p:1!null q:2 r:3 s:4 t:5 - ├── key: (1) - ├── fd: (1)-->(2-5) - └── project - ├── columns: p:1!null q:2!null r:3 s:4 t:5 - ├── key: (1) - ├── fd: ()-->(2), (1)-->(3-5) - └── inner-join (lookup pqr) - ├── columns: p:1!null q:2!null r:3 s:4 t:5 a:7!null b:8!null c:9 - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── key: (1) - ├── fd: ()-->(2,7-9), (1)-->(3-5), (2)==(8), (8)==(2) - ├── inner-join (lookup pqr@q) - │ ├── columns: p:1!null q:2!null a:7!null b:8!null c:9 - │ ├── key columns: [8] = [2] - │ ├── key: (1) - │ ├── fd: ()-->(2,7-9), (2)==(8), (8)==(2) - │ ├── scan zz - │ │ ├── columns: a:7!null b:8 c:9 - │ │ ├── constraint: /7: [/0 - /0] - │ │ ├── cardinality: [0 - 1] - │ │ ├── key: () - │ │ └── fd: ()-->(7-9) - │ └── filters (true) - └── filters - └── (p:1 = 0) OR (r:3 = c:9) [outer=(1,3,9)] - -# In this case we need to add the key back to zz since it was pruned during -# normalization. 
-opt expect=ConvertSemiToInnerJoin -SELECT b, c from zz WHERE EXISTS (SELECT * FROM pqr WHERE p = 0 AND q = b AND (p = c OR r = c)) ----- -project - ├── columns: b:2 c:3 - ├── lax-key: (2,3) - ├── fd: (3)~~>(2) - └── project - ├── columns: a:1!null b:2!null c:3!null - ├── key: (1) - ├── fd: ()-->(2), (1)-->(3), (3)-->(1) - └── inner-join (lookup zz) - ├── columns: a:1!null b:2!null c:3!null p:5!null q:6!null r:7 - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── key: (1) - ├── fd: ()-->(2,5-7), (1)-->(3), (3)-->(1), (2)==(6), (6)==(2) - ├── inner-join (lookup zz@idx_b) - │ ├── columns: a:1!null b:2!null p:5!null q:6!null r:7 - │ ├── key columns: [6] = [2] - │ ├── key: (1) - │ ├── fd: ()-->(2,5-7), (2)==(6), (6)==(2) - │ ├── scan pqr - │ │ ├── columns: p:5!null q:6 r:7 - │ │ ├── constraint: /5: [/0 - /0] - │ │ ├── cardinality: [0 - 1] - │ │ ├── key: () - │ │ └── fd: ()-->(5-7) - │ └── filters (true) - └── filters - └── (c:3 = 0) OR (r:7 = c:3) [outer=(3,7), constraints=(/3: (/NULL - ])] - -# -------------------------------------------------- -# PushJoinIntoIndexJoin -# -------------------------------------------------- - -opt expect=PushJoinIntoIndexJoin -SELECT * FROM abc INNER JOIN (SELECT * FROM pqr ORDER BY q LIMIT 5) ON a=q ----- -inner-join (lookup pqr) - ├── columns: a:1!null b:2 c:3 p:6!null q:7!null r:8 s:9 t:10 - ├── key columns: [6] = [6] - ├── lookup columns are key - ├── fd: (6)-->(7-10), (1)==(7), (7)==(1) - ├── inner-join (lookup abc@ab) - │ ├── columns: a:1!null b:2 c:3 p:6!null q:7!null - │ ├── key columns: [7] = [1] - │ ├── fd: (6)-->(7), (1)==(7), (7)==(1) - │ ├── scan pqr@q - │ │ ├── columns: p:6!null q:7 - │ │ ├── limit: 5 - │ │ ├── key: (6) - │ │ └── fd: (6)-->(7) - │ └── filters (true) - └── filters (true) - -# Cross join case. The plan produced by PushJoinIntoIndexJoin isn't chosen -# because the cross join doesn't filter rows. 
-opt expect=PushJoinIntoIndexJoin -SELECT * FROM abc CROSS JOIN (SELECT * FROM pqr ORDER BY q LIMIT 5) ----- -inner-join (cross) - ├── columns: a:1 b:2 c:3 p:6!null q:7 r:8 s:9 t:10 - ├── fd: (6)-->(7-10) - ├── scan abc - │ └── columns: a:1 b:2 c:3 - ├── index-join pqr - │ ├── columns: p:6!null q:7 r:8 s:9 t:10 - │ ├── cardinality: [0 - 5] - │ ├── key: (6) - │ ├── fd: (6)-->(7-10) - │ └── scan pqr@q - │ ├── columns: p:6!null q:7 - │ ├── limit: 5 - │ ├── key: (6) - │ └── fd: (6)-->(7) - └── filters (true) - -# No-op case because the index join is the right input of the LeftJoin. -opt expect-not=PushJoinIntoIndexJoin -SELECT * FROM abc LEFT JOIN (SELECT * FROM pqr ORDER BY q LIMIT 5) ON a=q ----- -left-join (merge) - ├── columns: a:1 b:2 c:3 p:6 q:7 r:8 s:9 t:10 - ├── left ordering: +1 - ├── right ordering: +7 - ├── fd: (6)-->(7-10) - ├── scan abc@ab - │ ├── columns: a:1 b:2 c:3 - │ └── ordering: +1 - ├── index-join pqr - │ ├── columns: p:6!null q:7 r:8 s:9 t:10 - │ ├── cardinality: [0 - 5] - │ ├── key: (6) - │ ├── fd: (6)-->(7-10) - │ ├── ordering: +7 - │ └── scan pqr@q - │ ├── columns: p:6!null q:7 - │ ├── limit: 5 - │ ├── key: (6) - │ ├── fd: (6)-->(7) - │ └── ordering: +7 - └── filters (true) - -# No-op case because the InnerJoin has join hints. 
-opt expect-not=PushJoinIntoIndexJoin -SELECT * FROM (SELECT * FROM pqr ORDER BY q LIMIT 5) INNER HASH JOIN abc ON a=q ----- -inner-join (hash) - ├── columns: p:1!null q:2!null r:3 s:4 t:5 a:7!null b:8 c:9 - ├── flags: force hash join (store right side) - ├── fd: (1)-->(2-5), (2)==(7), (7)==(2) - ├── index-join pqr - │ ├── columns: p:1!null q:2 r:3 s:4 t:5 - │ ├── cardinality: [0 - 5] - │ ├── key: (1) - │ ├── fd: (1)-->(2-5) - │ └── scan pqr@q - │ ├── columns: p:1!null q:2 - │ ├── limit: 5 - │ ├── key: (1) - │ └── fd: (1)-->(2) - ├── scan abc - │ └── columns: a:7 b:8 c:9 - └── filters - └── a:7 = q:2 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] - -# No-op case because the ON condition references a column that doesn't come from -# the input of the index join or the right side of the InnerJoin. -opt expect-not=PushJoinIntoIndexJoin -SELECT * FROM (SELECT * FROM pqr ORDER BY q LIMIT 5) INNER JOIN abc ON a=r ----- -inner-join (lookup abc@ab) - ├── columns: p:1!null q:2 r:3!null s:4 t:5 a:7!null b:8 c:9 - ├── key columns: [3] = [7] - ├── fd: (1)-->(2-5), (3)==(7), (7)==(3) - ├── index-join pqr - │ ├── columns: p:1!null q:2 r:3 s:4 t:5 - │ ├── cardinality: [0 - 5] - │ ├── key: (1) - │ ├── fd: (1)-->(2-5) - │ └── scan pqr@q - │ ├── columns: p:1!null q:2 - │ ├── limit: 5 - │ ├── key: (1) - │ └── fd: (1)-->(2) - └── filters (true) - -# No-op case because the right side of the InnerJoin has outer columns. 
-opt expect-not=PushJoinIntoIndexJoin disable=(TryDecorrelateProject) -SELECT * -FROM stu -INNER JOIN LATERAL ( - SELECT * - FROM (SELECT * FROM pqr ORDER BY q LIMIT 5) - INNER JOIN (SELECT *, a*s FROM abc) - ON a=q -) -ON True ----- -inner-join-apply - ├── columns: s:1!null t:2!null u:3!null p:5!null q:6!null r:7 s:8 t:9 a:11!null b:12 c:13 "?column?":16 - ├── immutable - ├── fd: (1-3,5)-->(6-9), (1-3,11)-->(16), (6)==(11), (11)==(6) - ├── scan stu - │ ├── columns: stu.s:1!null stu.t:2!null u:3!null - │ └── key: (1-3) - ├── inner-join (merge) - │ ├── columns: p:5!null q:6!null r:7 pqr.s:8 pqr.t:9 a:11!null b:12 c:13 "?column?":16 - │ ├── left ordering: +6 - │ ├── right ordering: +11 - │ ├── outer: (1) - │ ├── immutable - │ ├── fd: (5)-->(6-9), (11)-->(16), (6)==(11), (11)==(6) - │ ├── index-join pqr - │ │ ├── columns: p:5!null q:6 r:7 pqr.s:8 pqr.t:9 - │ │ ├── cardinality: [0 - 5] - │ │ ├── key: (5) - │ │ ├── fd: (5)-->(6-9) - │ │ ├── ordering: +6 - │ │ └── scan pqr@q - │ │ ├── columns: p:5!null q:6 - │ │ ├── limit: 5 - │ │ ├── key: (5) - │ │ ├── fd: (5)-->(6) - │ │ └── ordering: +6 - │ ├── project - │ │ ├── columns: "?column?":16 a:11 b:12 c:13 - │ │ ├── outer: (1) - │ │ ├── immutable - │ │ ├── fd: (11)-->(16) - │ │ ├── ordering: +11 - │ │ ├── scan abc@ab - │ │ │ ├── columns: a:11 b:12 c:13 - │ │ │ └── ordering: +11 - │ │ └── projections - │ │ └── a:11 * stu.s:1 [as="?column?":16, outer=(1,11), immutable] - │ └── filters (true) - └── filters (true) diff --git a/pkg/sql/opt/xform/testdata/rules/limit b/pkg/sql/opt/xform/testdata/rules/limit index 3826a2c680ab..a4472c5780fc 100644 --- a/pkg/sql/opt/xform/testdata/rules/limit +++ b/pkg/sql/opt/xform/testdata/rules/limit @@ -676,14 +676,14 @@ GenerateIndexScans + │ └── (a:1 >= 20) AND (a:1 <= 30) [outer=(1), constraints=(/1: [/20 - /30]; tight)] + └── 5 -------------------------------------------------------------------------------- -GenerateZigzagJoins (no changes) 
--------------------------------------------------------------------------------- --------------------------------------------------------------------------------- GeneratePartialIndexScans (no changes) -------------------------------------------------------------------------------- -------------------------------------------------------------------------------- GenerateConstrainedScans (no changes) -------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +GenerateZigzagJoins (no changes) +-------------------------------------------------------------------------------- ================================================================================ Final best expression Cost: 4083.92 diff --git a/pkg/sql/opt/xform/testdata/rules/scan b/pkg/sql/opt/xform/testdata/rules/scan index 126448f7761f..3a0a650e9776 100644 --- a/pkg/sql/opt/xform/testdata/rules/scan +++ b/pkg/sql/opt/xform/testdata/rules/scan @@ -222,58 +222,6 @@ scan p@secondary,partial ├── columns: i:1 └── ordering: +1 -# -------------------------------------------------- -# GenerateConstrainedScans -# -------------------------------------------------- - -# Constrain the a@si_idx so that an index join is generated. 
-exploretrace rule=GenerateConstrainedScans -SELECT s, i, f FROM a WHERE s='foo' ORDER BY s, k, i ----- ----- -================================================================================ -GenerateConstrainedScans -================================================================================ -Source expression: - select - ├── columns: s:4!null i:2 f:3 [hidden: k:1!null] - ├── key: (1) - ├── fd: ()-->(4), (1)-->(2,3) - ├── ordering: +1 opt(4) [actual: +1] - ├── scan a@s_idx - │ ├── columns: k:1!null i:2 f:3 s:4 - │ ├── key: (1) - │ ├── fd: (1)-->(2-4) - │ └── ordering: +1 opt(4) [actual: +4,+1] - └── filters - └── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] - -New expression 1 of 2: - scan a@s_idx - ├── columns: s:4!null i:2 f:3 [hidden: k:1!null] - ├── constraint: /4/1: [/'foo' - /'foo'] - ├── key: (1) - ├── fd: ()-->(4), (1)-->(2,3) - └── ordering: +1 opt(4) [actual: +1] - -New expression 2 of 2: - sort - ├── columns: s:4!null i:2 f:3 [hidden: k:1!null] - ├── key: (1) - ├── fd: ()-->(4), (1)-->(2,3) - ├── ordering: +1 opt(4) [actual: +1] - └── index-join a - ├── columns: k:1!null i:2 f:3 s:4!null - ├── key: (1) - ├── fd: ()-->(4), (1)-->(2,3) - └── scan a@si_idx - ├── columns: k:1!null i:2 s:4!null - ├── constraint: /-4/-2/1: [/'foo' - /'foo'] - ├── key: (1) - └── fd: ()-->(4), (1)-->(2) ----- ----- - memo SELECT s, i, f FROM a ORDER BY f ---- @@ -301,36 +249,6 @@ memo (optimized, ~2KB, required=[presentation: s:4,i:2,f:3] [ordering: -4,+2]) ├── best: (scan a@s_idx,cols=(2-4)) └── cost: 1074.02 -memo -SELECT s, i, f FROM a WHERE s='foo' ORDER BY s DESC, i ----- -memo (optimized, ~6KB, required=[presentation: s:4,i:2,f:3] [ordering: +2 opt(4)]) - ├── G1: (select G2 G3) (scan a@s_idx,cols=(2-4),constrained) (index-join G4 a,cols=(2-4)) - │ ├── [presentation: s:4,i:2,f:3] [ordering: +2 opt(4)] - │ │ ├── best: (sort G1) - │ │ └── cost: 15.58 - │ └── [] - │ ├── best: (scan a@s_idx,cols=(2-4),constrained) - │ └── cost: 14.71 - 
├── G2: (scan a,cols=(2-4)) (scan a@s_idx,cols=(2-4)) - │ ├── [ordering: +2 opt(4)] - │ │ ├── best: (sort G2) - │ │ └── cost: 1293.35 - │ └── [] - │ ├── best: (scan a@s_idx,cols=(2-4)) - │ └── cost: 1074.02 - ├── G3: (filters G5) - ├── G4: (scan a@si_idx,cols=(1,2,4),constrained) - │ ├── [ordering: +2 opt(4)] - │ │ ├── best: (scan a@si_idx,rev,cols=(1,2,4),constrained) - │ │ └── cost: 15.04 - │ └── [] - │ ├── best: (scan a@si_idx,cols=(1,2,4),constrained) - │ └── cost: 14.71 - ├── G5: (eq G6 G7) - ├── G6: (variable s) - └── G7: (const 'foo') - # Force an index in order to ensure that an index join is created. opt SELECT * FROM a@si_idx @@ -379,32 +297,6 @@ memo (optimized, ~3KB, required=[presentation: d:4] [ordering: +6]) ├── G5: (scalar-list G6) └── G6: (variable d) -memo -SELECT j FROM a WHERE s = 'foo' ----- -memo (optimized, ~7KB, required=[presentation: j:5]) - ├── G1: (project G2 G3 j) - │ └── [presentation: j:5] - │ ├── best: (project G2 G3 j) - │ └── cost: 14.72 - ├── G2: (select G4 G5) (index-join G6 a,cols=(4,5)) (scan a@si_idx,cols=(4,5),constrained) - │ └── [] - │ ├── best: (scan a@si_idx,cols=(4,5),constrained) - │ └── cost: 14.61 - ├── G3: (projections) - ├── G4: (scan a,cols=(4,5)) (scan a@si_idx,cols=(4,5)) - │ └── [] - │ ├── best: (scan a@si_idx,cols=(4,5)) - │ └── cost: 1064.02 - ├── G5: (filters G7) - ├── G6: (scan a@s_idx,cols=(1,4),constrained) - │ └── [] - │ ├── best: (scan a@s_idx,cols=(1,4),constrained) - │ └── cost: 14.61 - ├── G7: (eq G8 G9) - ├── G8: (variable s) - └── G9: (const 'foo') - # Scan of primary index is lowest cost. 
opt SELECT s, i, f FROM a ORDER BY k, i, s @@ -477,28 +369,6 @@ memo (optimized, ~2KB, required=[presentation: i:2,k:1] [ordering: -4,+2,+1]) ├── best: (scan a@s_idx,cols=(1,2,4)) └── cost: 1074.02 -memo -SELECT i, k FROM a WHERE s >= 'foo' ----- -memo (optimized, ~6KB, required=[presentation: i:2,k:1]) - ├── G1: (project G2 G3 k i) - │ └── [presentation: i:2,k:1] - │ ├── best: (project G2 G3 k i) - │ └── cost: 364.02 - ├── G2: (select G4 G5) (scan a@s_idx,cols=(1,2,4),constrained) (scan a@si_idx,cols=(1,2,4),constrained) - │ └── [] - │ ├── best: (scan a@s_idx,cols=(1,2,4),constrained) - │ └── cost: 360.68 - ├── G3: (projections) - ├── G4: (scan a,cols=(1,2,4)) (scan a@s_idx,cols=(1,2,4)) (scan a@si_idx,cols=(1,2,4)) - │ └── [] - │ ├── best: (scan a@s_idx,cols=(1,2,4)) - │ └── cost: 1074.02 - ├── G5: (filters G6) - ├── G6: (ge G7 G8) - ├── G7: (variable s) - └── G8: (const 'foo') - # GenerateIndexScans propagates row-level locking information. opt SELECT s, i, f FROM a ORDER BY s FOR UPDATE @@ -509,246 +379,6 @@ scan a@s_idx ├── volatile └── ordering: +4 -# Collated strings are treated properly. -exec-ddl -CREATE TABLE x (s STRING COLLATE en_u_ks_level1 PRIMARY KEY) ----- - -opt -SELECT s FROM x WHERE s < 'hello' COLLATE en_u_ks_level1 ----- -scan x - ├── columns: s:1!null - ├── constraint: /1: [ - /'hello' COLLATE en_u_ks_level1) - └── key: (1) - -opt -SELECT s FROM x WHERE s = 'hello' COLLATE en_u_ks_level1 ----- -scan x - ├── columns: s:1!null - ├── constraint: /1: [/'hello' COLLATE en_u_ks_level1 - /'hello' COLLATE en_u_ks_level1] - ├── cardinality: [0 - 1] - ├── key: () - └── fd: ()-->(1) - -# Can't generate spans for other collations. -opt -SELECT s FROM x WHERE s COLLATE en = 'hello' COLLATE en ----- -select - ├── columns: s:1!null - ├── key: (1) - ├── scan x - │ ├── columns: s:1!null - │ └── key: (1) - └── filters - └── s:1 COLLATE en = 'hello' COLLATE en [outer=(1)] - -# Realistic example where using constraints as filters help. 
-# An even more realistic example would have a creation timestamp instead of a -# seq_num integer, but that makes the plans much more cluttered. -exec-ddl -CREATE TABLE "orders" ( - region STRING NOT NULL, - id INT NOT NULL, - total DECIMAL NOT NULL, - seq_num INT NOT NULL, - PRIMARY KEY (region, id), - UNIQUE INDEX orders_by_seq_num (region, seq_num, id) STORING (total), - CHECK (region IN ('us-east1', 'us-west1', 'europe-west2')) -) ----- - -exec-ddl -ALTER TABLE "orders" INJECT STATISTICS '[ - { - "columns": ["region"], - "distinct_count": 3, - "null_count": 0, - "row_count": 100, - "created_at": "2018-01-01 1:00:00.00000+00:00" - }, - { - "columns": ["id"], - "distinct_count": 100, - "null_count": 0, - "row_count": 100, - "created_at": "2018-01-01 1:00:00.00000+00:00" - }, - { - "columns": ["total"], - "distinct_count": 100, - "null_count": 0, - "row_count": 100, - "created_at": "2018-01-01 1:00:00.00000+00:00" - }, - { - "columns": ["seq_num"], - "distinct_count": 50, - "null_count": 0, - "row_count": 100, - "created_at": "2018-01-01 1:00:00.00000+00:00" - } -]' ----- - -opt -SELECT sum(total) FROM orders WHERE seq_num >= 10 AND seq_num < 20 ----- -scalar-group-by - ├── columns: sum:6 - ├── cardinality: [1 - 1] - ├── key: () - ├── fd: ()-->(6) - ├── scan orders@orders_by_seq_num - │ ├── columns: total:3!null seq_num:4!null - │ └── constraint: /1/4/2 - │ ├── [/'europe-west2'/10 - /'europe-west2'/19] - │ ├── [/'us-east1'/10 - /'us-east1'/19] - │ └── [/'us-west1'/10 - /'us-west1'/19] - └── aggregations - └── sum [as=sum:6, outer=(3)] - └── total:3 - -exec-ddl -CREATE TABLE xyz ( - x INT PRIMARY KEY, - y INT NOT NULL, - z STRING NOT NULL, - CHECK (x < 10 AND x > 1), - CHECK (y < 10 AND y > 1), - CHECK (z in ('first', 'second')), - INDEX secondary (y, x), - INDEX tertiary (z, y, x)) ----- - -opt -SELECT x, y FROM xyz WHERE x > 5 ----- -select - ├── columns: x:1!null y:2!null - ├── key: (1) - ├── fd: (1)-->(2) - ├── scan xyz@tertiary - │ ├── columns: x:1!null 
y:2!null - │ ├── constraint: /3/2/1 - │ │ ├── [/'first'/2/6 - /'first'/9/9] - │ │ └── [/'second'/2/6 - /'second'/9/9] - │ ├── key: (1) - │ └── fd: (1)-->(2) - └── filters - └── x:1 > 5 [outer=(1), constraints=(/1: [/6 - ]; tight)] - -# TODO(ridwanmsharif): Confirm if this makes sense. I would've expected that the primary index -# would be used here. But it isn't the plan being picked. Curious. -opt -SELECT * FROM xyz WHERE x > 5 ----- -select - ├── columns: x:1!null y:2!null z:3!null - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── scan xyz@tertiary - │ ├── columns: x:1!null y:2!null z:3!null - │ ├── constraint: /3/2/1 - │ │ ├── [/'first'/2/6 - /'first'/9/9] - │ │ └── [/'second'/2/6 - /'second'/9/9] - │ ├── key: (1) - │ └── fd: (1)-->(2,3) - └── filters - └── x:1 > 5 [outer=(1), constraints=(/1: [/6 - ]; tight)] - -# Check constraint used only for the non nullable column. Constraints on x are ignored. -exec-ddl -CREATE TABLE xy ( - x INT, - y INT NOT NULL, - CHECK (x < 10 AND x > 1), - CHECK (y < 10 AND y > 1), - INDEX secondary (y, x)) ----- - -opt -SELECT x, y FROM xy WHERE x > 5 ----- -select - ├── columns: x:1!null y:2!null - ├── scan xy@secondary - │ ├── columns: x:1 y:2!null - │ └── constraint: /2/1/3: [/2/6 - /9] - └── filters - └── x:1 > 5 [outer=(1), constraints=(/1: [/6 - ]; tight)] - -# Check constraints that can evaluate to NULL are ignored. -exec-ddl -CREATE TABLE null_constraint ( - y INT NOT NULL, - CHECK (y IN (1, 2, NULL)), - INDEX index_1 (y)) ----- - -opt -SELECT y FROM null_constraint WHERE y > 0 ----- -scan null_constraint@index_1 - ├── columns: y:1!null - └── constraint: /1/2: [/1 - ] - -exec-ddl -CREATE TABLE null_constraint_2 ( - y INT NOT NULL, - CHECK ((y IN (1, 2, NULL)) AND (y > 10)), - CHECK (y < 15), - INDEX index_1 (y)) ----- - -opt -SELECT y FROM null_constraint_2 WHERE y > 0 ----- -scan null_constraint_2@index_1 - ├── columns: y:1!null - └── constraint: /1/2: [/1 - /14] - -# Unvalidated constraints are ignored. 
-exec-ddl -CREATE TABLE check_constraint_validity ( - a int NOT NULL, - INDEX secondary (a), - CONSTRAINT "check:unvalidated" CHECK (a < 10), - CONSTRAINT "check:validated" CHECK (a < 20)) ----- - -opt -SELECT * FROM check_constraint_validity WHERE a > 6 ----- -scan check_constraint_validity@secondary - ├── columns: a:1!null - └── constraint: /1/2: [/7 - /19] - -# Test that we can constrain indexes using the results of now(). -exec-ddl -CREATE TABLE with_time_index (k INT PRIMARY KEY, time TIMESTAMP, INDEX(time)) ----- - -opt -SELECT * FROM with_time_index WHERE time > now() - INTERVAL '1 hour' ----- -scan with_time_index@secondary - ├── columns: k:1!null time:2!null - ├── constraint: /2/1: [/'2017-05-10 12:00:00.000001' - ] - ├── key: (1) - └── fd: (1)-->(2) - -opt -SELECT * FROM with_time_index WHERE time >= 'today' ----- -scan with_time_index@secondary - ├── columns: k:1!null time:2!null - ├── constraint: /2/1: [/'2017-05-10 00:00:00' - ] - ├── key: (1) - └── fd: (1)-->(2) - exec-ddl CREATE TABLE fk_a ( a INT PRIMARY KEY, diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index a99f39061cd9..a9fc21a8db54 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -106,7 +106,6 @@ CREATE TABLE no_explicit_primary_key # GeneratePartialIndexScans # -------------------------------------------------- - exec-ddl CREATE INDEX idx ON p (i) STORING (f, s) WHERE s = 'foo' ---- @@ -966,6 +965,385 @@ select └── filters └── (k:1 + u:2) = 1 [outer=(1,2), immutable] +exec-ddl +CREATE TABLE kifs +( + k INT PRIMARY KEY, + i INT, + f FLOAT, + s STRING, + j JSON, + INDEX s_idx (s) STORING (i, f), + INDEX si_idx (s DESC, i DESC) STORING (j) +) +---- + +# Constrain the kifs@si_idx so that an index join is generated. 
+exploretrace rule=GenerateConstrainedScans +SELECT s, i, f FROM kifs WHERE s='foo' ORDER BY s, k, i +---- +---- +================================================================================ +GenerateConstrainedScans +================================================================================ +Source expression: + select + ├── columns: s:4!null i:2 f:3 [hidden: k:1!null] + ├── key: (1) + ├── fd: ()-->(4), (1)-->(2,3) + ├── ordering: +1 opt(4) [actual: +1] + ├── scan kifs@s_idx + │ ├── columns: k:1!null i:2 f:3 s:4 + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ └── ordering: +1 opt(4) [actual: +4,+1] + └── filters + └── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] + +New expression 1 of 2: + scan kifs@s_idx + ├── columns: s:4!null i:2 f:3 [hidden: k:1!null] + ├── constraint: /4/1: [/'foo' - /'foo'] + ├── key: (1) + ├── fd: ()-->(4), (1)-->(2,3) + └── ordering: +1 opt(4) [actual: +1] + +New expression 2 of 2: + sort + ├── columns: s:4!null i:2 f:3 [hidden: k:1!null] + ├── key: (1) + ├── fd: ()-->(4), (1)-->(2,3) + ├── ordering: +1 opt(4) [actual: +1] + └── index-join kifs + ├── columns: k:1!null i:2 f:3 s:4!null + ├── key: (1) + ├── fd: ()-->(4), (1)-->(2,3) + └── scan kifs@si_idx + ├── columns: k:1!null i:2 s:4!null + ├── constraint: /-4/-2/1: [/'foo' - /'foo'] + ├── key: (1) + └── fd: ()-->(4), (1)-->(2) +---- +---- + +memo +SELECT s, i, f FROM kifs WHERE s='foo' ORDER BY s DESC, i +---- +memo (optimized, ~6KB, required=[presentation: s:4,i:2,f:3] [ordering: +2 opt(4)]) + ├── G1: (select G2 G3) (scan kifs@s_idx,cols=(2-4),constrained) (index-join G4 kifs,cols=(2-4)) + │ ├── [presentation: s:4,i:2,f:3] [ordering: +2 opt(4)] + │ │ ├── best: (sort G1) + │ │ └── cost: 15.58 + │ └── [] + │ ├── best: (scan kifs@s_idx,cols=(2-4),constrained) + │ └── cost: 14.71 + ├── G2: (scan kifs,cols=(2-4)) (scan kifs@s_idx,cols=(2-4)) + │ ├── [ordering: +2 opt(4)] + │ │ ├── best: (sort G2) + │ │ └── cost: 1293.35 + │ └── [] + │ ├── best: 
(scan kifs@s_idx,cols=(2-4)) + │ └── cost: 1074.02 + ├── G3: (filters G5) + ├── G4: (scan kifs@si_idx,cols=(1,2,4),constrained) + │ ├── [ordering: +2 opt(4)] + │ │ ├── best: (scan kifs@si_idx,rev,cols=(1,2,4),constrained) + │ │ └── cost: 15.04 + │ └── [] + │ ├── best: (scan kifs@si_idx,cols=(1,2,4),constrained) + │ └── cost: 14.71 + ├── G5: (eq G6 G7) + ├── G6: (variable s) + └── G7: (const 'foo') + +memo +SELECT j FROM kifs WHERE s = 'foo' +---- +memo (optimized, ~7KB, required=[presentation: j:5]) + ├── G1: (project G2 G3 j) + │ └── [presentation: j:5] + │ ├── best: (project G2 G3 j) + │ └── cost: 14.72 + ├── G2: (select G4 G5) (index-join G6 kifs,cols=(4,5)) (scan kifs@si_idx,cols=(4,5),constrained) + │ └── [] + │ ├── best: (scan kifs@si_idx,cols=(4,5),constrained) + │ └── cost: 14.61 + ├── G3: (projections) + ├── G4: (scan kifs,cols=(4,5)) (scan kifs@si_idx,cols=(4,5)) + │ └── [] + │ ├── best: (scan kifs@si_idx,cols=(4,5)) + │ └── cost: 1064.02 + ├── G5: (filters G7) + ├── G6: (scan kifs@s_idx,cols=(1,4),constrained) + │ └── [] + │ ├── best: (scan kifs@s_idx,cols=(1,4),constrained) + │ └── cost: 14.61 + ├── G7: (eq G8 G9) + ├── G8: (variable s) + └── G9: (const 'foo') + +memo +SELECT i, k FROM kifs WHERE s >= 'foo' +---- +memo (optimized, ~6KB, required=[presentation: i:2,k:1]) + ├── G1: (project G2 G3 k i) + │ └── [presentation: i:2,k:1] + │ ├── best: (project G2 G3 k i) + │ └── cost: 364.02 + ├── G2: (select G4 G5) (scan kifs@s_idx,cols=(1,2,4),constrained) (scan kifs@si_idx,cols=(1,2,4),constrained) + │ └── [] + │ ├── best: (scan kifs@s_idx,cols=(1,2,4),constrained) + │ └── cost: 360.68 + ├── G3: (projections) + ├── G4: (scan kifs,cols=(1,2,4)) (scan kifs@s_idx,cols=(1,2,4)) (scan kifs@si_idx,cols=(1,2,4)) + │ └── [] + │ ├── best: (scan kifs@s_idx,cols=(1,2,4)) + │ └── cost: 1074.02 + ├── G5: (filters G6) + ├── G6: (ge G7 G8) + ├── G7: (variable s) + └── G8: (const 'foo') + +# Collated strings are treated properly. 
+exec-ddl +CREATE TABLE x (s STRING COLLATE en_u_ks_level1 PRIMARY KEY) +---- + +opt +SELECT s FROM x WHERE s < 'hello' COLLATE en_u_ks_level1 +---- +scan x + ├── columns: s:1!null + ├── constraint: /1: [ - /'hello' COLLATE en_u_ks_level1) + └── key: (1) + +opt +SELECT s FROM x WHERE s = 'hello' COLLATE en_u_ks_level1 +---- +scan x + ├── columns: s:1!null + ├── constraint: /1: [/'hello' COLLATE en_u_ks_level1 - /'hello' COLLATE en_u_ks_level1] + ├── cardinality: [0 - 1] + ├── key: () + └── fd: ()-->(1) + +# Can't generate spans for other collations. +opt +SELECT s FROM x WHERE s COLLATE en = 'hello' COLLATE en +---- +select + ├── columns: s:1!null + ├── key: (1) + ├── scan x + │ ├── columns: s:1!null + │ └── key: (1) + └── filters + └── s:1 COLLATE en = 'hello' COLLATE en [outer=(1)] + +# Realistic example where using constraints as filters help. +# An even more realistic example would have a creation timestamp instead of a +# seq_num integer, but that makes the plans much more cluttered. 
+exec-ddl +CREATE TABLE "orders" ( + region STRING NOT NULL, + id INT NOT NULL, + total DECIMAL NOT NULL, + seq_num INT NOT NULL, + PRIMARY KEY (region, id), + UNIQUE INDEX orders_by_seq_num (region, seq_num, id) STORING (total), + CHECK (region IN ('us-east1', 'us-west1', 'europe-west2')) +) +---- + +exec-ddl +ALTER TABLE "orders" INJECT STATISTICS '[ + { + "columns": ["region"], + "distinct_count": 3, + "null_count": 0, + "row_count": 100, + "created_at": "2018-01-01 1:00:00.00000+00:00" + }, + { + "columns": ["id"], + "distinct_count": 100, + "null_count": 0, + "row_count": 100, + "created_at": "2018-01-01 1:00:00.00000+00:00" + }, + { + "columns": ["total"], + "distinct_count": 100, + "null_count": 0, + "row_count": 100, + "created_at": "2018-01-01 1:00:00.00000+00:00" + }, + { + "columns": ["seq_num"], + "distinct_count": 50, + "null_count": 0, + "row_count": 100, + "created_at": "2018-01-01 1:00:00.00000+00:00" + } +]' +---- + +opt +SELECT sum(total) FROM orders WHERE seq_num >= 10 AND seq_num < 20 +---- +scalar-group-by + ├── columns: sum:6 + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(6) + ├── scan orders@orders_by_seq_num + │ ├── columns: total:3!null seq_num:4!null + │ └── constraint: /1/4/2 + │ ├── [/'europe-west2'/10 - /'europe-west2'/19] + │ ├── [/'us-east1'/10 - /'us-east1'/19] + │ └── [/'us-west1'/10 - /'us-west1'/19] + └── aggregations + └── sum [as=sum:6, outer=(3)] + └── total:3 + +exec-ddl +CREATE TABLE xyz ( + x INT PRIMARY KEY, + y INT NOT NULL, + z STRING NOT NULL, + CHECK (x < 10 AND x > 1), + CHECK (y < 10 AND y > 1), + CHECK (z in ('first', 'second')), + INDEX secondary (y, x), + INDEX tertiary (z, y, x)) +---- + +opt +SELECT x, y FROM xyz WHERE x > 5 +---- +select + ├── columns: x:1!null y:2!null + ├── key: (1) + ├── fd: (1)-->(2) + ├── scan xyz@tertiary + │ ├── columns: x:1!null y:2!null + │ ├── constraint: /3/2/1 + │ │ ├── [/'first'/2/6 - /'first'/9/9] + │ │ └── [/'second'/2/6 - /'second'/9/9] + │ ├── key: (1) + │ └── fd: 
(1)-->(2) + └── filters + └── x:1 > 5 [outer=(1), constraints=(/1: [/6 - ]; tight)] + +# TODO(ridwanmsharif): Confirm if this makes sense. I would've expected that the primary index +# would be used here. But it isn't the plan being picked. Curious. +opt +SELECT * FROM xyz WHERE x > 5 +---- +select + ├── columns: x:1!null y:2!null z:3!null + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── scan xyz@tertiary + │ ├── columns: x:1!null y:2!null z:3!null + │ ├── constraint: /3/2/1 + │ │ ├── [/'first'/2/6 - /'first'/9/9] + │ │ └── [/'second'/2/6 - /'second'/9/9] + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── x:1 > 5 [outer=(1), constraints=(/1: [/6 - ]; tight)] + +# Check constraint used only for the non nullable column. Constraints on x are ignored. +exec-ddl +CREATE TABLE xy ( + x INT, + y INT NOT NULL, + CHECK (x < 10 AND x > 1), + CHECK (y < 10 AND y > 1), + INDEX secondary (y, x)) +---- + +opt +SELECT x, y FROM xy WHERE x > 5 +---- +select + ├── columns: x:1!null y:2!null + ├── scan xy@secondary + │ ├── columns: x:1 y:2!null + │ └── constraint: /2/1/3: [/2/6 - /9] + └── filters + └── x:1 > 5 [outer=(1), constraints=(/1: [/6 - ]; tight)] + +# Check constraints that can evaluate to NULL are ignored. +exec-ddl +CREATE TABLE null_constraint ( + y INT NOT NULL, + CHECK (y IN (1, 2, NULL)), + INDEX index_1 (y)) +---- + +opt +SELECT y FROM null_constraint WHERE y > 0 +---- +scan null_constraint@index_1 + ├── columns: y:1!null + └── constraint: /1/2: [/1 - ] + +exec-ddl +CREATE TABLE null_constraint_2 ( + y INT NOT NULL, + CHECK ((y IN (1, 2, NULL)) AND (y > 10)), + CHECK (y < 15), + INDEX index_1 (y)) +---- + +opt +SELECT y FROM null_constraint_2 WHERE y > 0 +---- +scan null_constraint_2@index_1 + ├── columns: y:1!null + └── constraint: /1/2: [/1 - /14] + +# Unvalidated constraints are ignored. 
+exec-ddl +CREATE TABLE check_constraint_validity ( + a int NOT NULL, + INDEX secondary (a), + CONSTRAINT "check:unvalidated" CHECK (a < 10), + CONSTRAINT "check:validated" CHECK (a < 20)) +---- + +opt +SELECT * FROM check_constraint_validity WHERE a > 6 +---- +scan check_constraint_validity@secondary + ├── columns: a:1!null + └── constraint: /1/2: [/7 - /19] + +# Test that we can constrain indexes using the results of now(). +exec-ddl +CREATE TABLE with_time_index (k INT PRIMARY KEY, time TIMESTAMP, INDEX(time)) +---- + +opt +SELECT * FROM with_time_index WHERE time > now() - INTERVAL '1 hour' +---- +scan with_time_index@secondary + ├── columns: k:1!null time:2!null + ├── constraint: /2/1: [/'2017-05-10 12:00:00.000001' - ] + ├── key: (1) + └── fd: (1)-->(2) + +opt +SELECT * FROM with_time_index WHERE time >= 'today' +---- +scan with_time_index@secondary + ├── columns: k:1!null time:2!null + ├── constraint: /2/1: [/'2017-05-10 00:00:00' - ] + ├── key: (1) + └── fd: (1)-->(2) + # Constrained partial index scan. exec-ddl @@ -1283,31 +1661,6 @@ memo (optimized, ~7KB, required=[presentation: k:1]) ├── G8: (variable j) └── G9: (const '{"a": "b"}') -# Query only the primary key with a remaining filter. 2+ paths in containment -# query should favor zigzag joins. -opt -SELECT k FROM b WHERE j @> '{"a": "b", "c": "d"}' ----- -project - ├── columns: k:1!null - ├── immutable - ├── key: (1) - └── inner-join (lookup b) - ├── columns: k:1!null j:4!null - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── immutable - ├── key: (1) - ├── fd: (1)-->(4) - ├── inner-join (zigzag b@inv_idx b@inv_idx) - │ ├── columns: k:1!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [4] = ['{"a": "b"}'] - │ ├── right fixed columns: [4] = ['{"c": "d"}'] - │ └── filters (true) - └── filters - └── j:4 @> '{"a": "b", "c": "d"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] - # Query requiring an index join with no remaining filter. 
opt SELECT u, k FROM b WHERE j @> '{"a": "b"}' @@ -1353,46 +1706,6 @@ index-join b ├── constraint: /4/1: [/'{"a": "b"}' - /'{"a": "b"}'] └── key: (1) -# Query requiring a zigzag join with a remaining filter. -# TODO(itsbilal): remove filter from index join if zigzag join covers it. -opt -SELECT j, k FROM b WHERE j @> '{"a": "b", "c": "d"}' ----- -inner-join (lookup b) - ├── columns: j:4!null k:1!null - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── immutable - ├── key: (1) - ├── fd: (1)-->(4) - ├── inner-join (zigzag b@inv_idx b@inv_idx) - │ ├── columns: k:1!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [4] = ['{"a": "b"}'] - │ ├── right fixed columns: [4] = ['{"c": "d"}'] - │ └── filters (true) - └── filters - └── j:4 @> '{"a": "b", "c": "d"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] - -opt -SELECT * FROM b WHERE j @> '{"a": {"b": "c", "d": "e"}, "f": "g"}' ----- -inner-join (lookup b) - ├── columns: k:1!null u:2 v:3 j:4!null - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2-4), (3)~~>(1,2,4) - ├── inner-join (zigzag b@inv_idx b@inv_idx) - │ ├── columns: k:1!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [4] = ['{"a": {"b": "c"}}'] - │ ├── right fixed columns: [4] = ['{"a": {"d": "e"}}'] - │ └── filters (true) - └── filters - └── j:4 @> '{"a": {"b": "c", "d": "e"}, "f": "g"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] - opt SELECT * FROM b WHERE j @> '{}' ---- @@ -1483,6 +1796,19 @@ select └── filters └── j:4 @> '{"a": []}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] +opt +SELECT * FROM b WHERE j @> '{"a":[[{"b":{"c":[{"d":"e"}]}}]]}' +---- +index-join b + ├── columns: k:1!null u:2 v:3 j:4!null + ├── immutable + ├── key: (1) + ├── fd: (1)-->(2-4), (3)~~>(1,2,4) + └── scan b@inv_idx + ├── columns: k:1!null + ├── constraint: /4/1: [/'{"a": [[{"b": {"c": [{"d": "e"}]}}]]}' - /'{"a": [[{"b": {"c": [{"d": "e"}]}}]]}'] + └── key: (1) + 
# GenerateInvertedIndexScans propagates row-level locking information. opt SELECT k FROM b WHERE j @> '{"a": "b"}' FOR UPDATE @@ -1511,29 +1837,6 @@ project ├── constraint: /2/1: [/ARRAY[1] - /ARRAY[1]] └── key: (1) -opt -SELECT k FROM c WHERE a @> ARRAY[1,3,1,5] ----- -project - ├── columns: k:1!null - ├── immutable - ├── key: (1) - └── inner-join (lookup c) - ├── columns: k:1!null a:2!null - ├── key columns: [1] = [1] - ├── lookup columns are key - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2) - ├── inner-join (zigzag c@inv_idx c@inv_idx) - │ ├── columns: k:1!null - │ ├── eq columns: [1] = [1] - │ ├── left fixed columns: [2] = [ARRAY[1]] - │ ├── right fixed columns: [2] = [ARRAY[3]] - │ └── filters (true) - └── filters - └── a:2 @> ARRAY[1,3,1,5] [outer=(2), immutable, constraints=(/2: (/NULL - ])] - opt SELECT k FROM c WHERE a @> ARRAY[]::INT[] ---- @@ -2602,20 +2905,1025 @@ DROP INDEX idx ---- # -------------------------------------------------- -# SplitDisjunction +# GenerateZigzagJoins # -------------------------------------------------- -# TODO(mgartner): PruneAggCols should be pruning columns from the DistinctOn -# and further down the expression tree, ultimately eliminating the index-joins. -# PruneAggCols does not run in this case because normalization rules do not run -# at the root tree generated by an exploration rule. 
-opt expect=SplitDisjunction -SELECT k FROM d WHERE u = 1 OR v = 1 +exec-ddl +CREATE TABLE pqr +( + p INT PRIMARY KEY, + q INT, + r INT, + s STRING, + t STRING, + INDEX q (q), + INDEX r (r), + INDEX s (s) STORING (r), + INDEX rs (r,s), + INDEX ts (t,s) +) ---- -project - ├── columns: k:1!null - ├── key: (1) - └── distinct-on + +exec-ddl +CREATE TABLE zz ( + a INT8 PRIMARY KEY, + b INT8 NULL, + c INT8 NULL, + INDEX idx_b (b ASC), + CONSTRAINT idx_c UNIQUE (c) +) +---- + +exec-ddl +CREATE TABLE zz_redundant ( + a INT8 PRIMARY KEY, + b INT8 NULL, + c INT8 NULL, + INDEX idx_u (b ASC, c ASC), + INDEX idx_v (b ASC, c ASC) +) +---- + +# Simple zigzag case - where all requested columns are in the indexes being +# joined. +opt +SELECT q,r FROM pqr WHERE q = 1 AND r = 2 +---- +inner-join (zigzag pqr@q pqr@r) + ├── columns: q:2!null r:3!null + ├── eq columns: [1] = [1] + ├── left fixed columns: [2] = [1] + ├── right fixed columns: [3] = [2] + ├── fd: ()-->(2,3) + └── filters + ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + └── r:3 = 2 [outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] + +opt +SELECT q,r FROM pqr WHERE q = 1 AND r IS NULL +---- +inner-join (zigzag pqr@q pqr@r) + ├── columns: q:2!null r:3 + ├── eq columns: [1] = [1] + ├── left fixed columns: [2] = [1] + ├── right fixed columns: [3] = [NULL] + ├── fd: ()-->(2,3) + └── filters + ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + └── r:3 IS NULL [outer=(3), constraints=(/3: [/NULL - /NULL]; tight), fd=()-->(3)] + +memo +SELECT q,r FROM pqr WHERE q = 1 AND r = 2 +---- +memo (optimized, ~13KB, required=[presentation: q:2,r:3]) + ├── G1: (select G2 G3) (select G4 G5) (select G6 G7) (select G8 G7) (zigzag-join G3 pqr@q pqr@r) + │ └── [presentation: q:2,r:3] + │ ├── best: (zigzag-join G3 pqr@q pqr@r) + │ └── cost: 1.93 + ├── G2: (scan pqr,cols=(2,3)) + │ └── [] + │ ├── best: (scan pqr,cols=(2,3)) + │ └── cost: 1074.02 + ├── G3: (filters G9 G10) + ├── G4: 
(index-join G11 pqr,cols=(2,3)) + │ └── [] + │ ├── best: (index-join G11 pqr,cols=(2,3)) + │ └── cost: 75.12 + ├── G5: (filters G10) + ├── G6: (index-join G12 pqr,cols=(2,3)) + │ └── [] + │ ├── best: (index-join G12 pqr,cols=(2,3)) + │ └── cost: 75.12 + ├── G7: (filters G9) + ├── G8: (index-join G13 pqr,cols=(2,3)) + │ └── [] + │ ├── best: (index-join G13 pqr,cols=(2,3)) + │ └── cost: 75.22 + ├── G9: (eq G14 G15) + ├── G10: (eq G16 G17) + ├── G11: (scan pqr@q,cols=(1,2),constrained) + │ └── [] + │ ├── best: (scan pqr@q,cols=(1,2),constrained) + │ └── cost: 14.41 + ├── G12: (scan pqr@r,cols=(1,3),constrained) + │ └── [] + │ ├── best: (scan pqr@r,cols=(1,3),constrained) + │ └── cost: 14.41 + ├── G13: (scan pqr@rs,cols=(1,3),constrained) + │ └── [] + │ ├── best: (scan pqr@rs,cols=(1,3),constrained) + │ └── cost: 14.51 + ├── G14: (variable q) + ├── G15: (const 1) + ├── G16: (variable r) + └── G17: (const 2) + +# Case where the fixed columns are extracted from a complicated expression. +opt +SELECT q,r FROM pqr WHERE q = 1 AND ((r < 1 AND r > 1) OR (r >= 2 AND r <= 2)) +---- +inner-join (zigzag pqr@q pqr@r) + ├── columns: q:2!null r:3!null + ├── eq columns: [1] = [1] + ├── left fixed columns: [2] = [1] + ├── right fixed columns: [3] = [2] + ├── fd: ()-->(2,3) + └── filters + ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + └── ((r:3 < 1) AND (r:3 > 1)) OR ((r:3 >= 2) AND (r:3 <= 2)) [outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] + +# Nested zigzag case - zigzag join needs to be wrapped in a lookup join to +# satisfy required columns. 
+opt +SELECT q,r,s FROM pqr WHERE q = 1 AND r = 2 +---- +inner-join (lookup pqr) + ├── columns: q:2!null r:3!null s:4 + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── fd: ()-->(2,3) + ├── inner-join (zigzag pqr@q pqr@r) + │ ├── columns: p:1!null q:2!null r:3!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = [1] + │ ├── right fixed columns: [3] = [2] + │ ├── fd: ()-->(2,3) + │ └── filters + │ ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + │ └── r:3 = 2 [outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] + └── filters (true) + +memo +SELECT q,r,s FROM pqr WHERE q = 1 AND r = 2 +---- +memo (optimized, ~15KB, required=[presentation: q:2,r:3,s:4]) + ├── G1: (select G2 G3) (select G4 G5) (select G6 G7) (select G8 G7) (lookup-join G9 G10 pqr,keyCols=[1],outCols=(2-4)) + │ └── [presentation: q:2,r:3,s:4] + │ ├── best: (lookup-join G9 G10 pqr,keyCols=[1],outCols=(2-4)) + │ └── cost: 7.48 + ├── G2: (scan pqr,cols=(2-4)) + │ └── [] + │ ├── best: (scan pqr,cols=(2-4)) + │ └── cost: 1084.02 + ├── G3: (filters G11 G12) + ├── G4: (index-join G13 pqr,cols=(2-4)) + │ └── [] + │ ├── best: (index-join G13 pqr,cols=(2-4)) + │ └── cost: 75.22 + ├── G5: (filters G12) + ├── G6: (index-join G14 pqr,cols=(2-4)) + │ └── [] + │ ├── best: (index-join G14 pqr,cols=(2-4)) + │ └── cost: 75.22 + ├── G7: (filters G11) + ├── G8: (index-join G15 pqr,cols=(2-4)) + │ └── [] + │ ├── best: (index-join G15 pqr,cols=(2-4)) + │ └── cost: 75.32 + ├── G9: (zigzag-join G3 pqr@q pqr@r) + │ └── [] + │ ├── best: (zigzag-join G3 pqr@q pqr@r) + │ └── cost: 1.94 + ├── G10: (filters) + ├── G11: (eq G16 G17) + ├── G12: (eq G18 G19) + ├── G13: (scan pqr@q,cols=(1,2),constrained) + │ └── [] + │ ├── best: (scan pqr@q,cols=(1,2),constrained) + │ └── cost: 14.41 + ├── G14: (scan pqr@r,cols=(1,3),constrained) + │ └── [] + │ ├── best: (scan pqr@r,cols=(1,3),constrained) + │ └── cost: 14.41 + ├── G15: (scan pqr@rs,cols=(1,3,4),constrained) + │ └── [] + │ 
├── best: (scan pqr@rs,cols=(1,3,4),constrained) + │ └── cost: 14.61 + ├── G16: (variable q) + ├── G17: (const 1) + ├── G18: (variable r) + └── G19: (const 2) + +# Zigzag with fixed columns of different types. +opt +SELECT q,s FROM pqr WHERE q = 1 AND s = 'foo' +---- +inner-join (zigzag pqr@q pqr@s) + ├── columns: q:2!null s:4!null + ├── eq columns: [1] = [1] + ├── left fixed columns: [2] = [1] + ├── right fixed columns: [4] = ['foo'] + ├── fd: ()-->(2,4) + └── filters + ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + └── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] + +memo +SELECT q,s FROM pqr WHERE q = 1 AND s = 'foo' +---- +memo (optimized, ~11KB, required=[presentation: q:2,s:4]) + ├── G1: (select G2 G3) (select G4 G5) (select G6 G7) (zigzag-join G3 pqr@q pqr@s) + │ └── [presentation: q:2,s:4] + │ ├── best: (zigzag-join G3 pqr@q pqr@s) + │ └── cost: 1.94 + ├── G2: (scan pqr,cols=(2,4)) + │ └── [] + │ ├── best: (scan pqr,cols=(2,4)) + │ └── cost: 1074.02 + ├── G3: (filters G8 G9) + ├── G4: (index-join G10 pqr,cols=(2,4)) + │ └── [] + │ ├── best: (index-join G10 pqr,cols=(2,4)) + │ └── cost: 75.12 + ├── G5: (filters G9) + ├── G6: (index-join G11 pqr,cols=(2,4)) + │ └── [] + │ ├── best: (index-join G11 pqr,cols=(2,4)) + │ └── cost: 75.22 + ├── G7: (filters G8) + ├── G8: (eq G12 G13) + ├── G9: (eq G14 G15) + ├── G10: (scan pqr@q,cols=(1,2),constrained) + │ └── [] + │ ├── best: (scan pqr@q,cols=(1,2),constrained) + │ └── cost: 14.41 + ├── G11: (scan pqr@s,cols=(1,4),constrained) + │ └── [] + │ ├── best: (scan pqr@s,cols=(1,4),constrained) + │ └── cost: 14.51 + ├── G12: (variable q) + ├── G13: (const 1) + ├── G14: (variable s) + └── G15: (const 'foo') + +# Zigzag with implicit equality column in addition to primary key: +# indexes on (r,s) and (t,s) should be chosen even though s is not being fixed +# in the ON clause. 
+opt +SELECT r,t FROM pqr WHERE r = 1 AND t = 'foo' +---- +inner-join (zigzag pqr@rs pqr@ts) + ├── columns: r:3!null t:5!null + ├── eq columns: [4 1] = [4 1] + ├── left fixed columns: [3] = [1] + ├── right fixed columns: [5] = ['foo'] + ├── fd: ()-->(3,5) + └── filters + ├── r:3 = 1 [outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)] + └── t:5 = 'foo' [outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)] + +memo +SELECT r,t FROM pqr WHERE r = 1 AND t = 'foo' +---- +memo (optimized, ~13KB, required=[presentation: r:3,t:5]) + ├── G1: (select G2 G3) (select G4 G5) (select G6 G5) (select G7 G8) (zigzag-join G3 pqr@rs pqr@ts) + │ └── [presentation: r:3,t:5] + │ ├── best: (zigzag-join G3 pqr@rs pqr@ts) + │ └── cost: 1.95 + ├── G2: (scan pqr,cols=(3,5)) + │ └── [] + │ ├── best: (scan pqr,cols=(3,5)) + │ └── cost: 1074.02 + ├── G3: (filters G9 G10) + ├── G4: (index-join G11 pqr,cols=(3,5)) + │ └── [] + │ ├── best: (index-join G11 pqr,cols=(3,5)) + │ └── cost: 75.12 + ├── G5: (filters G10) + ├── G6: (index-join G12 pqr,cols=(3,5)) + │ └── [] + │ ├── best: (index-join G12 pqr,cols=(3,5)) + │ └── cost: 75.22 + ├── G7: (index-join G13 pqr,cols=(3,5)) + │ └── [] + │ ├── best: (index-join G13 pqr,cols=(3,5)) + │ └── cost: 75.22 + ├── G8: (filters G9) + ├── G9: (eq G14 G15) + ├── G10: (eq G16 G17) + ├── G11: (scan pqr@r,cols=(1,3),constrained) + │ └── [] + │ ├── best: (scan pqr@r,cols=(1,3),constrained) + │ └── cost: 14.41 + ├── G12: (scan pqr@rs,cols=(1,3),constrained) + │ └── [] + │ ├── best: (scan pqr@rs,cols=(1,3),constrained) + │ └── cost: 14.51 + ├── G13: (scan pqr@ts,cols=(1,5),constrained) + │ └── [] + │ ├── best: (scan pqr@ts,cols=(1,5),constrained) + │ └── cost: 14.51 + ├── G14: (variable r) + ├── G15: (const 1) + ├── G16: (variable t) + └── G17: (const 'foo') + +# Zigzag with choice between indexes for multiple equality predicates. 
+opt +SELECT p,q,r,s FROM pqr WHERE q = 1 AND r = 1 AND s = 'foo' +---- +inner-join (zigzag pqr@q pqr@s) + ├── columns: p:1!null q:2!null r:3!null s:4!null + ├── eq columns: [1] = [1] + ├── left fixed columns: [2] = [1] + ├── right fixed columns: [4] = ['foo'] + ├── key: (1) + ├── fd: ()-->(2-4) + └── filters + ├── q:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + ├── r:3 = 1 [outer=(3), constraints=(/3: [/1 - /1]; tight), fd=()-->(3)] + └── s:4 = 'foo' [outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] + +# Tests for zigzag joins over partial indexes. + +exec-ddl +CREATE TABLE zz_partial ( + k INT PRIMARY KEY, + i INT, + j INT, + b1 BOOL, + b2 BOOL, + s STRING +) +---- + +exec-ddl +CREATE INDEX i ON zz_partial (i) WHERE b1 +---- + +exec-ddl +CREATE INDEX j ON zz_partial (j) WHERE b2 +---- + +# Generate a zigzag join over two partial indexes. +opt expect=GenerateZigzagJoins +SELECT k FROM zz_partial WHERE i = 10 AND b1 AND j = 20 AND b2 +---- +project + ├── columns: k:1!null + ├── key: (1) + └── inner-join (lookup zz_partial) + ├── columns: k:1!null i:2!null j:3!null b1:4!null b2:5!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── key: (1) + ├── fd: ()-->(2-5) + ├── inner-join (zigzag zz_partial@i zz_partial@j) + │ ├── columns: k:1!null i:2!null j:3!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = [10] + │ ├── right fixed columns: [3] = [20] + │ ├── fd: ()-->(2,3) + │ └── filters + │ ├── i:2 = 10 [outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)] + │ └── j:3 = 20 [outer=(3), constraints=(/3: [/20 - /20]; tight), fd=()-->(3)] + └── filters (true) + +# Don't generate a zigzag join when the first index predicate is not implied. 
+opt expect-not=GenerateZigzagJoins format=hide-all +SELECT k FROM zz_partial WHERE i = 10 AND j = 20 AND b2 +---- +project + └── select + ├── index-join zz_partial + │ └── select + │ ├── scan zz_partial@j,partial + │ └── filters + │ └── j = 20 + └── filters + └── i = 10 + +# Don't generate a zigzag join when the second index predicate is not implied. +opt expect-not=GenerateZigzagJoins format=hide-all +SELECT k FROM zz_partial WHERE i = 10 AND b1 AND j = 20 +---- +project + └── select + ├── index-join zz_partial + │ └── select + │ ├── scan zz_partial@i,partial + │ └── filters + │ └── i = 10 + └── filters + └── j = 20 + +exec-ddl +DROP INDEX j +---- + +exec-ddl +CREATE INDEX j ON zz_partial (j) +---- + +# Generate a zigzag join over one partial and one non-partial index. +opt expect=GenerateZigzagJoins +SELECT k FROM zz_partial WHERE i = 10 AND b1 AND j = 20 +---- +project + ├── columns: k:1!null + ├── key: (1) + └── inner-join (lookup zz_partial) + ├── columns: k:1!null i:2!null j:3!null b1:4!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── key: (1) + ├── fd: ()-->(2-4) + ├── inner-join (zigzag zz_partial@i zz_partial@j) + │ ├── columns: k:1!null i:2!null j:3!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = [10] + │ ├── right fixed columns: [3] = [20] + │ ├── fd: ()-->(2,3) + │ └── filters + │ ├── i:2 = 10 [outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)] + │ └── j:3 = 20 [outer=(3), constraints=(/3: [/20 - /20]; tight), fd=()-->(3)] + └── filters (true) + +# Don't generate a zigzag join when the partial index predicate is not implied. 
+opt expect-not=GenerateZigzagJoins format=hide-all +SELECT k FROM zz_partial WHERE i = 10 AND j = 20 +---- +project + └── select + ├── index-join zz_partial + │ └── scan zz_partial@j + │ └── constraint: /3/1: [/20 - /20] + └── filters + └── i = 10 + +exec-ddl +DROP INDEX i +---- + +exec-ddl +DROP INDEX j +---- + +exec-ddl +CREATE INDEX i ON zz_partial (i) WHERE i = 10 +---- + +exec-ddl +CREATE INDEX j ON zz_partial (j) +---- + +# Don't generate a zigzag join when the expression that fixes the left columns +# is removed during partial index implication of the left index. +opt expect-not=GenerateZigzagJoins format=hide-all +SELECT k FROM zz_partial WHERE i = 10 AND j = 20 +---- +project + └── select + ├── index-join zz_partial + │ └── scan zz_partial@i,partial + └── filters + └── j = 20 + +exec-ddl +DROP INDEX i +---- + +exec-ddl +DROP INDEX j +---- + +exec-ddl +CREATE INDEX i ON zz_partial (i) +---- + +exec-ddl +CREATE INDEX j ON zz_partial (j) WHERE j = 20 +---- + +# Don't generate a zigzag join when the expression that fixes the right columns +# is removed during partial index implication of the right index. +opt expect-not=GenerateZigzagJoins format=hide-all +SELECT k FROM zz_partial WHERE i = 10 AND j = 20 +---- +project + └── select + ├── index-join zz_partial + │ └── scan zz_partial@j,partial + └── filters + └── i = 10 + +exec-ddl +DROP INDEX i +---- + +exec-ddl +DROP INDEX j +---- + +exec-ddl +CREATE INDEX zz_partial_s ON zz_partial (s) +---- + +exec-ddl +CREATE INDEX j ON zz_partial (j) WHERE s = 'foo' +---- + +# Don't generate a zigzag join when the expression that fixes the left columns +# is removed during partial index implication of the right index. 
+opt expect-not=GenerateZigzagJoins format=hide-all +SELECT k FROM zz_partial WHERE s = 'foo' AND j = 20 +---- +project + └── scan zz_partial@j,partial + └── constraint: /3/1: [/20 - /20] + +exec-ddl +DROP INDEX zz_partial_s +---- + +exec-ddl +DROP INDEX j +---- + +exec-ddl +CREATE INDEX i ON zz_partial (i) WHERE s = 'foo' +---- + +exec-ddl +CREATE INDEX zz_partial_s ON zz_partial (s) +---- + +# Don't generate a zigzag join when the expression that fixes the right columns +# is removed during partial index implication of the left index. +opt expect-not=GenerateZigzagJoins format=hide-all +SELECT k FROM zz_partial WHERE i = 10 AND s = 'foo' +---- +project + └── scan zz_partial@i,partial + └── constraint: /2/1: [/10 - /10] + +exec-ddl +DROP INDEX i +---- + +exec-ddl +DROP INDEX zz_partial_s +---- + +exec-ddl +CREATE INDEX i ON zz_partial (i) +---- + +exec-ddl +CREATE INDEX b1 ON zz_partial (b1) WHERE s = 'foo' +---- + +exec-ddl +CREATE INDEX j ON zz_partial (j) +---- + +# The filters should be reset during each iteration over the left and right +# indexes if they are reduced while proving partial index implication. In this +# test, (s = 'foo') must be applied after the zigzag join. 
+opt expect=GenerateZigzagJoins +SELECT k FROM zz_partial WHERE i = 10 AND j = 20 AND b1 AND s = 'foo' +---- +project + ├── columns: k:1!null + ├── key: (1) + └── inner-join (lookup zz_partial) + ├── columns: k:1!null i:2!null j:3!null b1:4!null s:6!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── key: (1) + ├── fd: ()-->(2-4,6) + ├── inner-join (zigzag zz_partial@i zz_partial@j) + │ ├── columns: k:1!null i:2!null j:3!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = [10] + │ ├── right fixed columns: [3] = [20] + │ ├── fd: ()-->(2,3) + │ └── filters + │ ├── i:2 = 10 [outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)] + │ └── j:3 = 20 [outer=(3), constraints=(/3: [/20 - /20]; tight), fd=()-->(3)] + └── filters + ├── b1:4 [outer=(4), constraints=(/4: [/true - /true]; tight), fd=()-->(4)] + └── s:6 = 'foo' [outer=(6), constraints=(/6: [/'foo' - /'foo']; tight), fd=()-->(6)] + +# Don't generate a zigzag which has the PK as its equality columns against +# nullable unique indexes where the primary key is not part of the indexed +# columns. + +# Regression test for #36051: prior to fixing this, we would try to use the PK +# as the equality column here, but it's not actually part of the key so we +# can't zigzag on it. 
+opt expect-not=GenerateZigzagJoins +SELECT * FROM zz WHERE b IS NULL AND c = 2 +---- +select + ├── columns: a:1!null b:2 c:3!null + ├── cardinality: [0 - 1] + ├── key: () + ├── fd: ()-->(1-3) + ├── index-join zz + │ ├── columns: a:1!null b:2 c:3 + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ ├── fd: ()-->(1-3) + │ └── scan zz@idx_c + │ ├── columns: a:1!null c:3!null + │ ├── constraint: /3: [/2 - /2] + │ ├── cardinality: [0 - 1] + │ ├── key: () + │ └── fd: ()-->(1,3) + └── filters + └── b:2 IS NULL [outer=(2), constraints=(/2: [/NULL - /NULL]; tight), fd=()-->(2)] + +memo +SELECT p,q,r,s FROM pqr WHERE q = 1 AND r = 1 AND s = 'foo' +---- +memo (optimized, ~31KB, required=[presentation: p:1,q:2,r:3,s:4]) + ├── G1: (select G2 G3) (select G4 G5) (select G6 G7) (select G8 G9) (select G10 G9) (lookup-join G11 G12 pqr,keyCols=[1],outCols=(1-4)) (zigzag-join G3 pqr@q pqr@s) (zigzag-join G3 pqr@q pqr@rs) (lookup-join G13 G9 pqr,keyCols=[1],outCols=(1-4)) + │ └── [presentation: p:1,q:2,r:3,s:4] + │ ├── best: (zigzag-join G3 pqr@q pqr@s) + │ └── cost: 1.95 + ├── G2: (scan pqr,cols=(1-4)) + │ └── [] + │ ├── best: (scan pqr,cols=(1-4)) + │ └── cost: 1094.02 + ├── G3: (filters G14 G15 G16) + ├── G4: (index-join G17 pqr,cols=(1-4)) + │ └── [] + │ ├── best: (index-join G17 pqr,cols=(1-4)) + │ └── cost: 75.22 + ├── G5: (filters G15 G16) + ├── G6: (index-join G18 pqr,cols=(1-4)) + │ └── [] + │ ├── best: (index-join G18 pqr,cols=(1-4)) + │ └── cost: 75.22 + ├── G7: (filters G14 G16) + ├── G8: (index-join G19 pqr,cols=(1-4)) + │ └── [] + │ ├── best: (index-join G19 pqr,cols=(1-4)) + │ └── cost: 21.76 + ├── G9: (filters G14) + ├── G10: (index-join G20 pqr,cols=(1-4)) + │ └── [] + │ ├── best: (index-join G20 pqr,cols=(1-4)) + │ └── cost: 10.51 + ├── G11: (zigzag-join G21 pqr@q pqr@r) + │ └── [] + │ ├── best: (zigzag-join G21 pqr@q pqr@r) + │ └── cost: 1.94 + ├── G12: (filters G16) + ├── G13: (zigzag-join G5 pqr@r pqr@s) + │ └── [] + │ ├── best: (zigzag-join G5 pqr@r pqr@s) + │ └── 
cost: 1.95 + ├── G14: (eq G22 G23) + ├── G15: (eq G24 G23) + ├── G16: (eq G25 G26) + ├── G17: (scan pqr@q,cols=(1,2),constrained) + │ └── [] + │ ├── best: (scan pqr@q,cols=(1,2),constrained) + │ └── cost: 14.41 + ├── G18: (scan pqr@r,cols=(1,3),constrained) + │ └── [] + │ ├── best: (scan pqr@r,cols=(1,3),constrained) + │ └── cost: 14.41 + ├── G19: (select G27 G28) + │ └── [] + │ ├── best: (select G27 G28) + │ └── cost: 14.73 + ├── G20: (scan pqr@rs,cols=(1,3,4),constrained) + │ └── [] + │ ├── best: (scan pqr@rs,cols=(1,3,4),constrained) + │ └── cost: 4.98 + ├── G21: (filters G14 G15) + ├── G22: (variable q) + ├── G23: (const 1) + ├── G24: (variable r) + ├── G25: (variable s) + ├── G26: (const 'foo') + ├── G27: (scan pqr@s,cols=(1,3,4),constrained) + │ └── [] + │ ├── best: (scan pqr@s,cols=(1,3,4),constrained) + │ └── cost: 14.61 + └── G28: (filters G15) + +# Zigzag joins cannot be planned for indexes where equality columns do not +# immediately follow fixed columns. Here, the only index on t is (t,s,p) and +# s is not a fixed or equal column, so a zigzag join shouldn't be planned. 
+opt +SELECT q,t FROM pqr WHERE q = 1 AND t = 'foo' +---- +select + ├── columns: q:2!null t:5!null + ├── fd: ()-->(2,5) + ├── index-join pqr + │ ├── columns: q:2 t:5 + │ ├── fd: ()-->(2) + │ └── scan pqr@q + │ ├── columns: p:1!null q:2!null + │ ├── constraint: /2/1: [/1 - /1] + │ ├── key: (1) + │ └── fd: ()-->(2) + └── filters + └── t:5 = 'foo' [outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)] + +memo +SELECT q,t FROM pqr WHERE q = 1 AND t = 'foo' +---- +memo (optimized, ~9KB, required=[presentation: q:2,t:5]) + ├── G1: (select G2 G3) (select G4 G5) (select G6 G7) + │ └── [presentation: q:2,t:5] + │ ├── best: (select G4 G5) + │ └── cost: 75.24 + ├── G2: (scan pqr,cols=(2,5)) + │ └── [] + │ ├── best: (scan pqr,cols=(2,5)) + │ └── cost: 1074.02 + ├── G3: (filters G8 G9) + ├── G4: (index-join G10 pqr,cols=(2,5)) + │ └── [] + │ ├── best: (index-join G10 pqr,cols=(2,5)) + │ └── cost: 75.12 + ├── G5: (filters G9) + ├── G6: (index-join G11 pqr,cols=(2,5)) + │ └── [] + │ ├── best: (index-join G11 pqr,cols=(2,5)) + │ └── cost: 75.22 + ├── G7: (filters G8) + ├── G8: (eq G12 G13) + ├── G9: (eq G14 G15) + ├── G10: (scan pqr@q,cols=(1,2),constrained) + │ └── [] + │ ├── best: (scan pqr@q,cols=(1,2),constrained) + │ └── cost: 14.41 + ├── G11: (scan pqr@ts,cols=(1,5),constrained) + │ └── [] + │ ├── best: (scan pqr@ts,cols=(1,5),constrained) + │ └── cost: 14.51 + ├── G12: (variable q) + ├── G13: (const 1) + ├── G14: (variable t) + └── G15: (const 'foo') + +# Don't zigzag on two identical indexes. 
+memo +SELECT c FROM zz_redundant WHERE b = 1 +---- +memo (optimized, ~6KB, required=[presentation: c:3]) + ├── G1: (project G2 G3 c) + │ └── [presentation: c:3] + │ ├── best: (project G2 G3 c) + │ └── cost: 14.62 + ├── G2: (select G4 G5) (scan zz_redundant@idx_u,cols=(2,3),constrained) (scan zz_redundant@idx_v,cols=(2,3),constrained) + │ └── [] + │ ├── best: (scan zz_redundant@idx_u,cols=(2,3),constrained) + │ └── cost: 14.51 + ├── G3: (projections) + ├── G4: (scan zz_redundant,cols=(2,3)) (scan zz_redundant@idx_u,cols=(2,3)) (scan zz_redundant@idx_v,cols=(2,3)) + │ └── [] + │ ├── best: (scan zz_redundant,cols=(2,3)) + │ └── cost: 1054.02 + ├── G5: (filters G6) + ├── G6: (eq G7 G8) + ├── G7: (variable b) + └── G8: (const 1) + +# GenerateZigzagJoins is disabled in the presence of a row-level locking clause. +opt +SELECT q,r FROM pqr WHERE q = 1 AND r = 2 FOR UPDATE +---- +select + ├── columns: q:2!null r:3!null + ├── volatile + ├── fd: ()-->(2,3) + ├── index-join pqr + │ ├── columns: q:2 r:3 + │ ├── volatile + │ ├── fd: ()-->(2) + │ └── scan pqr@q + │ ├── columns: p:1!null q:2!null + │ ├── constraint: /2/1: [/1 - /1] + │ ├── locking: for-update + │ ├── volatile + │ ├── key: (1) + │ └── fd: ()-->(2) + └── filters + └── r:3 = 2 [outer=(3), constraints=(/3: [/2 - /2]; tight), fd=()-->(3)] + +# -------------------------------------------------- +# GenerateInvertedIndexZigzagJoins +# -------------------------------------------------- + +exec-ddl +CREATE TABLE t5 ( + a INT PRIMARY KEY, + b JSONB, + c INT, + INVERTED INDEX b_idx(b) +) +---- + +# Query only the primary key with a remaining filter. 2+ paths in containment +# query should favor zigzag joins. 
+opt +SELECT k FROM b WHERE j @> '{"a": "b", "c": "d"}' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inner-join (lookup b) + ├── columns: k:1!null j:4!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── inner-join (zigzag b@inv_idx b@inv_idx) + │ ├── columns: k:1!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [4] = ['{"a": "b"}'] + │ ├── right fixed columns: [4] = ['{"c": "d"}'] + │ └── filters (true) + └── filters + └── j:4 @> '{"a": "b", "c": "d"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] + +# Query requiring a zigzag join with a remaining filter. +# TODO(itsbilal): remove filter from index join if zigzag join covers it. +opt +SELECT j, k FROM b WHERE j @> '{"a": "b", "c": "d"}' +---- +inner-join (lookup b) + ├── columns: j:4!null k:1!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── inner-join (zigzag b@inv_idx b@inv_idx) + │ ├── columns: k:1!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [4] = ['{"a": "b"}'] + │ ├── right fixed columns: [4] = ['{"c": "d"}'] + │ └── filters (true) + └── filters + └── j:4 @> '{"a": "b", "c": "d"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] + +opt +SELECT * FROM b WHERE j @> '{"a": {"b": "c", "d": "e"}, "f": "g"}' +---- +inner-join (lookup b) + ├── columns: k:1!null u:2 v:3 j:4!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── immutable + ├── key: (1) + ├── fd: (1)-->(2-4), (3)~~>(1,2,4) + ├── inner-join (zigzag b@inv_idx b@inv_idx) + │ ├── columns: k:1!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [4] = ['{"a": {"b": "c"}}'] + │ ├── right fixed columns: [4] = ['{"a": {"d": "e"}}'] + │ └── filters (true) + └── filters + └── j:4 @> '{"a": {"b": "c", "d": "e"}, "f": "g"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] + +opt +SELECT k FROM c WHERE a @> ARRAY[1,3,1,5] +---- 
+project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inner-join (lookup c) + ├── columns: k:1!null a:2!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── immutable + ├── key: (1) + ├── fd: (1)-->(2) + ├── inner-join (zigzag c@inv_idx c@inv_idx) + │ ├── columns: k:1!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = [ARRAY[1]] + │ ├── right fixed columns: [2] = [ARRAY[3]] + │ └── filters (true) + └── filters + └── a:2 @> ARRAY[1,3,1,5] [outer=(2), immutable, constraints=(/2: (/NULL - ])] + +# Two paths. Should generate a zigzag join. +opt +SELECT b,a FROM t5 WHERE b @> '{"a":1, "c":2}' +---- +inner-join (lookup t5) + ├── columns: b:2!null a:1!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── immutable + ├── key: (1) + ├── fd: (1)-->(2) + ├── inner-join (zigzag t5@b_idx t5@b_idx) + │ ├── columns: a:1!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = ['{"a": 1}'] + │ ├── right fixed columns: [2] = ['{"c": 2}'] + │ └── filters (true) + └── filters + └── b:2 @> '{"a": 1, "c": 2}' [outer=(2), immutable, constraints=(/2: (/NULL - ])] + +memo +SELECT a FROM t5 WHERE b @> '{"a":1, "c":2}' +---- +memo (optimized, ~14KB, required=[presentation: a:1]) + ├── G1: (project G2 G3 a) + │ └── [presentation: a:1] + │ ├── best: (project G2 G3 a) + │ └── cost: 100.53 + ├── G2: (select G4 G5) (select G6 G5) (lookup-join G7 G5 t5,keyCols=[1],outCols=(1,2)) + │ └── [] + │ ├── best: (lookup-join G7 G5 t5,keyCols=[1],outCols=(1,2)) + │ └── cost: 100.40 + ├── G3: (projections) + ├── G4: (scan t5,cols=(1,2)) + │ └── [] + │ ├── best: (scan t5,cols=(1,2)) + │ └── cost: 1054.02 + ├── G5: (filters G8) + ├── G6: (index-join G9 t5,cols=(1,2)) + │ └── [] + │ ├── best: (index-join G9 t5,cols=(1,2)) + │ └── cost: 782.82 + ├── G7: (zigzag-join G10 t5@b_idx t5@b_idx) + │ └── [] + │ ├── best: (zigzag-join G10 t5@b_idx t5@b_idx) + │ └── cost: 25.69 + ├── G8: (contains G11 G12) + ├── G9: (scan 
t5@b_idx,cols=(1),constrained) + │ └── [] + │ ├── best: (scan t5@b_idx,cols=(1),constrained) + │ └── cost: 117.31 + ├── G10: (filters) + ├── G11: (variable b) + └── G12: (const '{"a": 1, "c": 2}') + +# Three or more paths. Should generate zigzag joins. +opt +SELECT b,a FROM t5 WHERE b @> '{"a":[{"b":"c", "d":3}, 5]}' +---- +inner-join (lookup t5) + ├── columns: b:2!null a:1!null + ├── key columns: [1] = [1] + ├── lookup columns are key + ├── immutable + ├── key: (1) + ├── fd: (1)-->(2) + ├── inner-join (zigzag t5@b_idx t5@b_idx) + │ ├── columns: a:1!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = ['{"a": [{"b": "c"}]}'] + │ ├── right fixed columns: [2] = ['{"a": [{"d": 3}]}'] + │ └── filters (true) + └── filters + └── b:2 @> '{"a": [{"b": "c", "d": 3}, 5]}' [outer=(2), immutable, constraints=(/2: (/NULL - ])] + +# GenerateInvertedIndexZigzagJoins is disabled in the presence of a row-level +# locking clause. +opt expect-not=GenerateInvertedIndexZigzagJoins +SELECT b,a FROM t5 WHERE b @> '{"a":1, "c":2}' FOR UPDATE +---- +select + ├── columns: b:2!null a:1!null + ├── volatile + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t5 + │ ├── columns: a:1!null b:2 + │ ├── volatile + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── scan t5@b_idx + │ ├── columns: a:1!null + │ ├── constraint: /2/1: [/'{"a": 1}' - /'{"a": 1}'] + │ ├── locking: for-update + │ ├── volatile + │ └── key: (1) + └── filters + └── b:2 @> '{"a": 1, "c": 2}' [outer=(2), immutable, constraints=(/2: (/NULL - ])] + +# -------------------------------------------------- +# SplitDisjunction +# -------------------------------------------------- + +# TODO(mgartner): PruneAggCols should be pruning columns from the DistinctOn +# and further down the expression tree, ultimately eliminating the index-joins. +# PruneAggCols does not run in this case because normalization rules do not run +# at the root tree generated by an exploration rule. 
+opt expect=SplitDisjunction +SELECT k FROM d WHERE u = 1 OR v = 1 +---- +project + ├── columns: k:1!null + ├── key: (1) + └── distinct-on ├── columns: k:1!null u:2 v:3 ├── grouping columns: k:1!null ├── key: (1) @@ -3814,7 +5122,6 @@ scalar-group-by └── aggregations └── count-rows [as=count_rows:6] - # Multi-column primary key. opt expect=SplitDisjunctionAddKey SELECT u, v FROM f WHERE u = 1 OR v = 2