diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial index 5f0c24f24de6..a8855289aaec 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial @@ -183,6 +183,126 @@ ORDER BY rk 13 16 +# Left join is supported by having the optimizer convert it to an inner join. +query II +SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom) ORDER BY (lk, rk) +---- +1 13 +1 16 +2 14 +2 16 +3 12 +3 16 +4 NULL +5 12 +5 16 +6 NULL + +query II +SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_DWithin(ltable.geom1, rtable.geom, 2) ORDER BY (lk, rk) +---- +1 12 +1 13 +1 14 +1 16 +2 14 +2 16 +3 11 +3 12 +3 16 +4 NULL +5 11 +5 12 +5 16 +6 NULL + +query II +SELECT lk, rk FROM ltable LEFT JOIN rtable +ON ST_Intersects(rtable.geom, ltable.geom1) OR ST_DWithin(ltable.geom1, rtable.geom, 2) ORDER BY (lk, rk) +---- +1 12 +1 13 +1 14 +1 16 +2 14 +2 16 +3 11 +3 12 +3 16 +4 NULL +5 11 +5 12 +5 16 +6 NULL + +query II +SELECT lk, rk FROM ltable LEFT JOIN rtable +ON ST_Intersects(ltable.geom1, rtable.geom) AND ST_DWithin(rtable.geom, ltable.geom1, 2) ORDER BY (lk, rk) +---- +1 13 +1 16 +2 14 +2 16 +3 12 +3 16 +4 NULL +5 12 +5 16 +6 NULL + +query II +SELECT lk, rk FROM ltable LEFT JOIN rtable +ON ST_Intersects(ltable.geom1, rtable.geom) AND ST_DWithin(rtable.geom, ltable.geom2, 2) ORDER BY (lk, rk) +---- +1 13 +1 16 +2 14 +2 16 +3 12 +3 16 +4 NULL +5 NULL +6 NULL + +query II +SELECT lk, rk FROM ltable LEFT JOIN rtable +ON ST_Intersects(ltable.geom1, rtable.geom) OR ST_DWithin(rtable.geom, ltable.geom2, 2) ORDER BY (lk, rk) +---- +1 12 +1 13 +1 14 +1 16 +2 12 +2 13 +2 14 +2 16 +3 12 +3 13 +3 14 +3 16 +4 12 +4 13 +4 14 +4 16 +5 12 +5 16 +6 NULL + +query III +WITH q AS ( + SELECT * FROM ltable WHERE lk > 2 +) +SELECT lk, count(*), (SELECT count(*) FROM q) FROM ( + SELECT lk, rk + FROM q + LEFT JOIN 
rtable ON ST_Intersects(q.geom1, rtable.geom) +) GROUP BY lk ORDER BY lk +---- +3 2 4 +4 1 4 +5 2 4 +6 1 4 + # These queries perform anti-joins, which do not currently use an inverted join. query I SELECT lk FROM ltable WHERE NOT EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom)) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist index 083de9e63c21..96937ab4fcf7 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_dist @@ -87,6 +87,26 @@ SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(lta ---- https://cockroachdb.github.io/distsqlplan/decode.html#eJzElVFP2zAQx9_3Kax7opu71klaIE-ZRqZ16lrWIg0JVSg0J8gIcWY7Ewj1u09JKtKG1k4Ho49J7uf7-ey_8gjydwwuTP2h__mMZCImXybj7-TCPz8dfhqMyMHJYHo2_TFskWVJfFtWxCq4ipH8_OpPfOKf51XkYFnzviwRqyVSXUaJQiFxruRBSX-8Rn5n0WVh8dRqzYBCwkMcBXcowb0ABhQsoGDDjEIq-Byl5CL_9FgUDsJ7cLsUoiTNVP56RmHOBYL7CCpSMYILZ3mDCQYhik4XKISogiguli9VvFREd4F4AArTNEikS9qdvOk4Uy7xGPVsmC0o8ExVTaQKrhFctqDNRQbJHxQKw288SlB07HWXchBePojLKAnxHugT4d-nojZFz6LEc1qrmpbO1NrFNDdcTqy_0bKa2JDz2ywlv3iUEJ64xMu3NR5tsu2t225VtXdRPYmkipK56hyvi3r51RmLEAWGecNat2qBqwdyE8ibZ_RsURk5W42qdXjZq77Oh3KhrdrMMng_jWzrvHoN7LJkk99GtRFv87TDerXKzb37a71Z8ySyZknsWO0iKDtn0aBSy6KzxywaTFeyeLjvLBpUq0vdfbMwslcNo_3KYbSaB8JqGAi7_S9xMIjU4tDbYxwMpitxONp3HAyq1bVibxYH61Xj4PzHf9OGxhOUKU8kNvrzdHN1DK-x3KrkmZjjqeDzok35OC644kWIUpVfWfkwSMpPueAqzLSwpYctLWzrYbsOs1XYWYPZbjDrvoju6WlHu2sD3NMfVl8_s76WPtTDh1r4SA8faeFjPXz8kqPWw6ajNtCG02L6bJlofbiYIV1MHy9myBd7dsvXcceAP7vmuxyagTadmgk3DV6fsjo9W7z7GwAA__9Kzz7E +# Left joins are also converted to an inner join by the optimizer. 
+query T +SELECT url FROM [EXPLAIN (DISTSQL) +SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom)] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJyUk9Fv2jAQxt_3V5zuCSRr4AAvkSZ52tI1FSMdZNKkClVpctAMY2e2M1Eh_vcpCRojLQweff4-3---S7Zof0n0cRaMg08xlEbCzTT6Cg_Bj_vxx3ACnc_hLJ59G3dhL5ErBmbVqKRLniTBOLiJ4S4KJ2CaQjQB6x5z5chYSp3tNML3S9Jrzvaq-tSdI0OlM5oka7LoPyDHOcPC6JSs1aYqbWtBmG3Q7zPMVVG6qjxnmGpD6G_R5U4S-ih1mkiwaaLgqVwsyEC_10eGGbkkl7Vpx1CX7vCEdcmS0Oc7dnmbUP0m4yi707ki0-NHHbCZTlTTPeYqow2yv45gU5hWNMJjIIZdZBiVzgcxYIIz4eEpUu8a0opwSklGpue9SVmYfJ2YF2Q41npVFvBT5wq08kFUY71apBgwEKMDbYXKKubRSeDBNcCvN8h7g4s2ODzZ5vB6qbTJyFDWfvH_kjdYbxP7vP8Ehsfhxi8F-TANv9zGEH2Pg2n9dyBDSQvXEV73g8mXz64j-CHJERP8ZIajazKcki20snRRbP1qMsqW1CRldWlSujc6rds0x6j21YWMrGtueXMIVXNVAf5r5mfN3nmzd9Y8PDLztnlwhdlrm4dnzaMW9nz37k8AAAD__-1Ht2g= + +query T +SELECT url FROM [EXPLAIN (DISTSQL) +WITH q AS ( + SELECT * FROM ltable WHERE lk > 2 +) +SELECT count(*), (SELECT count(*) FROM q) FROM ( + SELECT lk, rk + FROM q + LEFT JOIN rtable ON ST_Intersects(q.geom1, rtable.geom) +) GROUP BY lk] +---- +https://cockroachdb.github.io/distsqlplan/decode.html#eJyUlM9u2kAQxu99itGccLQq2EAOliqZpk5wRO3UGKVRipBjT4iLswu76ypRlHev_CehkEDDCe3M_Ga-nY_1E6pVjjaO3ZF7EkEhczgNg-9w7f68GA08H1rfvHE0_jEy4NKLhrCCwRhaTfVRXZvr-CYnuBy6oQv5An4VnU6XwDKgqUtEwXXryGCv5EukbrBqfl-y-YKBXDQ5GLmnEZwHng-yHhT4oPQs45qkokSr1urznMS9yZqC6mQYcBYGkwv4egX5YooMuUjJj-9JoX2NJk4ZLqVISCkhy9BTVeClD2h3GGZ8WegyPGWYCEloP6HOdE5oYy6SOAeVxBxuittbktBpd5BhSjrO8gp6ZigKvW6hdDwntM1n9vExHv9DUlN6LjJOsm1uTMD6qk551VnGU3pA9kq4D0u5tSLHYuD0DGQYFNoGp8sckzkW7lJqHaK0VBhSnJJsW--qXMrsPpaPyHAkxKJYwm-RcRDcBqe81htDnS4Dp79W22eOxZzuTrXdQ9S-tc9sdz9kX2_nmHX3gguZkqR0u-P_S97ROozVXeN_b3Oz0eOSbAi9s2EEwSRyw-qJIMOcbnXLsYwvMpvf6ZZjrtfY27nA_iELHMznkuaxFrLd3xRVuTnwr2Z-EM38yWjUjD8JJn40C4PLcas8hsRTkjaUf8rmzb98EMo3vzJ26jw-RGdIaim4og952ynXT-mcajuVKGRCF1Ik1Zj6GFRcFUhJ6Tpr1geP16lS4L-wuRe29sPWXri3AZvbcPcA2NqGe3vh_n7Z_b3w8RY8ff70NwAA__-fpxhU + # This query performs an anti-join, and 
does not currently use an inverted join. query T SELECT url FROM [EXPLAIN (DISTSQL) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain index e0fea368bdd8..deeea6785bc0 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain @@ -178,6 +178,136 @@ project · · · table ltable@primary · · · spans FULL SCAN · · +# Left joins are also converted to an inner join by the optimizer. +query TTTTT +EXPLAIN (VERBOSE) +SELECT lk, rk1 FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom) +---- +· distribution local · · +· vectorized false · · +root · · (lk, rk1) · + ├── project · · (lk, rk1) · + │ │ estimated row count 10000 (missing stats) · · + │ └── project · · (lk, geom1, rk1, geom) · + │ │ estimated row count 10000 (missing stats) · · + │ └── hash join (right outer) · · (rk1, geom, lk, geom1, lk, geom1) · + │ │ estimated row count 10000 (missing stats) · · + │ │ equality (lk) = (lk) · · + │ │ right cols are key · · · + │ ├── project · · (rk1, geom, lk, geom1) · + │ │ │ estimated row count 10000 (missing stats) · · + │ │ └── lookup join (inner) · · (rk1, rk2, lk, geom1, geom) · + │ │ │ table rtable@primary · · + │ │ │ equality (rk1, rk2) = (rk1,rk2) · · + │ │ │ equality cols are key · · · + │ │ │ pred st_intersects(geom1, geom) · · + │ │ └── project · · (rk1, rk2, lk, geom1) · + │ │ │ estimated row count 10000 (missing stats) · · + │ │ └── inverted join · · (lk, geom1, rk1, rk2, geom_inverted_key) · + │ │ │ table rtable@geom_index · · + │ │ │ inverted expr st_intersects(geom1, geom_inverted_key) · · + │ │ └── scan buffer · · (lk, geom1) · + │ │ estimated row count 1000 (missing stats) · · + │ │ label buffer 1 · · + │ └── scan buffer · · (lk, geom1) · + │ estimated row count 1000 (missing stats) · · + │ label buffer 1 · · + └── subquery · · · · + │ id 
@S1 · · + │ exec mode all rows · · + └── buffer · · (lk, geom1) · + │ label buffer 1 · · + └── render · · (lk, geom1) · + │ estimated row count 1000 (missing stats) · · + │ render 0 lk · · + │ render 1 geom1 · · + └── scan · · (lk, geom1) · +· estimated row count 1000 (missing stats) · · +· table ltable@primary · · +· spans FULL SCAN · · + +query TTTTT +EXPLAIN (VERBOSE) +WITH q AS ( + SELECT * FROM ltable WHERE lk > 2 +) +SELECT count(*), (SELECT count(*) FROM q) FROM ( + SELECT lk, rk1 + FROM q + LEFT JOIN rtable ON ST_Intersects(q.geom1, rtable.geom) +) GROUP BY lk +---- +· distribution local · · +· vectorized false · · +root · · (count, count) · + ├── render · · (count, count) · + │ │ estimated row count 333 (missing stats) · · + │ │ render 0 @S3 · · + │ │ render 1 count_rows · · + │ └── group · · (lk, count_rows) · + │ │ estimated row count 333 (missing stats) · · + │ │ aggregate 0 count_rows() · · + │ │ group by lk · · + │ └── project · · (lk) · + │ └── project · · (lk, geom1, geom) · + │ │ estimated row count 3333 (missing stats) · · + │ └── hash join (right outer) · · (geom, lk, geom1, lk, geom1) · + │ │ estimated row count 3333 (missing stats) · · + │ │ equality (lk) = (lk) · · + │ │ right cols are key · · · + │ ├── project · · (geom, lk, geom1) · + │ │ │ estimated row count 3333 (missing stats) · · + │ │ └── lookup join (inner) · · (rk1, rk2, lk, geom1, geom) · + │ │ │ table rtable@primary · · + │ │ │ equality (rk1, rk2) = (rk1,rk2) · · + │ │ │ equality cols are key · · · + │ │ │ pred st_intersects(geom1, geom) · · + │ │ └── project · · (rk1, rk2, lk, geom1) · + │ │ │ estimated row count 3333 (missing stats) · · + │ │ └── inverted join · · (lk, geom1, rk1, rk2, geom_inverted_key) · + │ │ │ table rtable@geom_index · · + │ │ │ inverted expr st_intersects(geom1, geom_inverted_key) · · + │ │ └── scan buffer · · (lk, geom1) · + │ │ estimated row count 333 (missing stats) · · + │ │ label buffer 2 · · + │ └── scan buffer · · (lk, geom1) · + │ estimated row count 
333 (missing stats) · · + │ label buffer 2 · · + ├── subquery · · · · + │ │ id @S1 · · + │ │ original sql SELECT * FROM ltable WHERE lk > 2 · · + │ │ exec mode all rows · · + │ └── buffer · · (lk, geom1, geom2) · + │ │ label buffer 1 (q) · · + │ └── scan · · (lk, geom1, geom2) · + │ estimated row count 333 (missing stats) · · + │ table ltable@primary · · + │ spans /3- · · + ├── subquery · · · · + │ │ id @S2 · · + │ │ exec mode all rows · · + │ └── buffer · · (lk, geom1) · + │ │ label buffer 2 · · + │ └── render · · (lk, geom1) · + │ │ estimated row count 333 (missing stats) · · + │ │ render 0 lk · · + │ │ render 1 geom1 · · + │ └── project · · (lk, geom1) · + │ │ estimated row count 333 (missing stats) · · + │ └── scan buffer · · (lk, geom1, geom2) · + │ label buffer 1 (q) · · + └── subquery · · · · + │ id @S3 · · + │ original sql (SELECT count(*) FROM q) · · + │ exec mode one row · · + └── group (scalar) · · (count_rows) · + │ estimated row count 1 (missing stats) · · + │ aggregate 0 count_rows() · · + └── project · · () · + │ estimated row count 333 (missing stats) · · + └── scan buffer · · (lk, geom1, geom2) · +· label buffer 1 (q) · · + # This query performs an anti-join, and does not currently use an inverted join. query TTTTT EXPLAIN (VERBOSE) diff --git a/pkg/sql/opt/exec/explain/emit.go b/pkg/sql/opt/exec/explain/emit.go index f6a6f39cd824..9d6c4ed3bed4 100644 --- a/pkg/sql/opt/exec/explain/emit.go +++ b/pkg/sql/opt/exec/explain/emit.go @@ -71,7 +71,9 @@ func Emit(plan *Plan, ob *OutputBuilder, spanFormatFn SpanFormatFn) error { // This field contains the original subquery (which could have been modified // by optimizer transformations). 
-		ob.Attr("original sql", tree.AsStringWithFlags(s.ExprNode, tree.FmtSimple))
+		if s.ExprNode != nil {
+			ob.Attr("original sql", tree.AsStringWithFlags(s.ExprNode, tree.FmtSimple))
+		}
 		var mode string
 		switch s.Mode {
 		case exec.SubqueryExists:
diff --git a/pkg/sql/opt/memo/interner.go b/pkg/sql/opt/memo/interner.go
index 4a64ca55630e..7f3110d1a926 100644
--- a/pkg/sql/opt/memo/interner.go
+++ b/pkg/sql/opt/memo/interner.go
@@ -410,7 +410,9 @@ func (h *hasher) HashTypedExpr(val tree.TypedExpr) {
 }
 
 func (h *hasher) HashStatement(val tree.Statement) {
-	h.HashUint64(uint64(reflect.ValueOf(val).Pointer()))
+	if val != nil {
+		h.HashUint64(uint64(reflect.ValueOf(val).Pointer()))
+	}
 }
 
 func (h *hasher) HashColumnID(val opt.ColumnID) {
diff --git a/pkg/sql/opt/xform/custom_funcs.go b/pkg/sql/opt/xform/custom_funcs.go
index 3310f1383406..4ca98fb9a724 100644
--- a/pkg/sql/opt/xform/custom_funcs.go
+++ b/pkg/sql/opt/xform/custom_funcs.go
@@ -2640,6 +2640,239 @@ func (c *CustomFuncs) ConvertIndexToLookupJoinPrivate(
 	}
 }
 
+// AddWithBinding obtains the next WithID from the memo and records expr in the
+// metadata as the binding for that ID. It returns the new WithID.
+func (c *CustomFuncs) AddWithBinding(expr memo.RelExpr) opt.WithID {
+	withID := c.e.mem.NextWithID()
+	c.e.mem.Metadata().AddWithBinding(withID, expr)
+	return withID
+}
+
+// MakeWithScan constructs a WithScan expression that scans the With expression
+// with the given WithID. It creates new columns in the metadata for the
+// WithScan output columns.
+func (c *CustomFuncs) MakeWithScan(withID opt.WithID) memo.RelExpr {
+	md := c.e.mem.Metadata()
+	bindingCols := md.WithBinding(withID).(memo.RelExpr).Relational().OutputCols
+	private := memo.WithScanPrivate{
+		With:    withID,
+		InCols:  make(opt.ColList, bindingCols.Len()),
+		OutCols: make(opt.ColList, bindingCols.Len()),
+	}
+
+	// Pair each binding column with a newly added column of the same alias/type.
+	pos := 0
+	for col, ok := bindingCols.Next(0); ok; col, ok = bindingCols.Next(col + 1) {
+		colMeta := md.ColumnMeta(col)
+		private.InCols[pos] = col
+		private.OutCols[pos] = md.AddColumn(colMeta.Alias, colMeta.Type)
+		pos++
+	}
+
+	private.ID = md.NextUniqueID()
+	return c.e.f.ConstructWithScan(&private)
+}
+
+// MakeWithScanUsingCols builds a WithScan over the With expression identified
+// by withID. Unlike MakeWithScan, it does not add new metadata columns: the
+// columns in outColSet become the WithScan's output columns. It panics with
+// an assertion error if outColSet and the output columns of the With binding
+// do not contain the same number of columns.
+func (c *CustomFuncs) MakeWithScanUsingCols(withID opt.WithID, outColSet opt.ColSet) memo.RelExpr {
+	md := c.e.mem.Metadata()
+	inColSet := md.WithBinding(withID).(memo.RelExpr).Relational().OutputCols
+	if numIn, numOut := inColSet.Len(), outColSet.Len(); numOut != numIn {
+		panic(errors.AssertionFailedf(
+			"outColSet.Len() must match the number of output columns of the given With expression (%d != %d)",
+			numOut, numIn,
+		))
+	}
+
+	// toColList flattens a column set into a list, in the order in which
+	// ColSet.Next yields the columns.
+	toColList := func(set opt.ColSet) opt.ColList {
+		list := make(opt.ColList, set.Len())
+		pos := 0
+		for col, ok := set.Next(0); ok; col, ok = set.Next(col + 1) {
+			list[pos] = col
+			pos++
+		}
+		return list
+	}
+
+	return c.e.f.ConstructWithScan(&memo.WithScanPrivate{
+		With:    withID,
+		InCols:  toColList(inColSet),
+		OutCols: toColList(outColSet),
+		ID:      md.NextUniqueID(),
+	})
+}
+
+// MakeWithScanKeyEqualityFilters takes two WithScan expressions that scan the
+// same With expression. It returns a filters expression that contains equality
+// conditions between the primary key columns from each side.
For example,
+// if WithScans a and b are both scanning a With expression that has key
+// columns x and y, MakeWithScanKeyEqualityFilters will return filters
+// a.x = b.x AND a.y = b.y.
+//
+// The key is the strict key of the With binding's functional dependencies.
+// MakeWithScanKeyEqualityFilters panics with an assertion error if the two
+// WithScans read different With expressions or different input columns, if
+// the binding has no strict key, or if fewer equalities than key columns can
+// be built from the WithScans' input columns.
+func (c *CustomFuncs) MakeWithScanKeyEqualityFilters(left, right opt.Expr) memo.FiltersExpr {
+	leftWithScan := left.(*memo.WithScanExpr)
+	rightWithScan := right.(*memo.WithScanExpr)
+	if leftWithScan.With != rightWithScan.With {
+		panic(errors.AssertionFailedf(
+			"attempt to make equality filters between WithScans of different With expressions",
+		))
+	}
+	if !leftWithScan.InCols.Equals(rightWithScan.InCols) {
+		panic(errors.AssertionFailedf(
+			"attempt to make equality filters between WithScans with different input columns",
+		))
+	}
+
+	binding := c.e.mem.Metadata().WithBinding(leftWithScan.With).(memo.RelExpr)
+	keyCols, ok := binding.Relational().FuncDeps.StrictKey()
+	if !ok {
+		panic(errors.AssertionFailedf("WithBinding has no key (was EnsureKey called?)"))
+	}
+
+	// Grow the result with append instead of indexing into a slice pre-sized to
+	// keyCols.Len(). A pre-sized slice would keep zero-valued items if a key
+	// column were missing from InCols, and would be indexed out of range if a
+	// key column appeared in InCols more than once.
+	filters := make(memo.FiltersExpr, 0, keyCols.Len())
+	for i, inCol := range leftWithScan.InCols {
+		if !keyCols.Contains(inCol) {
+			continue
+		}
+
+		filters = append(filters, c.e.f.ConstructFiltersItem(c.e.f.ConstructEq(
+			c.e.f.ConstructVariable(leftWithScan.OutCols[i]),
+			c.e.f.ConstructVariable(rightWithScan.OutCols[i]),
+		)))
+	}
+
+	// Equating only part of the key would not identify matching rows, so fail
+	// loudly rather than return an incomplete set of filters.
+	if len(filters) < keyCols.Len() {
+		panic(errors.AssertionFailedf(
+			"WithScans provide only %d of the %d key columns of the With binding",
+			len(filters), keyCols.Len(),
+		))
+	}
+
+	return filters
+}
+
+// MakeWithPrivate returns a WithPrivate containing the given WithID.
+func (c *CustomFuncs) MakeWithPrivate(id opt.WithID) *memo.WithPrivate {
+	return &memo.WithPrivate{ID: id}
+}
+
+// ReplaceOutputCols replaces the output columns of the given expression by
+// wrapping the expression in a project expression that projects each of the
+// original output columns as a new column with a new ColumnID.
+func (c *CustomFuncs) ReplaceOutputCols(expr memo.RelExpr) memo.RelExpr {
+	md := c.e.mem.Metadata()
+	srcCols := expr.Relational().OutputCols
+	projections := make(memo.ProjectionsExpr, 0, srcCols.Len())
+	// Project every source column, unchanged, under a newly added column ID.
+	for srcCol, ok := srcCols.Next(0); ok; srcCol, ok = srcCols.Next(srcCol + 1) {
+		colMeta := md.ColumnMeta(srcCol)
+		dstCol := md.AddColumn(colMeta.Alias, colMeta.Type)
+		item := c.e.f.ConstructProjectionsItem(c.e.f.ConstructVariable(srcCol), dstCol)
+		projections = append(projections, item)
+	}
+
+	return c.e.f.ConstructProject(expr, projections, opt.ColSet{})
+}
+
+// MapFilterCols returns a new FiltersExpr in which every src column ID that
+// appears in filters is replaced with the corresponding column ID in dst.
+//
+// NOTE: Columns are paired by their relative position in the two ColSets, so
+// src and dst must contain the same number of columns. For example, if src
+// and dst are (1, 5, 6) and (7, 12, 15), the following mapping is applied:
+//
+//   1 => 7
+//   5 => 12
+//   6 => 15
+func (c *CustomFuncs) MapFilterCols(
+	filters memo.FiltersExpr, src, dst opt.ColSet,
+) memo.FiltersExpr {
+	if src.Len() != dst.Len() {
+		panic(errors.AssertionFailedf(
+			"src and dst must have the same number of columns, src: %v, dst: %v",
+			src,
+			dst,
+		))
+	}
+
+	// Advance one cursor through each ColSet in lockstep, so that the n-th
+	// column of src is mapped to the n-th column of dst.
+	var colMap opt.ColMap
+	srcCol, more := src.Next(0)
+	dstCol, _ := dst.Next(0)
+	for more {
+		colMap.Set(int(srcCol), int(dstCol))
+		srcCol, more = src.Next(srcCol + 1)
+		dstCol, _ = dst.Next(dstCol + 1)
+	}
+	return *c.RemapCols(&filters, colMap).(*memo.FiltersExpr)
+}
+
+// CanGenerateInvertedJoin is a best-effort check that returns true if it
+// may be possible to generate an inverted join with the given left and right
+// inputs and on conditions.
It may return some false positives, but it is
+// used to avoid applying certain rules such as ConvertLeftToInnerJoin in cases
+// where they will not be beneficial.
+func (c *CustomFuncs) CanGenerateInvertedJoin(left, right memo.RelExpr, on memo.FiltersExpr) bool {
+	if c.exprCanGenerateInvertedJoin(left) || c.exprCanGenerateInvertedJoin(right) {
+		// An input qualifies; now look for a usable ON condition.
+		for idx := range on {
+			if c.exprContainsGeoIndexRelationship(on[idx].Condition) {
+				return true
+			}
+		}
+	}
+
+	// Either no input or no ON condition can support an inverted join.
+	return false
+}
+
+// exprCanGenerateInvertedJoin reports whether expr is a canonical Scan, or a
+// Select whose input is a canonical Scan, over a table that has inverted
+// indexes. GenerateInvertedJoins and GenerateInvertedJoinsFromSelect check
+// these same conditions, so those rules can only match when
+// exprCanGenerateInvertedJoin returns true.
+func (c *CustomFuncs) exprCanGenerateInvertedJoin(expr memo.RelExpr) bool {
+	// Unwrap the expression down to the underlying Scan, if there is one.
+	var scan *memo.ScanExpr
+	switch t := expr.(type) {
+	case *memo.ScanExpr:
+		scan = t
+	case *memo.SelectExpr:
+		input, ok := t.Input.(*memo.ScanExpr)
+		if !ok {
+			return false
+		}
+		scan = input
+	default:
+		return false
+	}
+
+	private := &scan.ScanPrivate
+	return c.IsCanonicalScan(private) && c.HasInvertedIndexes(private)
+}
+
+// exprContainsGeoIndexRelationship returns true if the given expression
+// contains a geospatial function or bounding box operator that can be
+// index accelerated. It is not a guarantee that an inverted join will be
+// produced for the given ON condition, but it eliminates expressions that
+// definitely cannot produce an inverted join.
+func (c *CustomFuncs) exprContainsGeoIndexRelationship(expr opt.ScalarExpr) bool {
+	// A conjunction or disjunction qualifies if either of its operands does.
+	if andExpr, ok := expr.(*memo.AndExpr); ok {
+		return c.exprContainsGeoIndexRelationship(andExpr.Left) || c.exprContainsGeoIndexRelationship(andExpr.Right)
+	}
+	if orExpr, ok := expr.(*memo.OrExpr); ok {
+		return c.exprContainsGeoIndexRelationship(orExpr.Left) || c.exprContainsGeoIndexRelationship(orExpr.Right)
+	}
+
+	// Any other expression qualifies only if GetGeoIndexRelationship accepts it.
+	_, found := invertedidx.GetGeoIndexRelationship(expr)
+	return found
+}
+
 // ----------------------------------------------------------------------
 //
 // GroupBy Rules
@@ -3038,25 +3271,7 @@ func (c *CustomFuncs) canMaybeConstrainIndexWithCols(sp *memo.ScanPrivate, cols
 func (c *CustomFuncs) MapScanFilterCols(
 	filters memo.FiltersExpr, src *memo.ScanPrivate, dst *memo.ScanPrivate,
 ) memo.FiltersExpr {
-	if src.Cols.Len() != dst.Cols.Len() {
-		panic(errors.AssertionFailedf(
-			"src and dst must have the same number of columns, src.Cols: %v, dst.Cols: %v",
-			src.Cols,
-			dst.Cols,
-		))
-	}
-
-	// Map each column in src to a column in dst based on the relative position
-	// of both the src and dst ColumnIDs in the ColSet.
-	var colMap opt.ColMap
-	dstCol, _ := dst.Cols.Next(0)
-	for srcCol, ok := src.Cols.Next(0); ok; srcCol, ok = src.Cols.Next(srcCol + 1) {
-		colMap.Set(int(srcCol), int(dstCol))
-		dstCol, _ = dst.Cols.Next(dstCol + 1)
-	}
-
-	newFilters := c.RemapCols(&filters, colMap).(*memo.FiltersExpr)
-	return *newFilters
+	return c.MapFilterCols(filters, src.Cols, dst.Cols)
 }
 
 // MakeSetPrivateForSplitDisjunction constructs a new SetPrivate with column sets
diff --git a/pkg/sql/opt/xform/rules/join.opt b/pkg/sql/opt/xform/rules/join.opt
index a29c3dc125e2..0645f93b9b08 100644
--- a/pkg/sql/opt/xform/rules/join.opt
+++ b/pkg/sql/opt/xform/rules/join.opt
@@ -118,6 +118,82 @@
     (OutputCols $left)
 )
 
+# ConvertLeftToInnerJoin converts a left join to an inner join with the same
+# ON condition, and then wraps the expression in another left join with the
+# original left side.
In order to avoid computing the left side of the join
+# twice, we create a With expression for the left side, and then reference it
+# with two WithScans. For example (assuming x is the primary key of a):
+#
+#   SELECT a.x, b.y FROM a LEFT JOIN b ON ST_Intersects(a.geom, b.geom);
+#
+# is converted to:
+#
+#   WITH a_buf AS (
+#     SELECT * FROM a
+#   )
+#   SELECT a_buf.x, inr.y FROM a_buf LEFT JOIN (
+#     SELECT * FROM a_buf JOIN b ON ST_Intersects(a_buf.geom, b.geom)
+#   ) AS inr
+#   ON a_buf.x = inr.x;
+#
+# Note that this transformation is not desirable in the general case, but it
+# is useful if there is a possibility of creating an inverted join (such as in
+# the above example). For this reason, we only perform this transformation if
+# CanGenerateInvertedJoin returns true.
+#
+# We also don't match if the right-hand side is an InnerJoin: this rule's own
+# output is such a LeftJoin, so it would otherwise re-match itself forever.
+#
+# There is some added complexity due to the need to maintain the same output
+# column IDs. Therefore, the WithScan in the outer left join must retain the
+# original column IDs, but the binding expression of the With and the WithScan
+# in the inner join must have different column IDs. See the ReplaceOutputCols
+# and MakeWithScanUsingCols functions for details about how this works. Only
+# the columns from the outer left side and the inner right side are ultimately
+# projected.
+#
+# TODO(rytaft): This is a temporary solution to support index acceleration of
+# geospatial left joins. Eventually we would like to support index acceleration
+# of left joins directly by adding some extra bookkeeping to the execution
+# operators. See #53576.
+[ConvertLeftToInnerJoin, Explore] +(LeftJoin + $left:* + $right:^(InnerJoin) + $on:* & (CanGenerateInvertedJoin $left $right $on) + $private:* & (NoJoinHints $private) +) +=> +(With + $bindingExpr:(ReplaceOutputCols $newLeft:(EnsureKey $left)) + (Project + (LeftJoin + $outerLeft:(MakeWithScanUsingCols + $id:(AddWithBinding $bindingExpr) + (OutputCols $newLeft) + ) + (InnerJoin + $innerLeft:(MakeWithScan $id) + $right + (MapFilterCols + $on + (OutputCols $newLeft) + (OutputCols $innerLeft) + ) + (EmptyJoinPrivate) + ) + (MakeWithScanKeyEqualityFilters + $outerLeft + $innerLeft + ) + $private + ) + [] + (OutputCols2 $left $right) + ) + (MakeWithPrivate $id) +) + # GenerateMergeJoins creates MergeJoin operators for the join, using the # interesting orderings property. [GenerateMergeJoins, Explore] diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 89c6b57a68fd..b9d8b3ca98ca 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -3603,7 +3603,7 @@ project └── const-agg [as=c.geom:10, outer=(10)] └── c.geom:10 -# We don't yet support inverted joins with left- or anti-joins. +# We don't yet support inverted joins with anti-joins. opt expect=GenerateInvertedJoins SELECT * FROM nyc_census_blocks AS c @@ -3626,27 +3626,246 @@ anti-join (cross) └── filters └── st_covers(c.geom:10, n.geom:16) [outer=(10,16), immutable] -opt expect-not=GenerateInvertedJoins +# Left joins are supported by converting them to an inner join wrapped in a +# left join. 
+opt expect=(GenerateInvertedJoins,ConvertLeftToInnerJoin) SELECT * FROM nyc_census_blocks AS c -LEFT JOIN nyc_neighborhoods@nyc_neighborhoods_geo_idx AS n ON ST_Covers(c.geom, n.geom) +LEFT JOIN nyc_neighborhoods AS n ON ST_Covers(c.geom, n.geom) ---- -left-join (cross) +with &1 ├── columns: gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 boroname:9 geom:10 gid:13 boroname:14 name:15 geom:16 ├── immutable ├── key: (1,13) ├── fd: (1)-->(2-10), (13)-->(14-16) + ├── project + │ ├── columns: gid:19!null blkid:20 popn_total:21 popn_white:22 popn_black:23 popn_nativ:24 popn_asian:25 popn_other:26 boroname:27 geom:28 + │ ├── key: (19) + │ ├── fd: (19)-->(20-28) + │ ├── scan c + │ │ ├── columns: c.gid:1!null c.blkid:2 c.popn_total:3 c.popn_white:4 c.popn_black:5 c.popn_nativ:6 c.popn_asian:7 c.popn_other:8 c.boroname:9 c.geom:10 + │ │ ├── key: (1) + │ │ └── fd: (1)-->(2-10) + │ └── projections + │ ├── c.gid:1 [as=gid:19, outer=(1)] + │ ├── c.blkid:2 [as=blkid:20, outer=(2)] + │ ├── c.popn_total:3 [as=popn_total:21, outer=(3)] + │ ├── c.popn_white:4 [as=popn_white:22, outer=(4)] + │ ├── c.popn_black:5 [as=popn_black:23, outer=(5)] + │ ├── c.popn_nativ:6 [as=popn_nativ:24, outer=(6)] + │ ├── c.popn_asian:7 [as=popn_asian:25, outer=(7)] + │ ├── c.popn_other:8 [as=popn_other:26, outer=(8)] + │ ├── c.boroname:9 [as=boroname:27, outer=(9)] + │ └── c.geom:10 [as=geom:28, outer=(10)] + └── project + ├── columns: c.gid:1!null c.blkid:2 c.popn_total:3 c.popn_white:4 c.popn_black:5 c.popn_nativ:6 c.popn_asian:7 c.popn_other:8 c.boroname:9 c.geom:10 n.gid:13 n.boroname:14 name:15 n.geom:16 + ├── immutable + ├── fd: (1)-->(2-10), (13)-->(14-16) + └── right-join (hash) + ├── columns: c.gid:1!null c.blkid:2 c.popn_total:3 c.popn_white:4 c.popn_black:5 c.popn_nativ:6 c.popn_asian:7 c.popn_other:8 c.boroname:9 c.geom:10 n.gid:13 n.boroname:14 name:15 n.geom:16 gid:29 geom:38 + ├── immutable + ├── key: (1,13,29) + ├── fd: (1)-->(2-10), 
(29)-->(38), (13)-->(14-16) + ├── inner-join (lookup nyc_neighborhoods) + │ ├── columns: n.gid:13!null n.boroname:14 name:15 n.geom:16 gid:29!null geom:38 + │ ├── key columns: [13] = [13] + │ ├── lookup columns are key + │ ├── immutable + │ ├── key: (13,29) + │ ├── fd: (29)-->(38), (13)-->(14-16) + │ ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx) + │ │ ├── columns: n.gid:13!null gid:29!null geom:38 + │ │ ├── inverted-expr + │ │ │ └── st_covers(geom:38, n.geom:16) + │ │ ├── key: (13,29) + │ │ ├── fd: (29)-->(38) + │ │ ├── with-scan &1 + │ │ │ ├── columns: gid:29!null geom:38 + │ │ │ ├── mapping: + │ │ │ │ ├── gid:19 => gid:29 + │ │ │ │ └── geom:28 => geom:38 + │ │ │ ├── key: (29) + │ │ │ └── fd: (29)-->(38) + │ │ └── filters (true) + │ └── filters + │ └── st_covers(geom:38, n.geom:16) [outer=(16,38), immutable] + ├── with-scan &1 + │ ├── columns: c.gid:1!null c.blkid:2 c.popn_total:3 c.popn_white:4 c.popn_black:5 c.popn_nativ:6 c.popn_asian:7 c.popn_other:8 c.boroname:9 c.geom:10 + │ ├── mapping: + │ │ ├── gid:19 => c.gid:1 + │ │ ├── blkid:20 => c.blkid:2 + │ │ ├── popn_total:21 => c.popn_total:3 + │ │ ├── popn_white:22 => c.popn_white:4 + │ │ ├── popn_black:23 => c.popn_black:5 + │ │ ├── popn_nativ:24 => c.popn_nativ:6 + │ │ ├── popn_asian:25 => c.popn_asian:7 + │ │ ├── popn_other:26 => c.popn_other:8 + │ │ ├── boroname:27 => c.boroname:9 + │ │ └── geom:28 => c.geom:10 + │ ├── key: (1) + │ └── fd: (1)-->(2-10) + └── filters + └── c.gid:1 = gid:29 [outer=(1,29), constraints=(/1: (/NULL - ]; /29: (/NULL - ]), fd=(1)==(29), (29)==(1)] + +# Case with a CTE and GROUP BY wrapping the left join. 
+opt expect=(GenerateInvertedJoins,ConvertLeftToInnerJoin) +WITH q AS ( + SELECT * FROM nyc_census_blocks WHERE boroname = 'Manhattan' +) +SELECT count(*), (SELECT count(*) FROM q) FROM ( + SELECT n.boroname + FROM q + LEFT JOIN nyc_neighborhoods AS n ON ST_Intersects(q.geom, n.geom) +) GROUP BY boroname +---- +with &1 (q) + ├── columns: count:29!null count:41 + ├── immutable + ├── fd: ()-->(41) + ├── select + │ ├── columns: nyc_census_blocks.gid:1!null nyc_census_blocks.blkid:2 nyc_census_blocks.popn_total:3 nyc_census_blocks.popn_white:4 nyc_census_blocks.popn_black:5 nyc_census_blocks.popn_nativ:6 nyc_census_blocks.popn_asian:7 nyc_census_blocks.popn_other:8 nyc_census_blocks.boroname:9!null nyc_census_blocks.geom:10 + │ ├── key: (1) + │ ├── fd: ()-->(9), (1)-->(2-8,10) + │ ├── scan nyc_census_blocks + │ │ ├── columns: nyc_census_blocks.gid:1!null nyc_census_blocks.blkid:2 nyc_census_blocks.popn_total:3 nyc_census_blocks.popn_white:4 nyc_census_blocks.popn_black:5 nyc_census_blocks.popn_nativ:6 nyc_census_blocks.popn_asian:7 nyc_census_blocks.popn_other:8 nyc_census_blocks.boroname:9 nyc_census_blocks.geom:10 + │ │ ├── key: (1) + │ │ └── fd: (1)-->(2-10) + │ └── filters + │ └── nyc_census_blocks.boroname:9 = 'Manhattan' [outer=(9), constraints=(/9: [/'Manhattan' - /'Manhattan']; tight), fd=()-->(9)] + └── project + ├── columns: count:41 count_rows:29!null + ├── immutable + ├── fd: ()-->(41) + ├── group-by + │ ├── columns: n.boroname:24 count_rows:29!null + │ ├── grouping columns: n.boroname:24 + │ ├── immutable + │ ├── key: (24) + │ ├── fd: (24)-->(29) + │ ├── with &2 + │ │ ├── columns: geom:22 n.boroname:24 n.geom:26 + │ │ ├── immutable + │ │ ├── project + │ │ │ ├── columns: geom:43 rownum:44!null + │ │ │ ├── key: (44) + │ │ │ ├── fd: (44)-->(43) + │ │ │ ├── ordinality + │ │ │ │ ├── columns: geom:22 rownum:42!null + │ │ │ │ ├── key: (42) + │ │ │ │ ├── fd: (42)-->(22) + │ │ │ │ └── with-scan &1 (q) + │ │ │ │ ├── columns: geom:22 + │ │ │ │ └── mapping: + │ │ │ │ 
└── nyc_census_blocks.geom:10 => geom:22 + │ │ │ └── projections + │ │ │ ├── geom:22 [as=geom:43, outer=(22)] + │ │ │ └── rownum:42 [as=rownum:44, outer=(42)] + │ │ └── project + │ │ ├── columns: geom:22 n.boroname:24 n.geom:26 + │ │ ├── immutable + │ │ └── right-join (hash) + │ │ ├── columns: geom:22 n.boroname:24 n.geom:26 rownum:42!null geom:45 rownum:46 + │ │ ├── immutable + │ │ ├── fd: (42)-->(22), (46)-->(45) + │ │ ├── inner-join (lookup nyc_neighborhoods) + │ │ │ ├── columns: n.boroname:24 n.geom:26 geom:45 rownum:46!null + │ │ │ ├── key columns: [23] = [23] + │ │ │ ├── lookup columns are key + │ │ │ ├── immutable + │ │ │ ├── fd: (46)-->(45) + │ │ │ ├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx) + │ │ │ │ ├── columns: n.gid:23!null geom:45 rownum:46!null + │ │ │ │ ├── inverted-expr + │ │ │ │ │ └── st_intersects(geom:45, n.geom:26) + │ │ │ │ ├── key: (23,46) + │ │ │ │ ├── fd: (46)-->(45) + │ │ │ │ ├── with-scan &2 + │ │ │ │ │ ├── columns: geom:45 rownum:46!null + │ │ │ │ │ ├── mapping: + │ │ │ │ │ │ ├── geom:43 => geom:45 + │ │ │ │ │ │ └── rownum:44 => rownum:46 + │ │ │ │ │ ├── key: (46) + │ │ │ │ │ └── fd: (46)-->(45) + │ │ │ │ └── filters (true) + │ │ │ └── filters + │ │ │ └── st_intersects(geom:45, n.geom:26) [outer=(26,45), immutable] + │ │ ├── with-scan &2 + │ │ │ ├── columns: geom:22 rownum:42!null + │ │ │ ├── mapping: + │ │ │ │ ├── geom:43 => geom:22 + │ │ │ │ └── rownum:44 => rownum:42 + │ │ │ ├── key: (42) + │ │ │ └── fd: (42)-->(22) + │ │ └── filters + │ │ └── rownum:42 = rownum:46 [outer=(42,46), constraints=(/42: (/NULL - ]; /46: (/NULL - ]), fd=(42)==(46), (46)==(42)] + │ └── aggregations + │ └── count-rows [as=count_rows:29] + └── projections + └── subquery [as=count:41, subquery] + └── scalar-group-by + ├── columns: count_rows:40!null + ├── cardinality: [1 - 1] + ├── key: () + ├── fd: ()-->(40) + ├── with-scan &1 (q) + │ └── mapping: + └── aggregations + └── count-rows [as=count_rows:40] + +# No-op due to inner 
join on the right side. +opt expect-not=ConvertLeftToInnerJoin +SELECT * +FROM nyc_census_blocks AS c +LEFT JOIN ( + SELECT n1.*, n2.name FROM nyc_neighborhoods n1 JOIN nyc_neighborhoods n2 ON n1.boroname LIKE n2.boroname +) AS n ON ST_Covers(c.geom, n.geom) +---- +project + ├── columns: gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 boroname:9 geom:10 gid:13 boroname:14 name:15 geom:16 name:21 + ├── immutable + ├── fd: (1)-->(2-10), (13)-->(14-16) + └── left-join (cross) + ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 n1.gid:13 n1.boroname:14 n1.name:15 n1.geom:16 n2.boroname:20 n2.name:21 + ├── immutable + ├── fd: (1)-->(2-10), (13)-->(14-16) + ├── scan c + │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 + │ ├── key: (1) + │ └── fd: (1)-->(2-10) + ├── inner-join (cross) + │ ├── columns: n1.gid:13!null n1.boroname:14!null n1.name:15 n1.geom:16 n2.boroname:20!null n2.name:21 + │ ├── fd: (13)-->(14-16) + │ ├── scan n1 + │ │ ├── columns: n1.gid:13!null n1.boroname:14 n1.name:15 n1.geom:16 + │ │ ├── key: (13) + │ │ └── fd: (13)-->(14-16) + │ ├── scan n2 + │ │ └── columns: n2.boroname:20 n2.name:21 + │ └── filters + │ └── n1.boroname:14 LIKE n2.boroname:20 [outer=(14,20), constraints=(/14: (/NULL - ]; /20: (/NULL - ])] + └── filters + └── st_covers(c.geom:10, n1.geom:16) [outer=(10,16), immutable] + +# No-op due to lack of geospatial function in the ON condition. 
+opt expect-not=(GenerateInvertedJoins,ConvertLeftToInnerJoin) +SELECT * +FROM nyc_census_blocks AS c +LEFT JOIN nyc_neighborhoods AS n ON c.boroname LIKE n.boroname +---- +left-join (cross) + ├── columns: gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 boroname:9 geom:10 gid:13 boroname:14 name:15 geom:16 + ├── key: (1,13) + ├── fd: (1)-->(2-10), (13)-->(14-16) ├── scan c │ ├── columns: c.gid:1!null blkid:2 popn_total:3 popn_white:4 popn_black:5 popn_nativ:6 popn_asian:7 popn_other:8 c.boroname:9 c.geom:10 │ ├── key: (1) │ └── fd: (1)-->(2-10) ├── scan n │ ├── columns: n.gid:13!null n.boroname:14 name:15 n.geom:16 - │ ├── flags: force-index=nyc_neighborhoods_geo_idx │ ├── key: (13) │ └── fd: (13)-->(14-16) └── filters - └── st_covers(c.geom:10, n.geom:16) [outer=(10,16), immutable] + └── c.boroname:9 LIKE n.boroname:14 [outer=(9,14), constraints=(/9: (/NULL - ]; /14: (/NULL - ])] # Bounding box operations. opt expect=GenerateInvertedJoins