Skip to content

Commit

Permalink
opt: add transformation rule to convert left join to inner join
Browse files Browse the repository at this point in the history
Release justification: bug fixes and low-risk updates to new functionality

This commit adds an exploration rule ConvertLeftToInnerJoin, which converts
a left join to an inner join with the same ON condition, and then wraps the
expression in another left join with the original left side. In order to
avoid computing the left side of the join twice, we create a With expression
for the left side, and then reference it with two WithScans. For example
(assuming x is the primary key of a):

  SELECT a.x, b.y FROM a LEFT JOIN b ON ST_Intersects(a.geom, b.geom);

is converted to:

  WITH a_buf AS (
    SELECT * FROM a
  )
  SELECT a_buf.x, inr.y FROM a_buf LEFT JOIN (
    SELECT * FROM a_buf JOIN b ON ST_Intersects(a_buf.geom, b.geom)
  ) AS inr
  ON a_buf.x = inr.x;

Note that this transformation is not desirable in the general case, but it
is useful if there is a possibility of creating an inverted join (such as in
the above example). For this reason, we only perform this transformation if
it is possible to generate an inverted join.

This transformation allows us to index-accelerate spatial left joins, which
was not possible before.

Informs cockroachdb#53576

Release note (performance improvement): left outer spatial joins can now
be index-accelerated, which can lead to performance improvements in some
cases.
  • Loading branch information
rytaft committed Sep 4, 2020
1 parent 7cd72a7 commit f709177
Show file tree
Hide file tree
Showing 8 changed files with 482 additions and 32 deletions.
105 changes: 105 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,111 @@ ORDER BY rk
13
16

# Left join is supported by having the optimizer convert it to an inner join.
query II
SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom) ORDER BY (lk, rk)
----
1 13
1 16
2 14
2 16
3 12
3 16
4 NULL
5 12
5 16
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_DWithin(ltable.geom1, rtable.geom, 2) ORDER BY (lk, rk)
----
1 12
1 13
1 14
1 16
2 14
2 16
3 11
3 12
3 16
4 NULL
5 11
5 12
5 16
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable
ON ST_Intersects(rtable.geom, ltable.geom1) OR ST_DWithin(ltable.geom1, rtable.geom, 2) ORDER BY (lk, rk)
----
1 12
1 13
1 14
1 16
2 14
2 16
3 11
3 12
3 16
4 NULL
5 11
5 12
5 16
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable
ON ST_Intersects(ltable.geom1, rtable.geom) AND ST_DWithin(rtable.geom, ltable.geom1, 2) ORDER BY (lk, rk)
----
1 13
1 16
2 14
2 16
3 12
3 16
4 NULL
5 12
5 16
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable
ON ST_Intersects(ltable.geom1, rtable.geom) AND ST_DWithin(rtable.geom, ltable.geom2, 2) ORDER BY (lk, rk)
----
1 13
1 16
2 14
2 16
3 12
3 16
4 NULL
5 NULL
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable
ON ST_Intersects(ltable.geom1, rtable.geom) OR ST_DWithin(rtable.geom, ltable.geom2, 2) ORDER BY (lk, rk)
----
1 12
1 13
1 14
1 16
2 12
2 13
2 14
2 16
3 12
3 13
3 14
3 16
4 12
4 13
4 14
4 16
5 12
5 16
6 NULL

# These queries perform anti-joins, which do not currently use an inverted join.
query I
SELECT lk FROM ltable WHERE NOT EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(lta
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzElVFP2zAQx9_3Kax7opu71klaIE-ZRqZ16lrWIg0JVSg0J8gIcWY7Ewj1u09JKtKG1k4Ho49J7uf7-ey_8gjydwwuTP2h__mMZCImXybj7-TCPz8dfhqMyMHJYHo2_TFskWVJfFtWxCq4ipH8_OpPfOKf51XkYFnzviwRqyVSXUaJQiFxruRBSX-8Rn5n0WVh8dRqzYBCwkMcBXcowb0ABhQsoGDDjEIq-Byl5CL_9FgUDsJ7cLsUoiTNVP56RmHOBYL7CCpSMYILZ3mDCQYhik4XKISogiguli9VvFREd4F4AArTNEikS9qdvOk4Uy7xGPVsmC0o8ExVTaQKrhFctqDNRQbJHxQKw288SlB07HWXchBePojLKAnxHugT4d-nojZFz6LEc1qrmpbO1NrFNDdcTqy_0bKa2JDz2ywlv3iUEJ64xMu3NR5tsu2t225VtXdRPYmkipK56hyvi3r51RmLEAWGecNat2qBqwdyE8ibZ_RsURk5W42qdXjZq77Oh3KhrdrMMng_jWzrvHoN7LJkk99GtRFv87TDerXKzb37a71Z8ySyZknsWO0iKDtn0aBSy6KzxywaTFeyeLjvLBpUq0vdfbMwslcNo_3KYbSaB8JqGAi7_S9xMIjU4tDbYxwMpitxONp3HAyq1bVibxYH61Xj4PzHf9OGxhOUKU8kNvrzdHN1DK-x3KrkmZjjqeDzok35OC644kWIUpVfWfkwSMpPueAqzLSwpYctLWzrYbsOs1XYWYPZbjDrvoju6WlHu2sD3NMfVl8_s76WPtTDh1r4SA8faeFjPXz8kqPWw6ajNtCG02L6bJlofbiYIV1MHy9myBd7dsvXcceAP7vmuxyagTadmgk3DV6fsjo9W7z7GwAA__9Kzz7E

# Left joins are also converted to an inner join by the optimizer.
query T
SELECT url FROM [EXPLAIN (DISTSQL)
SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom)]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJyUk9Fv2jAQxt_3V5zuCSRr4AAvkSZ52tI1FSMdZNKkClVpctAMY2e2M1Eh_vcpCRojLQweff4-3---S7Zof0n0cRaMg08xlEbCzTT6Cg_Bj_vxx3ACnc_hLJ59G3dhL5ErBmbVqKRLniTBOLiJ4S4KJ2CaQjQB6x5z5chYSp3tNML3S9Jrzvaq-tSdI0OlM5oka7LoPyDHOcPC6JSs1aYqbWtBmG3Q7zPMVVG6qjxnmGpD6G_R5U4S-ih1mkiwaaLgqVwsyEC_10eGGbkkl7Vpx1CX7vCEdcmS0Oc7dnmbUP0m4yi707ki0-NHHbCZTlTTPeYqow2yv45gU5hWNMJjIIZdZBiVzgcxYIIz4eEpUu8a0opwSklGpue9SVmYfJ2YF2Q41npVFvBT5wq08kFUY71apBgwEKMDbYXKKubRSeDBNcCvN8h7g4s2ODzZ5vB6qbTJyFDWfvH_kjdYbxP7vP8Ehsfhxi8F-TANv9zGEH2Pg2n9dyBDSQvXEV73g8mXz64j-CHJERP8ZIajazKcki20snRRbP1qMsqW1CRldWlSujc6rds0x6j21YWMrGtueXMIVXNVAf5r5mfN3nmzd9Y8PDLztnlwhdlrm4dnzaMW9nz37k8AAAD__-1Ht2g=

# This query performs an anti-join, and does not currently use an inverted join.
query T
SELECT url FROM [EXPLAIN (DISTSQL)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,50 @@ project · ·
· table ltable@primary · ·
· spans FULL SCAN · ·

# Left joins are also converted to an inner join by the optimizer.
query TTTTT
EXPLAIN (VERBOSE)
SELECT lk, rk1 FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom)
----
· distribution local · ·
· vectorized false · ·
root · · (lk, rk1) ·
├── project · · (lk, rk1) ·
│ │ estimated row count 10000 (missing stats) · ·
│ └── project · · (lk, geom1, rk1, geom) ·
│ │ estimated row count 10000 (missing stats) · ·
│ └── hash join (right outer) · · (rk1, geom, lk, geom1, lk, geom1) ·
│ │ estimated row count 10000 (missing stats) · ·
│ │ equality (lk) = (lk) · ·
│ │ right cols are key · · ·
│ ├── project · · (rk1, geom, lk, geom1) ·
│ │ │ estimated row count 10000 (missing stats) · ·
│ │ └── lookup join (inner) · · (rk1, rk2, lk, geom1, geom) ·
│ │ │ table rtable@primary · ·
│ │ │ equality (rk1, rk2) = (rk1,rk2) · ·
│ │ │ equality cols are key · · ·
│ │ │ pred st_intersects(geom1, geom) · ·
│ │ └── project · · (rk1, rk2, lk, geom1) ·
│ │ │ estimated row count 10000 (missing stats) · ·
│ │ └── inverted join · · (lk, geom1, rk1, rk2, geom_inverted_key) ·
│ │ │ table rtable@geom_index · ·
│ │ │ inverted expr st_intersects(geom1, geom_inverted_key) · ·
│ │ └── scan buffer · · (lk, geom1) ·
│ │ estimated row count 1000 (missing stats) · ·
│ │ label buffer 1 · ·
│ └── scan buffer · · (lk, geom1) ·
│ estimated row count 1000 (missing stats) · ·
│ label buffer 1 · ·
└── subquery · · · ·
│ id @S1 · ·
│ exec mode all rows · ·
└── buffer · · (lk, geom1) ·
│ label buffer 1 · ·
└── scan · · (lk, geom1) ·
· estimated row count 1000 (missing stats) · ·
· table ltable@primary · ·
· spans FULL SCAN · ·

# This query performs an anti-join, and does not currently use an inverted join.
query TTTTT
EXPLAIN (VERBOSE)
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/opt/exec/explain/emit.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ func Emit(plan *Plan, ob *OutputBuilder, spanFormatFn SpanFormatFn) error {

// This field contains the original subquery (which could have been modified
// by optimizer transformations).
ob.Attr("original sql", tree.AsStringWithFlags(s.ExprNode, tree.FmtSimple))
if s.ExprNode != nil {
ob.Attr("original sql", tree.AsStringWithFlags(s.ExprNode, tree.FmtSimple))
}
var mode string
switch s.Mode {
case exec.SubqueryExists:
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/opt/memo/interner.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,9 @@ func (h *hasher) HashTypedExpr(val tree.TypedExpr) {
}

// HashStatement mixes the identity of a statement into the running hash.
// The statement is hashed by the pointer value obtained through reflection,
// not by its contents.
//
// A nil statement contributes nothing to the hash. The guard is required
// because reflect.ValueOf(nil) returns the zero reflect.Value, and calling
// Pointer() on the zero Value panics.
func (h *hasher) HashStatement(val tree.Statement) {
	if val == nil {
		return
	}
	h.HashUint64(uint64(reflect.ValueOf(val).Pointer()))
}

func (h *hasher) HashColumnID(val opt.ColumnID) {
Expand Down
Loading

0 comments on commit f709177

Please sign in to comment.