Skip to content

Commit

Permalink
opt: add transformation rule to convert left join to inner join
Browse files Browse the repository at this point in the history
Release justification: bug fixes and low-risk updates to new functionality

This commit adds an exploration rule ConvertLeftToInnerJoin, which converts
a left join to an inner join with the same ON condition, and then wraps the
expression in another left join with the original left side. In order to
avoid computing the left side of the join twice, we create a With expression
for the left side, and then reference it with two WithScans. For example
(assuming x is the primary key of a):

  SELECT a.x, b.y FROM a LEFT JOIN b ON ST_Intersects(a.geom, b.geom);

is converted to:

  WITH a_buf AS (
    SELECT * FROM a
  )
  SELECT a_buf.x, inr.y FROM a_buf LEFT JOIN (
    SELECT * FROM a_buf JOIN b ON ST_Intersects(a_buf.geom, b.geom)
  ) AS inr
  ON a_buf.x = inr.x;

Note that this transformation is not desirable in the general case, but it
is useful if there is a possibility of creating an inverted join (such as in
the above example). For this reason, we only perform this transformation if
it is possible to generate an inverted join.

This tranformation allows us to index-accelerate spatial left joins, which
was not possible before.

Informs cockroachdb#53576

Release note (performance improvement): left outer spatial joins can now
be index-accelerated, which can lead to performance improvements in some
cases.
  • Loading branch information
rytaft committed Sep 6, 2020
1 parent 7cd72a7 commit 8330294
Show file tree
Hide file tree
Showing 8 changed files with 811 additions and 27 deletions.
120 changes: 120 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,126 @@ ORDER BY rk
13
16

# Left join is supported by having the optimizer convert it to an inner join.
query II
SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom) ORDER BY (lk, rk)
----
1 13
1 16
2 14
2 16
3 12
3 16
4 NULL
5 12
5 16
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_DWithin(ltable.geom1, rtable.geom, 2) ORDER BY (lk, rk)
----
1 12
1 13
1 14
1 16
2 14
2 16
3 11
3 12
3 16
4 NULL
5 11
5 12
5 16
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable
ON ST_Intersects(rtable.geom, ltable.geom1) OR ST_DWithin(ltable.geom1, rtable.geom, 2) ORDER BY (lk, rk)
----
1 12
1 13
1 14
1 16
2 14
2 16
3 11
3 12
3 16
4 NULL
5 11
5 12
5 16
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable
ON ST_Intersects(ltable.geom1, rtable.geom) AND ST_DWithin(rtable.geom, ltable.geom1, 2) ORDER BY (lk, rk)
----
1 13
1 16
2 14
2 16
3 12
3 16
4 NULL
5 12
5 16
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable
ON ST_Intersects(ltable.geom1, rtable.geom) AND ST_DWithin(rtable.geom, ltable.geom2, 2) ORDER BY (lk, rk)
----
1 13
1 16
2 14
2 16
3 12
3 16
4 NULL
5 NULL
6 NULL

query II
SELECT lk, rk FROM ltable LEFT JOIN rtable
ON ST_Intersects(ltable.geom1, rtable.geom) OR ST_DWithin(rtable.geom, ltable.geom2, 2) ORDER BY (lk, rk)
----
1 12
1 13
1 14
1 16
2 12
2 13
2 14
2 16
3 12
3 13
3 14
3 16
4 12
4 13
4 14
4 16
5 12
5 16
6 NULL

query III
WITH q AS (
SELECT * FROM ltable WHERE lk > 2
)
SELECT lk, count(*), (SELECT count(*) FROM q) FROM (
SELECT lk, rk
FROM q
LEFT JOIN rtable ON ST_Intersects(q.geom1, rtable.geom)
) GROUP BY lk ORDER BY lk
----
3 2 4
4 1 4
5 2 4
6 1 4

# These queries perform anti-joins, which do not currently use an inverted join.
query I
SELECT lk FROM ltable WHERE NOT EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,26 @@ SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(lta
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzElVFP2zAQx9_3Kax7opu71klaIE-ZRqZ16lrWIg0JVSg0J8gIcWY7Ewj1u09JKtKG1k4Ho49J7uf7-ey_8gjydwwuTP2h__mMZCImXybj7-TCPz8dfhqMyMHJYHo2_TFskWVJfFtWxCq4ipH8_OpPfOKf51XkYFnzviwRqyVSXUaJQiFxruRBSX-8Rn5n0WVh8dRqzYBCwkMcBXcowb0ABhQsoGDDjEIq-Byl5CL_9FgUDsJ7cLsUoiTNVP56RmHOBYL7CCpSMYILZ3mDCQYhik4XKISogiguli9VvFREd4F4AArTNEikS9qdvOk4Uy7xGPVsmC0o8ExVTaQKrhFctqDNRQbJHxQKw288SlB07HWXchBePojLKAnxHugT4d-nojZFz6LEc1qrmpbO1NrFNDdcTqy_0bKa2JDz2ywlv3iUEJ64xMu3NR5tsu2t225VtXdRPYmkipK56hyvi3r51RmLEAWGecNat2qBqwdyE8ibZ_RsURk5W42qdXjZq77Oh3KhrdrMMng_jWzrvHoN7LJkk99GtRFv87TDerXKzb37a71Z8ySyZknsWO0iKDtn0aBSy6KzxywaTFeyeLjvLBpUq0vdfbMwslcNo_3KYbSaB8JqGAi7_S9xMIjU4tDbYxwMpitxONp3HAyq1bVibxYH61Xj4PzHf9OGxhOUKU8kNvrzdHN1DK-x3KrkmZjjqeDzok35OC644kWIUpVfWfkwSMpPueAqzLSwpYctLWzrYbsOs1XYWYPZbjDrvoju6WlHu2sD3NMfVl8_s76WPtTDh1r4SA8faeFjPXz8kqPWw6ajNtCG02L6bJlofbiYIV1MHy9myBd7dsvXcceAP7vmuxyagTadmgk3DV6fsjo9W7z7GwAA__9Kzz7E

# Left joins are also converted to an inner join by the optimizer.
query T
SELECT url FROM [EXPLAIN (DISTSQL)
SELECT lk, rk FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom)]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJyUk9Fv2jAQxt_3V5zuCSRr4AAvkSZ52tI1FSMdZNKkClVpctAMY2e2M1Eh_vcpCRojLQweff4-3---S7Zof0n0cRaMg08xlEbCzTT6Cg_Bj_vxx3ACnc_hLJ59G3dhL5ErBmbVqKRLniTBOLiJ4S4KJ2CaQjQB6x5z5chYSp3tNML3S9Jrzvaq-tSdI0OlM5oka7LoPyDHOcPC6JSs1aYqbWtBmG3Q7zPMVVG6qjxnmGpD6G_R5U4S-ih1mkiwaaLgqVwsyEC_10eGGbkkl7Vpx1CX7vCEdcmS0Oc7dnmbUP0m4yi707ki0-NHHbCZTlTTPeYqow2yv45gU5hWNMJjIIZdZBiVzgcxYIIz4eEpUu8a0opwSklGpue9SVmYfJ2YF2Q41npVFvBT5wq08kFUY71apBgwEKMDbYXKKubRSeDBNcCvN8h7g4s2ODzZ5vB6qbTJyFDWfvH_kjdYbxP7vP8Ehsfhxi8F-TANv9zGEH2Pg2n9dyBDSQvXEV73g8mXz64j-CHJERP8ZIajazKcki20snRRbP1qMsqW1CRldWlSujc6rds0x6j21YWMrGtueXMIVXNVAf5r5mfN3nmzd9Y8PDLztnlwhdlrm4dnzaMW9nz37k8AAAD__-1Ht2g=

query T
SELECT url FROM [EXPLAIN (DISTSQL)
WITH q AS (
SELECT * FROM ltable WHERE lk > 2
)
SELECT count(*), (SELECT count(*) FROM q) FROM (
SELECT lk, rk
FROM q
LEFT JOIN rtable ON ST_Intersects(q.geom1, rtable.geom)
) GROUP BY lk]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJyUlM9u2kAQxu99itGccLQq2EAOliqZpk5wRO3UGKVRipBjT4iLswu76ypRlHev_CehkEDDCe3M_Ga-nY_1E6pVjjaO3ZF7EkEhczgNg-9w7f68GA08H1rfvHE0_jEy4NKLhrCCwRhaTfVRXZvr-CYnuBy6oQv5An4VnU6XwDKgqUtEwXXryGCv5EukbrBqfl-y-YKBXDQ5GLmnEZwHng-yHhT4oPQs45qkokSr1urznMS9yZqC6mQYcBYGkwv4egX5YooMuUjJj-9JoX2NJk4ZLqVISCkhy9BTVeClD2h3GGZ8WegyPGWYCEloP6HOdE5oYy6SOAeVxBxuittbktBpd5BhSjrO8gp6ZigKvW6hdDwntM1n9vExHv9DUlN6LjJOsm1uTMD6qk551VnGU3pA9kq4D0u5tSLHYuD0DGQYFNoGp8sckzkW7lJqHaK0VBhSnJJsW--qXMrsPpaPyHAkxKJYwm-RcRDcBqe81htDnS4Dp79W22eOxZzuTrXdQ9S-tc9sdz9kX2_nmHX3gguZkqR0u-P_S97ROozVXeN_b3Oz0eOSbAi9s2EEwSRyw-qJIMOcbnXLsYwvMpvf6ZZjrtfY27nA_iELHMznkuaxFrLd3xRVuTnwr2Z-EM38yWjUjD8JJn40C4PLcas8hsRTkjaUf8rmzb98EMo3vzJ26jw-RGdIaim4og952ynXT-mcajuVKGRCF1Ik1Zj6GFRcFUhJ6Tpr1geP16lS4L-wuRe29sPWXri3AZvbcPcA2NqGe3vh_n7Z_b3w8RY8ff70NwAA__-fpxhU

# This query performs an anti-join, and does not currently use an inverted join.
query T
SELECT url FROM [EXPLAIN (DISTSQL)
Expand Down
Loading

0 comments on commit 8330294

Please sign in to comment.