Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: use paired joins for left semi inverted joins #55986

Merged
merged 1 commit into from
Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0
----
1 13

-# These queries perform semi-joins, which are converted to inner joins by the
+# These queries perform semi-joins, which are converted to paired joins by the
# optimizer.
query I
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,13 @@ AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzsVUFv2jAYve9XWN-lIBmIE6DUp1QrTKlo0gGHTRWqMuJ1WVM7s52uVcV_n5JUKwlgUk27wQnb7_l9ed_35BdQvxKgMB9Pxx8XKJMJmsyCK3Qz_nI9Pfd81Lrw5ov552kbvUKSe4zkfYlKdPgtYegy8Hwki__uHRMPtzGP2BMKfNRS-jbmmknFVlq1Snw3xxD8yihWbXTuXyClb1fikckK0K4CS2R-b_Q9S5Ln37H-EfPWBgSjqgyxrDYKZqhaysl14PmLFulaiHSt9klNZQkYuIiYHz4wBfQGCGCwAYMDSwypFCumlJD50UsB9KInoBaGmKeZzreXGFZCMqAvoGOdMKCwyAVmLIyY7FmAIWI6jJPi-rJkN5XxQyifAcM8DbmiqNOzYbnGIDL9dq_S4R0DSta4ubbHH5nULLoUMWey51Tlt5oH-C9j_JTKeiNdGyN3UO-a6xS7e1pUcop-dC1K6WQanC9Gu1pjEYtYW7-J5Uzq6xNK6adxcDVezL6W2oAhyDRFLsGujV0Hu_29BtrvMTA37rV3w53mvfVuKsR9lqKfIuZIcIrcfl7VVhoKQ4Y7TRzuNXGIUc77jyYOqybut8_Za9-baxkXMmKSRRXLlusdBvuiI9LeWQ24W7pfkSbNY0eaxa5nd3pOw-AdUK8Fr38MXj14BwzcCN7pMXjbwbObT7_dcPqdTsPZP6Bdm_3Bcfbrs3_AwI3ZHx1n3_zo7LBvxlQquGKN3hQrf5RYdMfKF0yJTK7YtRSrQqZcBgWv2IiY0uUpKRceL4_yAjfJxEi2zWTbSHYqZFInO-ayLbN038gemMkDI3loJg__5aNPjeSRWXlkJJ-ZyWfvKnu5_vAnAAD__96MMnU=

-# This query performs a semi-join, which is converted to an inner join by the
+# This query performs a semi-join, which is converted to paired joins by the
# optimizer.
query T
SELECT url FROM [EXPLAIN (DISTSQL)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzElVFP2zAQx9_3Kax7opu71klaIE-ZRqZ16lrWIg0JVSg0J8gIcWY7Ewj1u09JKtKG1k4Ho49J7uf7-ey_8gjydwwuTP2h__mMZCImXybj7-TCPz8dfhqMyMHJYHo2_TFskWVJfFtWxCq4ipH8_OpPfOKf51XkYFnzviwRqyVSXUaJQiFxruRBSX-8Rn5n0WVh8dRqzYBCwkMcBXcowb0ABhQsoGDDjEIq-Byl5CL_9FgUDsJ7cLsUoiTNVP56RmHOBYL7CCpSMYILZ3mDCQYhik4XKISogiguli9VvFREd4F4AArTNEikS9qdvOk4Uy7xGPVsmC0o8ExVTaQKrhFctqDNRQbJHxQKw288SlB07HWXchBePojLKAnxHugT4d-nojZFz6LEc1qrmpbO1NrFNDdcTqy_0bKa2JDz2ywlv3iUEJ64xMu3NR5tsu2t225VtXdRPYmkipK56hyvi3r51RmLEAWGecNat2qBqwdyE8ibZ_RsURk5W42qdXjZq77Oh3KhrdrMMng_jWzrvHoN7LJkk99GtRFv87TDerXKzb37a71Z8ySyZknsWO0iKDtn0aBSy6KzxywaTFeyeLjvLBpUq0vdfbMwslcNo_3KYbSaB8JqGAi7_S9xMIjU4tDbYxwMpitxONp3HAyq1bVibxYH61Xj4PzHf9OGxhOUKU8kNvrzdHN1DK-x3KrkmZjjqeDzok35OC644kWIUpVfWfkwSMpPueAqzLSwpYctLWzrYbsOs1XYWYPZbjDrvoju6WlHu2sD3NMfVl8_s76WPtTDh1r4SA8faeFjPXz8kqPWw6ajNtCG02L6bJlofbiYIV1MHy9myBd7dsvXcceAP7vmuxyagTadmgk3DV6fsjo9W7z7GwAA__9Kzz7E
https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlN9v2j4Uxd-_f4V1n8pXZpAEaJunTFuqpaLQAdIqVVGVxXdV1tTObGeiQvzvkxM2fghM2F7aR8fn5J7P1ZEXoH7k4MM0HIYfZqSUObmajG_IfXh3O3wfjcjZx2g6m34etshKkj_VilwnX3MkXz6Fk5CEd0ZFzlaa_2uJ3JQo_ZBxjVJhqtVZ7X73iOLZpSthdWq1YqDABcNR8owK_HtwgIILFDyIKRRSpKiUkOZqUQkjNge_SyHjRanN55hCKiSCvwCd6RzBh5kZMMGEoex0gQJDnWR59fs6SlDI7DmRL0BhWiRc-aTdMUPHpfZJ4NDAg3hJQZR6PUTp5BHBd5a0eZCI_0SpkV2LjKPseNtZ6kUEZhEPGWc4B_rHEc4LubPFwKUk6LWAwrdMKk2-i4yTjJMiySSytjluIrg08GjQPwjingJiAFYLHWxDzF4K9MkwvJqRaXgTketxNAL6m22956EQT2VRhxbcJ4FZxni0j3FgGBWmgjM75EE27yDbGqnkQjKUyLZ44uUe-pFoi6JzuSPcP7q3NdppXlSnWVE7brvq0clVPRJlp6q911vVIyAbVT1_c1V1m_fFbdgXr_03bTkSZKct_dfbliMgG225eHNtOfJoT1AVgits9G51zcOH7BHrV1KJUqZ4K0VajamP48pXfWCodH3r1IeI11cm4KbZsZpdu9m1mr0ts7Nr9uyxu_bRPau7bzf3reaB3Tz4F-hzq_nCPvnCar60my9Pih0v__sVAAD__xtCcVg=

# Left joins are converted to paired joins by the optimizer.
query T
Expand Down Expand Up @@ -199,7 +199,7 @@ SELECT url FROM [EXPLAIN (DISTSQL)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable2@geom_index
WHERE ST_Intersects(ltable.geom1, rtable2.geom)) ORDER BY lk]
----
https://cockroachdb.github.io/distsqlplan/decode.html#eJzMlVFP2zwUhu-_X2GdK_rNXeskLZCrbCPTOnUta5HGhCoUGgsyQpzZzgRC_e-Tk440KbXT0QGXic_j8-bYj3IP4mcMLkz9of_hBGU8Rh8n4y_ozD89Hr4bjNDe0WB6Mv06bKFlSXxdVMQyuIgp-vbJn_jIP1VVaG9Z839RwvMSy7uk7OY8SkJ6uywX8jxKJOWCzqXYK3Z6q6oI_gPlj60WGk-O_Al6_x3F1zPAkLCQjoIbKsA9AwIYLMBgwwxDytmcCsG4WrrPCwfhLbhdDFGSZlK9nmGYM07BvQcZyZiCCyeq24QGIeWdLmAIqQyiON--yOWlPLoJ-B1gmKZBIlzU7qim40y6yCPYs2C2wMAyWTYRMrik4JIFbh5kkPyiXNLwM4sSyjt2Ncv6KAE_IP5tymsz9SyMPKdVyYk9G3u9jWmtbdKqlMup9R9PWo5tyNh1lqIfLEoQS1ykYjgq2eix0P1Ws-Ha28Q9ioSMkrnsHFbDeuoKjXlIOQ1Vw1q3coOLO3QViKs1erYoEzkbE5X7sKJXfZ83xUYbYxPLkPthZBvn1dt5uhFrs7RDepXyTe37lfakuZSkmZQdq507s7WWhig1LZ0X1tKQdkXL_degpSFueb-7z-Yl2amX9o69tJqLYTUUw27_jRaGIDUtei-shSHtihYHr0ELQ9zyepFn08LaqRbOP_xdPdJ4QkXKEkEb_Ym6KjoNL2nxqYJlfE6POZvnbYrHcc7lL0IqZLFKiodBUiypgKsw0cKWHra0sK2H7TpMVmGnApPtYNJ9Et3T0472qw1wT39Yff3M-lp6Xw_va-EDPXyghQ_18OFTjloPm47aQBtOi-jdMtF6uYjBLqLXixj8Imu3vIo7Bnztmm9zaAbadGom3DR4vWV1erb473cAAAD__7TDS8A=
https://cockroachdb.github.io/distsqlplan/decode.html#eJzUlFFP2z4Uxd__n8K6T_Q_d22StkCeso2gBZWWtZXGhCKUxXcoI9iZ7Uwg1O8-OemAdNRNtxd4dHxO7vldHfke1I8cfJiH4_DDgpQyJ8ez6Sm5CM_Pxu-iCdk7iuaL-adxh6wk-XWtyHXyNUfy-WM4C0l4blRkb6X5v5bISuIGVyhuLjPO8HYlV_oy4xqlwlSrvfpPb43Kob9N1bHTIdPZUTgj77-Q_DoGClwwnCQ3qMC_AAcouEDBg5hCIUWKSglpru4rYcRuwe9TyHhRavM5ppAKieDfg850juDDwkybYcJQ9vpAgaFOsrz6fZ0rKGR2k8g7oDAvEq580u2ZodNS-yRwaOBCvKQgSv04ROnkCsF3lrR9kIj_RKmRnYiMo-x5zSx_rhLogyW8LeTaTgOXkmDQAQrfMqk0-S4yTjJOiiSTyLrm2GCggUeDIQ1GG2HcXWAMxGqpoybI4q5An4zD4wWZh6cROZlGE6APfI_LHgtxXRZ1cMF9YgIOTObJc6j7BlVhKjizs27k8zbyPWIJyVAiaxIFzhuIl88sYSK6ougdNtSbpg8a0532nXXadbbndqtK7dzaLVHWWjt42a3dAvOktfuvsrVu-964LXvjdf-mNVuCrLVm-LJbswXmSWsOXmVrtrzlM1SF4ApbvWN98xAiu8L64VSilCmeSZFWY-rjtPJVHxgqXd869SHi9ZUJ-NTsWM2u3exazV7D7KybPXvsvn30wOoe2s1Dq3lkN4_-BXrfaj6wTz6wmg_t5sOdYsfL_34FAAD__7Tvflc=

query T
SELECT url FROM [EXPLAIN (DISTSQL)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,39 +146,34 @@ project · ·
· table ltable@primary · ·
· spans FULL SCAN · ·

-# This query performs a semi-join, which is converted to an inner join by the
+# This query performs a semi-join, which is converted to paired joins by the
# optimizer.
query TTTTT
EXPLAIN (VERBOSE)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
----
· distribution local · ·
· vectorized true · ·
project · · (lk) ·
│ estimated row count 10 (missing stats) · ·
└── distinct · · (lk, geom2) ·
│ estimated row count 1000 (missing stats) · ·
│ distinct on lk · ·
│ order key lk · ·
└── project · · (lk, geom2) +lk
└── project · · (lk, geom2, geom) +lk
│ estimated row count 9801 (missing stats) · ·
└── lookup join (inner) · · (lk, geom2, rk1, rk2, geom) +lk
│ table rtable@primary · ·
│ equality (rk1, rk2) = (rk1,rk2) · ·
│ equality cols are key · · ·
│ pred st_intersects(geom2, geom) · ·
└── project · · (lk, geom2, rk1, rk2) +lk
│ estimated row count 10000 (missing stats) · ·
└── inverted join (inner) · · (lk, geom2, rk1, rk2, geom_inverted_key) +lk
│ table rtable@geom_index · ·
│ inverted expr st_intersects(geom2, geom_inverted_key) · ·
└── scan · · (lk, geom2) +lk
· estimated row count 1000 (missing stats) · ·
· table ltable@primary · ·
· spans FULL SCAN · ·
· distribution local · ·
· vectorized true · ·
project · · (lk) ·
│ estimated row count 10 (missing stats) · ·
└── project · · (lk, geom2) ·
│ estimated row count 10 (missing stats) · ·
└── lookup join (semi) · · (lk, geom2, rk1, rk2, cont) ·
│ table rtable@primary · ·
│ equality (rk1, rk2) = (rk1,rk2) · ·
│ equality cols are key · · ·
│ pred st_intersects(geom2, geom) · ·
└── project · · (lk, geom2, rk1, rk2, cont) ·
│ estimated row count 10000 (missing stats) · ·
└── inverted join (inner) · · (lk, geom2, rk1, rk2, geom_inverted_key, cont) ·
│ table rtable@geom_index · ·
│ inverted expr st_intersects(geom2, geom_inverted_key) · ·
└── scan · · (lk, geom2) ·
· estimated row count 1000 (missing stats) · ·
· table ltable@primary · ·
· spans FULL SCAN · ·

-# Left joins are also converted to an inner join by the optimizer.
+# Left outer joins are also converted to paired joins by the optimizer.
query TTTTT
EXPLAIN (VERBOSE)
SELECT lk, rk1 FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom)
Expand Down
71 changes: 24 additions & 47 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -785,59 +785,36 @@ query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM geo_table2 WHERE EXISTS (SELECT * FROM geo_table@geom_index
WHERE ST_Intersects(geo_table2.geom, geo_table.geom))
----
project
semi-join (lookup geo_table)
├── columns: k:1 geom:2
├── key columns: [5] = [5]
├── lookup columns are key
├── immutable
├── stats: [rows=10]
├── cost: 112690.199
├── cost: 112684.05
├── key: (1)
├── fd: (1)-->(2)
├── prune: (1)
└── distinct-on
├── columns: geo_table2.k:1 geo_table2.geom:2
├── grouping columns: geo_table2.k:1
├── internal-ordering: +1
├── immutable
├── stats: [rows=999.947218, distinct(1)=999.947218, null(1)=0]
├── cost: 112690.089
├── key: (1)
├── fd: (1)-->(2)
├── inner-join (lookup geo_table)
│ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.geom:6
│ ├── key columns: [5] = [5]
│ ├── lookup columns are key
│ ├── immutable
│ ├── stats: [rows=9801, distinct(1)=999.947218, null(1)=0]
│ ├── cost: 112484.05
│ ├── fd: (1)-->(2)
│ ├── ordering: +1
│ ├── prune: (1)
│ ├── interesting orderings: (+1)
│ ├── inner-join (inverted-lookup geo_table@geom_index)
│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.k:5
│ │ ├── inverted-expr
│ │ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6)
│ │ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0]
│ │ ├── cost: 41784.03
│ │ ├── key: (1,5)
│ │ ├── fd: (1)-->(2)
│ │ ├── ordering: +1
│ │ ├── scan geo_table2
│ │ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
│ │ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10]
│ │ │ ├── cost: 1084.02
│ │ │ ├── key: (1)
│ │ │ ├── fd: (1)-->(2)
│ │ │ ├── ordering: +1
│ │ │ ├── prune: (1,2)
│ │ │ ├── interesting orderings: (+1)
│ │ │ └── unfiltered-cols: (1-4)
│ │ └── filters (true)
│ └── filters
│ └── st_intersects(geo_table2.geom:2, geo_table.geom:6) [outer=(2,6), immutable, constraints=(/2: (/NULL - ]; /6: (/NULL - ])]
└── aggregations
└── const-agg [as=geo_table2.geom:2, outer=(2)]
└── geo_table2.geom:2
├── inner-join (inverted-lookup geo_table@geom_index)
│ ├── columns: geo_table2.k:1 geo_table2.geom:2 geo_table.k:5 continuation:11
│ ├── inverted-expr
│ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:6)
│ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0]
│ ├── cost: 41984.03
│ ├── key: (1,5)
│ ├── fd: (1)-->(2), (5)-->(11)
│ ├── scan geo_table2
│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
│ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10]
│ │ ├── cost: 1084.02
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(2)
│ │ ├── prune: (1,2)
│ │ ├── interesting orderings: (+1)
│ │ └── unfiltered-cols: (1-4)
│ └── filters (true)
└── filters
└── st_intersects(geo_table2.geom:2, geo_table.geom:6) [outer=(2,6), immutable, constraints=(/2: (/NULL - ]; /6: (/NULL - ])]

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM geo_table2 WHERE NOT EXISTS (SELECT * FROM geo_table@geom_index
Expand Down
Loading