Skip to content

Commit

Permalink
sql: support range based lookup join spans
Browse files Browse the repository at this point in the history
Informs #51576

If filters exist on a lookup join that match columns that we are doing
the lookup against add them to the lookupExpr in the join reader spec
and build those filters into the multispan generator.

Remove map based key to input row index and replace it with a binary
search against a new span+inputRowIndices slice.

Release note (sql change): support range based lookup joins when join
conditions align with index/table being looked up into.   This can
greatly reduce the amount of rows we need to process, essentially a form
of predicate push down.

51576 also encompasses allowing inequalities on columns from the index
to reference columns from the input, that will come in a later commit.
  • Loading branch information
cucaroach committed Jun 25, 2021
1 parent da760c3 commit edca4c4
Show file tree
Hide file tree
Showing 15 changed files with 822 additions and 166 deletions.
232 changes: 232 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/lookup_join_spans
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
statement ok
CREATE TABLE metrics (
id SERIAL PRIMARY KEY,
name STRING,
INDEX name_index (name)
)

statement ok
insert into metrics (id,name) values (1,'cpu'), (2,'cpu'), (3,'mem'), (4,'disk')

statement ok
CREATE TABLE metric_values (
metric_id INT8,
time TIMESTAMPTZ,
value INT8,
PRIMARY KEY (metric_id, time)
)

statement ok
insert into metric_values (metric_id, time, value) values
(1,'2020-01-01 00:00:00+00:00',0),
(1,'2020-01-01 00:00:01+00:00',1),
(2,'2020-01-01 00:00:00+00:00',3),
(2,'2020-01-01 00:00:01+00:00',4)

statement ok
CREATE TABLE metric_valuesd (
metric_id INT8,
time TIMESTAMPTZ,
value INT8,
PRIMARY KEY (metric_id, time DESC)
)

statement ok
insert into metric_valuesd (metric_id, time, value) values
(1,'2020-01-01 00:00:00+00:00',0),
(1,'2020-01-01 00:00:01+00:00',1),
(2,'2020-01-01 00:00:00+00:00',3),
(2,'2020-01-01 00:00:01+00:00',4)

query ITIIT
SELECT *
FROM metric_values
INNER JOIN metrics
ON metric_id=id
WHERE
time > '2020-01-01 00:00:00+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_valuesd
INNER JOIN metrics
ON metric_id=id
WHERE
time > '2020-01-01 00:00:00+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_values
INNER JOIN metrics
ON metric_id=id
WHERE
time >= '2020-01-01 00:00:00+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_valuesd
INNER JOIN metrics
ON metric_id=id
WHERE
time >= '2020-01-01 00:00:00+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_values
INNER JOIN metrics
ON metric_id=id
WHERE
time < '2020-01-01 00:00:00+00:00' AND
name='cpu'
----

query ITIIT
SELECT *
FROM metric_valuesd
INNER JOIN metrics
ON metric_id=id
WHERE
time < '2020-01-01 00:00:00+00:00' AND
name='cpu'
----

query ITIIT
SELECT *
FROM metric_values
INNER JOIN metrics
ON metric_id=id
WHERE
time <= '2020-01-01 00:00:00+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu

query ITIIT
SELECT *
FROM metric_valuesd
INNER JOIN metrics
ON metric_id=id
WHERE
time <= '2020-01-01 00:00:00+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu

query ITIIT
SELECT *
FROM metric_values
INNER JOIN metrics
ON metric_id=id
WHERE
time in ('2020-01-01 00:00:00+00:00','2020-01-01 00:00:01+00:00') AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_valuesd
INNER JOIN metrics
ON metric_id=id
WHERE
time in ('2020-01-01 00:00:00+00:00','2020-01-01 00:00:01+00:00') AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_values
INNER JOIN metrics
ON metric_id=id
WHERE
time < '2020-01-01 00:00:10+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_valuesd
INNER JOIN metrics
ON metric_id=id
WHERE
time < '2020-01-01 00:00:10+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_values
INNER JOIN metrics
ON metric_id=id
WHERE
time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

query ITIIT
SELECT *
FROM metric_valuesd
INNER JOIN metrics
ON metric_id=id
WHERE
time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' AND
name='cpu'
ORDER BY value
----
1 2020-01-01 00:00:00 +0000 UTC 0 1 cpu
1 2020-01-01 00:00:01 +0000 UTC 1 1 cpu
2 2020-01-01 00:00:00 +0000 UTC 3 2 cpu
2 2020-01-01 00:00:01 +0000 UTC 4 2 cpu

3 changes: 1 addition & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/lookup_join
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ vectorized: true
│ columns: (a, b, c, d, e, f)
│ estimated row count: 33
│ table: def@primary
│ equality: (b) = (f)
│ pred: e > 1
│ lookup condition: (f = b) AND (e > 1)
└── • scan
columns: (a, b, c)
Expand Down
20 changes: 12 additions & 8 deletions pkg/sql/opt/memo/testdata/logprops/lookup-join
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,21 @@ inner-join (lookup abcd)
├── prune: (2,8)
├── interesting orderings: (+6,+7)
├── inner-join (lookup abcd@secondary)
│ ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int!null) abcd.rowid:9(int!null)
│ ├── key columns: [1] = [6]
│ ├── fd: (9)-->(6,7), (1)==(6), (6)==(1)
│ ├── columns: m:1(int!null) n:2(int) a:5(int!null) b:6(int!null) abcd.rowid:8(int!null)
│ ├── lookup expression
│ │ └── filters
│ │ ├── eq [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)]
│ │ │ ├── variable: a:5 [type=int]
│ │ │ └── variable: m:1 [type=int]
│ │ └── gt [type=bool, outer=(6), constraints=(/6: [/3 - ]; tight)]
│ │ ├── variable: b:6 [type=int]
│ │ └── const: 2 [type=int]
│ ├── fd: (8)-->(5,6), (1)==(5), (5)==(1)
│ ├── scan small
│ │ ├── columns: m:1(int) n:2(int)
│ │ ├── prune: (1,2)
│ │ └── unfiltered-cols: (1-5)
│ └── filters
│ └── gt [type=bool, outer=(7), constraints=(/7: [/3 - ]; tight)]
│ ├── variable: b:7 [type=int]
│ └── const: 2 [type=int]
│ │ └── unfiltered-cols: (1-4)
│ └── filters (true)
└── filters (true)

# Filter that can only be applied after the primary index join.
Expand Down
14 changes: 8 additions & 6 deletions pkg/sql/opt/memo/testdata/stats/lookup-join
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,17 @@ inner-join (lookup abcd)
├── stats: [rows=33.5823697, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0]
├── fd: (1)==(6), (6)==(1)
├── inner-join (lookup abcd@secondary)
│ ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int!null) abcd.rowid:9(int!null)
│ ├── key columns: [1] = [6]
│ ├── stats: [rows=33, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0, distinct(7)=33, null(7)=0]
│ ├── fd: (9)-->(6,7), (1)==(6), (6)==(1)
│ ├── columns: m:1(int!null) n:2(int) a:5(int!null) b:6(int!null) abcd.rowid:8(int!null)
│ ├── lookup expression
│ │ └── filters
│ │ ├── a:5 = m:1 [type=bool, outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)]
│ │ └── b:6 > 2 [type=bool, outer=(6), constraints=(/6: [/3 - ]; tight)]
│ ├── stats: [rows=33, distinct(1)=10, null(1)=0, distinct(5)=10, null(5)=0, distinct(6)=33, null(6)=0]
│ ├── fd: (8)-->(5,6), (1)==(5), (5)==(1)
│ ├── scan small
│ │ ├── columns: m:1(int) n:2(int)
│ │ └── stats: [rows=10, distinct(1)=10, null(1)=0]
│ └── filters
│ └── b:7 > 2 [type=bool, outer=(7), constraints=(/7: [/3 - ]; tight)]
│ └── filters (true)
└── filters (true)

# Filter that can only be applied after the primary index join.
Expand Down
Loading

0 comments on commit edca4c4

Please sign in to comment.