Skip to content

Commit

Permalink
sql: try to avoid distinct while decorrelating
Browse files Browse the repository at this point in the history
For sufficiently simple subqueries, we can avoid the extra join
introduced by branch.
  • Loading branch information
benesch committed Jul 25, 2020
1 parent 7b8e95a commit fb64d76
Show file tree
Hide file tree
Showing 6 changed files with 300 additions and 483 deletions.
60 changes: 60 additions & 0 deletions src/sql/src/plan/decorrelate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
use std::collections::{BTreeSet, HashMap};

use anyhow::bail;
use itertools::Itertools;

use ore::collections::CollectionExt;
use repr::RelationType;
Expand Down Expand Up @@ -80,6 +81,32 @@ impl ColumnMap {
/// Returns the number of entries currently held in `inner`.
fn len(&self) -> usize {
self.inner.len()
}

/// Produces the `ColumnMap` to use inside a nested scope.
///
/// `arity` must be the arity of the current scope. Entries already in the
/// map keep their index but have their level raised by one; the current
/// scope's `arity` columns are appended as level-1 references whose indices
/// continue on from the existing entries.
fn enter_scope(&self, arity: usize) -> ColumnMap {
    // Indices for the newly exposed columns start right after the entries
    // that are already tracked.
    let offset = self.len();

    // Seen from the nested scope, the current scope is the immediate parent,
    // so each of its columns becomes an explicit level-1 entry.
    let parent_columns =
        (0..arity).map(|column| (ColumnRef { level: 1, column }, offset + column));

    // Everything tracked so far now lives one level further out.
    let shifted = self
        .inner
        .clone()
        .into_iter()
        .update(|entry| entry.0.level += 1);

    ColumnMap::new(shifted.chain(parent_columns).collect())
}
}

impl RelationExpr {
Expand Down Expand Up @@ -427,6 +454,32 @@ impl RelationExpr {
}
}
}

/// Reports whether this `RelationExpr` is safe to apply directly in a
/// branch.
///
/// The result is `false` as soon as any expression that `visit` hands to
/// the callback is a `Distinct`, `Reduce`, `Threshold`, `Negate`, or `TopK`;
/// every other variant leaves the result untouched.
fn is_branch_safe(&self) -> bool {
    let mut safe = true;
    self.visit(&mut |expr| {
        // Deliberately no wildcard arm: adding a new `RelationExpr` variant
        // must force an explicit decision here.
        let allowed = match expr {
            RelationExpr::Constant { .. }
            | RelationExpr::Get { .. }
            | RelationExpr::Project { .. }
            | RelationExpr::Map { .. }
            | RelationExpr::Filter { .. }
            | RelationExpr::Union { .. }
            | RelationExpr::Join { .. }
            | RelationExpr::CallTable { .. } => true,
            RelationExpr::Distinct { .. }
            | RelationExpr::Reduce { .. }
            | RelationExpr::Threshold { .. }
            | RelationExpr::Negate { .. }
            | RelationExpr::TopK { .. } => false,
        };
        safe = safe && allowed;
    });
    safe
}
}

impl ScalarExpr {
Expand Down Expand Up @@ -603,6 +656,13 @@ where
// at the least for purposes of understanding. It was difficult for one reader
// to understand the required properties of `outer` and `col_map`.

if inner.is_branch_safe() {
let new_col_map = col_map.enter_scope(outer.arity() - col_map.len());
return outer.let_in(id_gen, |id_gen, get_outer| {
apply(id_gen, inner, get_outer, &new_col_map)
});
}

// The key consists of the columns from the outer expression upon which the
// inner relation depends. We discover these dependencies by walking the
// inner relation expression and looking for column references whose level
Expand Down
179 changes: 69 additions & 110 deletions test/sqllogictest/chbench.slt
Original file line number Diff line number Diff line change
Expand Up @@ -392,31 +392,18 @@ ORDER BY o_ol_cnt
----
%0 =
| Get materialize.public.order (u16)
| Filter (datetots(#4) < 2012-01-02 00:00:00), (datetots(#4) >= 2007-01-02 00:00:00)

%1 =
| Get %0

%2 =
| Get %0
| ArrangeBy (#0, #1, #2)

%3 =
%1 =
| Get materialize.public.orderline (u19)
| ArrangeBy (#2, #1, #0)

%4 =
| Join %2 %3 (= #0 #8) (= #1 #9) (= #2 #10)
| | implementation = DeltaQuery %2 %3.(#2, #1, #0) | %3 %2.(#0, #1, #2)
| | demand = (#0..#2, #4, #14)
| Filter (#14 >= #4)
| Distinct group=(#0, #1, #2, #4)
| ArrangeBy (#0, #1, #2, #3)

%5 =
| Join %1 %4 (= #0 #8) (= #1 #9) (= #2 #10) (= #4 #11)
| | implementation = Differential %1 %4.(#0, #1, #2, #3)
| | demand = (#6)
%2 =
| Join %0 %1 (= #2 #10) (= #1 #9) (= #0 #8)
| | implementation = DeltaQuery %0 %1.(#2, #1, #0) | %1 %0.(#0, #1, #2)
| | demand = (#0..#7, #14)
| Filter (datetots(#4) < 2012-01-02 00:00:00), (#14 >= #4), (datetots(#4) >= 2007-01-02 00:00:00)
| Distinct group=(#0, #1, #2, #3, #4, #5, #6, #7)
| Reduce group=(#6) countall(true)

Finish order_by=(#0 asc) limit=none offset=0 project=(#0, #1)
Expand Down Expand Up @@ -1088,46 +1075,38 @@ ORDER BY supplier_cnt DESC
%2 =
| Join %0 %1 (= #0 #18)
| | implementation = DeltaQuery %0 %1.(#0) | %1 %0.(#0)
| | demand = (#17, #20..#22)
| | demand = (#0..#17, #19..#22)
| Filter !("^zz.*$" ~(#22))

%3 =
| Get %2
| Distinct group=(#17)

%4 =
| Get %2
| ArrangeBy (#17)
| Get materialize.public.supplier (u34)
| ArrangeBy (#0)

%5 =
| Get %3
| ArrangeBy (#0)
| Join %3 %4 (= #17 #23)
| | implementation = Differential %3 %4.(#0)
| | demand = (#0..#17, #19..#22, #29)
| Filter "^.*bad.*$" ~(#29)
| Negate
| Project (#0..#17, #0, #19..#22)

%6 =
| Get materialize.public.supplier (u34)
| ArrangeBy (#0)
| Get %2
| Project (#0..#17, #0, #19..#22)

%7 =
| Join %5 %6 (= #0 #1)
| | implementation = DeltaQuery %5 %6.(#0) | %6 %5.(#0)
| | demand = (#0, #7)
| Filter "^.*bad.*$" ~(#7)
| Negate
| Project (#0)
| Union %5 %6

%8 =
| Get %3
| Get %2
| ArrangeBy (#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #19, #20, #21, #22)

%9 =
| Union %7 %8

%10 =
| Get %3
| ArrangeBy (#0)

%11 =
| Join %4 %9 %10 (= #17 #23 #24)
| | implementation = Differential %9 %10.(#0) %4.(#17)
| Join %7 %8 (= #0 #18 #23) (= #1 #24) (= #2 #25) (= #3 #26) (= #4 #27) (= #5 #28) (= #6 #29) (= #7 #30) (= #8 #31) (= #9 #32) (= #10 #33) (= #11 #34) (= #12 #35) (= #13 #36) (= #14 #37) (= #15 #38) (= #16 #39) (= #17 #40) (= #19 #42) (= #20 #43) (= #21 #44) (= #22 #45)
| | implementation = Differential %7 %8.(#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #19, #20, #21, #22)
| | demand = (#17, #20..#22)
| Reduce group=(#20, substr(#22, 1, 3), #21) count(distinct #17)

Expand Down Expand Up @@ -1338,51 +1317,38 @@ ORDER BY su_name

%3 =
| Get %2
| ArrangeBy ()

%4 =
| Get materialize.public.stock (u26)
| Get %2
| ArrangeBy ()

%5 =
| Get materialize.public.orderline (u19)
| ArrangeBy (#4)
| Get materialize.public.stock (u26)

%6 =
| Join %3 %4 %5 (= #11 #33)
| | implementation = Differential %4 %5.(#4) %3.()
| | demand = (#0, #11..#13, #35, #36)
| Filter (datetots(#35) > 2010-05-23 12:00:00)
| Get materialize.public.orderline (u19)
| ArrangeBy (#4)

%7 =
| Get %2

%8 =
| Get %6

%9 =
| Get %6
| Distinct group=(#11)
| ArrangeBy (#0)

%10 =
| Get materialize.public.item (u24)
| ArrangeBy (#0)

%11 =
| Join %8 %9 %10 (= #11 #39 #40)
| | implementation = Differential %8 %9.(#0) %10.(#0)
| | demand = (#0, #11..#13, #36, #44)
| Filter "^co.*$" ~(#44)
| Reduce group=(#0, #11, #12, #13) sum(#36)
%8 =
| Join %4 %5 %6 %7 (= #11 #33 #39)
| | implementation = Differential %5 %7.(#0) %6.(#4) %4.()
| | demand = (#0, #11..#32, #34..#38, #43)
| Filter "^co.*$" ~(#43), (datetots(#35) > 2010-05-23 12:00:00)
| Distinct group=(#0, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24, #25, #26, #27, #28, #29, #30, #31, #32, #11, #34, #35, #36, #37, #38)
| Reduce group=(#0, #1, #2, #3) sum(#26)
| Filter ((2 * #3) > #4)
| Map ((#1 * #2) % 10000)
| Filter (#0 = #5)
| Distinct group=(#0)
| ArrangeBy (#0)

%12 =
| Join %7 %11 (= #0 #11)
| | implementation = Differential %7 %11.(#0)
%9 =
| Join %3 %8 (= #0 #11)
| | implementation = Differential %3 %8.(#0)
| | demand = (#1, #2)
| Project (#1, #2)

Expand Down Expand Up @@ -1439,45 +1405,38 @@ ORDER BY numwait DESC, su_name
%5 =
| Join %0 %1 %2 %3 %4 (= #0 #42) (= #3 #43) (= #7 #17) (= #8 #18) (= #9 #19 #26) (= #11 #25)
| | implementation = Differential %1 %2.(#0, #1, #2) %3.(#0, #1) %0.(#0) %4.(#0)
| | demand = (#1, #7..#9, #13, #21, #44)
| | demand = (#0..#16, #20..#24, #27..#41, #44..#46)
| Filter (#44 = "GERMANY"), (#13 > #21)

%6 =
| Get %5
| Distinct group=(#7, #8, #9, #13)

%7 =
| Get %5
| ArrangeBy (#7, #8, #9, #13)
| Get materialize.public.orderline (u19)
| ArrangeBy (#2, #1, #0)

%8 =
| Get %6
| Join %6 %7 (= #7 #47) (= #8 #48) (= #9 #49)
| | implementation = Differential %6 %7.(#2, #1, #0)
| | demand = (#0..#16, #20..#24, #27..#41, #45, #46, #53)
| Filter (#53 > #13)
| Distinct group=(#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #7, #8, #9, #20, #21, #22, #23, #24, #11, #9, #27, #28, #29, #30, #31, #32, #33, #34, #35, #36, #37, #38, #39, #40, #41, #0, #3, "GERMANY", #45, #46)
| Negate

%9 =
| Get materialize.public.orderline (u19)
| ArrangeBy (#2, #1, #0)
| Get %5
| Distinct group=(#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #7, #8, #9, #20, #21, #22, #23, #24, #11, #9, #27, #28, #29, #30, #31, #32, #33, #34, #35, #36, #37, #38, #39, #40, #41, #0, #3, "GERMANY", #45, #46)

%10 =
| Join %8 %9 (= #0 #4) (= #1 #5) (= #2 #6)
| | implementation = Differential %8 %9.(#2, #1, #0)
| | demand = (#0..#3, #10)
| Filter (#10 > #3)
| Distinct group=(#0, #1, #2, #3)
| Negate
| Union %8 %9
| ArrangeBy (#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24, #25, #26, #27, #28, #29, #30, #31, #32, #33, #34, #35, #36, #37, #38, #39, #40, #41, #42, #43, #44, #45, #46)

%11 =
| Get %6
| Get %5

%12 =
| Union %10 %11

%13 =
| Get %6
| ArrangeBy (#0, #1, #2, #3)

%14 =
| Join %7 %12 %13 (= #7 #47 #51) (= #8 #48 #52) (= #9 #49 #53) (= #13 #50 #54)
| | implementation = Differential %12 %13.(#0, #1, #2, #3) %7.(#7, #8, #9, #13)
| Join %10 %11 (= #0 #42 #47) (= #1 #48) (= #2 #49) (= #3 #43 #50) (= #4 #51) (= #5 #52) (= #6 #53) (= #7 #17 #54) (= #8 #18 #55) (= #9 #19 #26 #56) (= #10 #57) (= #11 #25 #58) (= #12 #59) (= #13 #60) (= #14 #61) (= #15 #62) (= #16 #63) (= #20 #67) (= #21 #68) (= #22 #69) (= #23 #70) (= #24 #71) (= #27 #74) (= #28 #75) (= #29 #76) (= #30 #77) (= #31 #78) (= #32 #79) (= #33 #80) (= #34 #81) (= #35 #82) (= #36 #83) (= #37 #84) (= #38 #85) (= #39 #86) (= #40 #87) (= #41 #88) (= #44 #91) (= #45 #92) (= #46 #93)
| | implementation = Differential %11 %10.(#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21, #22, #23, #24, #25, #26, #27, #28, #29, #30, #31, #32, #33, #34, #35, #36, #37, #38, #39, #40, #41, #42, #43, #44, #45, #46)
| | demand = (#1)
| Reduce group=(#1) countall(true)

Expand Down Expand Up @@ -1522,38 +1481,38 @@ ORDER BY substr(c_state, 1, 1)
%2 =
| Join %0 %1
| | implementation = Differential %0 %1.()
| | demand = (#0..#2, #9, #16, #24)
| | demand = (#0..#21, #24)
| Filter ((#16 * 1000000dec) > #24)

%3 =
| Get %2
| ArrangeBy (#0, #1, #2)

%4 =
| Get %2
| ArrangeBy (#0, #1, #2)

%5 =
| Get materialize.public.order (u16)
| ArrangeBy (#2, #1, #3)

%6 =
| Join %4 %5 (= #0 #28) (= #1 #26) (= #2 #27)
| | implementation = DeltaQuery %4 %5.(#2, #1, #3) | %5 %4.(#0, #1, #2)
| | demand = (#0..#2)
| Distinct group=(#0, #1, #2)
%5 =
| Join %3 %4 (= #0 #28) (= #1 #26) (= #2 #27)
| | implementation = DeltaQuery %3 %4.(#2, #1, #3) | %4 %3.(#0, #1, #2)
| | demand = (#0..#21)
| Distinct group=(#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21)
| Negate

%7 =
%6 =
| Get %2
| Project (#0..#2)
| Project (#0..#21)

%7 =
| Union %5 %6

%8 =
| Union %6 %7
| Get %2
| ArrangeBy (#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21)

%9 =
| Join %3 %8 (= #0 #25) (= #1 #26) (= #2 #27)
| | implementation = Differential %8 %3.(#0, #1, #2)
| Join %7 %8 (= #0 #22) (= #1 #23) (= #2 #24) (= #3 #25) (= #4 #26) (= #5 #27) (= #6 #28) (= #7 #29) (= #8 #30) (= #9 #31) (= #10 #32) (= #11 #33) (= #12 #34) (= #13 #35) (= #14 #36) (= #15 #37) (= #16 #38) (= #17 #39) (= #18 #40) (= #19 #41) (= #20 #42) (= #21 #43)
| | implementation = Differential %7 %8.(#0, #1, #2, #3, #4, #5, #6, #7, #8, #9, #10, #11, #12, #13, #14, #15, #16, #17, #18, #19, #20, #21)
| | demand = (#9, #16)
| Reduce group=(substr(#9, 1, 1)) countall(true) sum(#16)

Expand Down
Loading

0 comments on commit fb64d76

Please sign in to comment.