From df34c56a2eda750413f57b3fc86b37a402025f67 Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Fri, 15 May 2020 13:12:46 -0500 Subject: [PATCH] opt: add support for calculating selectivity from multi-column stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds support for calculating selectivity from multi-column statistics. It changes selectivityFromDistinctCounts to have the following semantics: selectivityFromDistinctCounts calculates the selectivity of a filter by using estimated distinct counts of each constrained column before and after the filter was applied. We can perform this calculation in two different ways: (1) by treating the columns as completely independent, or (2) by assuming they are correlated. (1) Assuming independence between columns, we can calculate the selectivity by taking the product of selectivities of each constrained column. In the general case, this can be represented by the formula: ``` selectivity = ∏ (new_distinct(i) / old_distinct(i)) for i in {constrained columns} ``` (2) If useMultiCol is true, we assume there is some correlation between columns. In this case, we calculate the selectivity using multi-column statistics. ``` selectivity = new_distinct({constrained columns}) / old_distinct({constrained columns}) ``` This formula looks simple, but the challenge is that it is difficult to determine the correct value for new_distinct({constrained columns}) if each column is not constrained to a single value. For example, if new_distinct(x)=2 and new_distinct(y)=2, new_distinct({x,y}) could be 2, 3 or 4. 
We estimate the new distinct count as follows, using the concept of "soft functional dependency (FD) strength": ``` new_distinct({x,y}) = min_value + range * (1 - FD_strength_scaled) where min_value = max(new_distinct(x), new_distinct(y)); max_value = new_distinct(x) * new_distinct(y); range = max_value - min_value; FD_strength = max(old_distinct(x), old_distinct(y)) / old_distinct({x,y}); min_FD_strength = max(old_distinct(x), old_distinct(y)) / (old_distinct(x) * old_distinct(y)); FD_strength_scaled = (FD_strength - min_FD_strength) / (1 - min_FD_strength) ``` Suppose that old_distinct(x)=100 and old_distinct(y)=10. If x and y are perfectly correlated, old_distinct({x,y})=100. Using the example from above, new_distinct(x)=2 and new_distinct(y)=2. Plugging the values into the equation, we get: ``` FD_strength_scaled = 1; new_distinct({x,y}) = 2 + (4 - 2) * (1 - 1) = 2 ``` If x and y are completely independent, however, old_distinct({x,y})=1000. In this case, we get: ``` FD_strength_scaled = 0; new_distinct({x,y}) = 2 + (4 - 2) * (1 - 0) = 4 ``` Note that even if useMultiCol is true and we calculate the selectivity based on equation (2) above, we still want to take equation (1) into account. This is because it is possible that there are two predicates that each have selectivity s, but the multi-column selectivity is also s. In order to ensure that the cost model considers the two predicates combined to be more selective than either one individually, we must give some weight to equation (1). Therefore, instead of equation (2) we actually return the following selectivity: ``` selectivity = (1 - w) * (eq. 1) + w * (eq. 2) ``` where w is currently set to 0.9. This selectivity will be used later to update the row count and the distinct count for the unconstrained columns. 
Fixes #34422 Release note (performance improvement): Added support for calculating the selectivity of filter predicates in the optimizer using multi-column statistics. This improves the cardinality estimates of the optimizer when a query has filter predicates constraining multiple columns. As a result, the optimizer may choose a better query plan in some cases. --- pkg/sql/opt/exec/execbuilder/testdata/upsert | 79 +- pkg/sql/opt/memo/statistics_builder.go | 303 ++++- pkg/sql/opt/memo/statistics_builder_test.go | 46 +- pkg/sql/opt/memo/testdata/logprops/scan | 3 +- pkg/sql/opt/memo/testdata/stats/join | 72 ++ pkg/sql/opt/memo/testdata/stats/scan | 1026 ++++++++++++++++- pkg/sql/opt/memo/testdata/stats/select | 108 +- pkg/sql/opt/memo/testdata/stats/values | 4 +- pkg/sql/opt/memo/testdata/stats_quality/tpcc | 58 +- .../opt/memo/testdata/stats_quality/tpch/q06 | 2 +- .../opt/memo/testdata/stats_quality/tpch/q12 | 2 +- .../opt/memo/testdata/stats_quality/tpch/q16 | 4 +- .../opt/memo/testdata/stats_quality/tpch/q17 | 4 +- .../opt/memo/testdata/stats_quality/tpch/q19 | 10 +- pkg/sql/opt/norm/testdata/rules/combo | 12 +- pkg/sql/opt/xform/testdata/coster/join | 12 +- pkg/sql/opt/xform/testdata/coster/scan | 20 +- pkg/sql/opt/xform/testdata/external/customer | 2 +- .../opt/xform/testdata/external/tpcc-no-stats | 24 +- pkg/sql/opt/xform/testdata/external/trading | 239 ++-- .../xform/testdata/external/trading-mutation | 233 ++-- pkg/sql/opt/xform/testdata/rules/join | 20 +- pkg/sql/opt/xform/testdata/rules/select | 2 +- 23 files changed, 1800 insertions(+), 485 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/upsert b/pkg/sql/opt/exec/execbuilder/testdata/upsert index 3f4c24c56b59..0e7f1a73a650 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/upsert +++ b/pkg/sql/opt/exec/execbuilder/testdata/upsert @@ -528,42 +528,43 @@ EXPLAIN (VERBOSE) INSERT INTO target SELECT x, y, z FROM source WHERE (y IS NULL OR y > 0) AND x <> 1 ON CONFLICT (b, c) DO UPDATE SET b=5 
---- -· distributed false · · -· vectorized false · · -count · · () · - └── upsert · · () · - │ into target(a, b, c) · · - │ strategy opt upserter · · - │ auto commit · · · - └── render · · (x, y, z, a, b, c, upsert_b, a) · - │ render 0 x · · - │ render 1 y · · - │ render 2 z · · - │ render 3 a · · - │ render 4 b · · - │ render 5 c · · - │ render 6 upsert_b · · - │ render 7 a · · - └── render · · (upsert_b, x, y, z, a, b, c) · - │ render 0 CASE WHEN a IS NULL THEN y ELSE 5 END · · - │ render 1 x · · - │ render 2 y · · - │ render 3 z · · - │ render 4 a · · - │ render 5 b · · - │ render 6 c · · - └── lookup-join · · (x, y, z, a, b, c) · - │ table target@target_b_c_key · · - │ type left outer · · - │ equality (y, z) = (b, c) · · - │ equality cols are key · · · - │ parallel · · · - └── distinct · · (x, y, z) · - │ distinct on y, z · · - │ nulls are distinct · · · - │ error on duplicate · · · - │ order key y, z · · - └── scan · · (x, y, z) +y,+z -· table source@source_y_z_idx · · -· spans /NULL-/!NULL /1- · · -· filter x != 1 · · +· distributed false · · +· vectorized false · · +count · · () · + └── upsert · · () · + │ into target(a, b, c) · · + │ strategy opt upserter · · + │ auto commit · · · + └── render · · (x, y, z, a, b, c, upsert_b, a) · + │ render 0 x · · + │ render 1 y · · + │ render 2 z · · + │ render 3 a · · + │ render 4 b · · + │ render 5 c · · + │ render 6 upsert_b · · + │ render 7 a · · + └── render · · (upsert_b, x, y, z, a, b, c) · + │ render 0 CASE WHEN a IS NULL THEN y ELSE 5 END · · + │ render 1 x · · + │ render 2 y · · + │ render 3 z · · + │ render 4 a · · + │ render 5 b · · + │ render 6 c · · + └── merge-join · · (a, b, c, x, y, z) · + │ type right outer · · + │ equality (b, c) = (y, z) · · + │ mergeJoinOrder +"(b=y)",+"(c=z)" · · + ├── scan · · (a, b, c) +b,+c + │ table target@target_b_c_key · · + │ spans FULL SCAN · · + └── distinct · · (x, y, z) +y,+z + │ distinct on y, z · · + │ nulls are distinct · · · + │ error on duplicate · · · + │ order key 
y, z · · + └── scan · · (x, y, z) +y,+z +· table source@source_y_z_idx · · +· spans /NULL-/!NULL /1- · · +· filter x != 1 · · diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index 258ebd4c77cf..af44d150e923 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -609,9 +609,12 @@ func (sb *statisticsBuilder) buildScan(scan *ScanExpr, relProps *props.Relationa // Calculate row count and selectivity // ----------------------------------- s.ApplySelectivity(sb.selectivityFromHistograms(histCols, scan, s)) - s.ApplySelectivity(sb.selectivityFromDistinctCounts(constrainedCols.Difference(histCols), scan, s)) + s.ApplySelectivity(sb.selectivityFromMultiColDistinctCounts(constrainedCols, scan, s)) s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(numUnappliedConjuncts)) s.ApplySelectivity(sb.selectivityFromNullsRemoved(scan, relProps, constrainedCols)) + + // Adjust the selectivity so we don't double-count the histogram columns. + s.ApplySelectivity(1.0 / sb.selectivityFromSingleColDistinctCounts(histCols, scan, s)) } sb.finalizeFromCardinality(relProps) @@ -680,13 +683,16 @@ func (sb *statisticsBuilder) buildSelect(sel *SelectExpr, relProps *props.Relati inputStats := &sel.Input.Relational().Stats s.RowCount = inputStats.RowCount s.ApplySelectivity(sb.selectivityFromHistograms(histCols, sel, s)) - s.ApplySelectivity(sb.selectivityFromDistinctCounts(constrainedCols.Difference(histCols), sel, s)) + s.ApplySelectivity(sb.selectivityFromMultiColDistinctCounts(constrainedCols, sel, s)) s.ApplySelectivity(sb.selectivityFromEquivalencies(equivReps, &relProps.FuncDeps, sel, s)) s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(numUnappliedConjuncts)) s.ApplySelectivity(sb.selectivityFromNullsRemoved(sel, relProps, constrainedCols)) + // Adjust the selectivity so we don't double-count the histogram columns. 
+ s.ApplySelectivity(1.0 / sb.selectivityFromSingleColDistinctCounts(histCols, sel, s)) + // Update distinct counts based on equivalencies; this should happen after - // selectivityFromDistinctCounts and selectivityFromEquivalencies. + // selectivityFromMultiColDistinctCounts and selectivityFromEquivalencies. sb.applyEquivalencies(equivReps, &relProps.FuncDeps, sel, relProps) sb.finalizeFromCardinality(relProps) @@ -937,12 +943,20 @@ func (sb *statisticsBuilder) buildJoin( s.ApplySelectivity(sb.selectivityFromGeoRelationship(join, s)) } s.ApplySelectivity(sb.selectivityFromHistograms(histCols, join, s)) - s.ApplySelectivity(sb.selectivityFromDistinctCounts(constrainedCols.Difference(histCols), join, s)) + s.ApplySelectivity(sb.selectivityFromMultiColDistinctCounts( + constrainedCols.Intersection(leftCols), join, s, + )) + s.ApplySelectivity(sb.selectivityFromMultiColDistinctCounts( + constrainedCols.Intersection(rightCols), join, s, + )) s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(numUnappliedConjuncts)) s.ApplySelectivity(sb.selectivityFromNullsRemoved(join, relProps, constrainedCols)) + // Adjust the selectivity so we don't double-count the histogram columns. + s.ApplySelectivity(1.0 / sb.selectivityFromSingleColDistinctCounts(histCols, join, s)) + // Update distinct counts based on equivalencies; this should happen after - // selectivityFromDistinctCounts and selectivityFromEquivalencies. + // selectivityFromMultiColDistinctCounts and selectivityFromEquivalencies. 
sb.applyEquivalencies(equivReps, &h.filtersFD, join, relProps) switch h.joinType { @@ -1441,13 +1455,13 @@ func (sb *statisticsBuilder) buildZigzagJoin( // Calculate selectivity and row count // ----------------------------------- - s.ApplySelectivity(sb.selectivityFromDistinctCounts(constrainedCols, zigzag, s)) + s.ApplySelectivity(sb.selectivityFromMultiColDistinctCounts(constrainedCols, zigzag, s)) s.ApplySelectivity(sb.selectivityFromEquivalencies(equivReps, &relProps.FuncDeps, zigzag, s)) s.ApplySelectivity(sb.selectivityFromUnappliedConjuncts(numUnappliedConjuncts)) s.ApplySelectivity(sb.selectivityFromNullsRemoved(zigzag, relProps, constrainedCols)) // Update distinct counts based on equivalencies; this should happen after - // selectivityFromDistinctCounts and selectivityFromEquivalencies. + // selectivityFromMultiColDistinctCounts and selectivityFromEquivalencies. sb.applyEquivalencies(equivReps, &relProps.FuncDeps, zigzag, relProps) sb.finalizeFromCardinality(relProps) @@ -2549,6 +2563,12 @@ const ( // until we can get better statistics on inverted indexes and geospatial // columns. unknownGeoRelationshipSelectivity = 1.0 / 100.0 + + // multiColWeight is the weight to assign the selectivity calculation using + // multi-column statistics versus the calculation using single-column + // statistics. See the comment above selectivityFromMultiColDistinctCounts for + // details. + multiColWeight = 9.0 / 10.0 ) // countJSONPaths returns the number of JSON paths in the specified @@ -2636,8 +2656,8 @@ func (sb *statisticsBuilder) applyFilter( // Update constrainedCols after the above check for isEqualityWithTwoVars. 
// We will use constrainedCols later to determine which columns to use for - // selectivity calculation in selectivityFromDistinctCounts, and we want to - // make sure that we don't include columns that were only present in + // selectivity calculation in selectivityFromMultiColDistinctCounts, and we + // want to make sure that we don't include columns that were only present in // equality conjuncts such as var1=var2. The selectivity of these conjuncts // will be accounted for in selectivityFromEquivalencies. scalarProps := conjunct.ScalarProps() @@ -3054,58 +3074,263 @@ func (sb *statisticsBuilder) updateDistinctNullCountsFromEquivalency( }) } -// selectivityFromDistinctCounts calculates the selectivity of a filter by -// taking the product of selectivities of each constrained column. In the -// general case, this can be represented by the formula: +// selectivityFromMultiColDistinctCounts calculates the selectivity of a filter +// by using estimated distinct counts of each constrained column before +// and after the filter was applied. We can perform this calculation in +// two different ways: (1) by treating the columns as completely independent, +// or (2) by assuming they are correlated. +// +// (1) Assuming independence between columns, we can calculate the selectivity +// by taking the product of selectivities of each constrained column. In +// the general case, this can be represented by the formula: +// +// ┬-┬ ⎛ new_distinct(i) ⎞ +// selectivity = │ │ ⎜ --------------- ⎟ +// ┴ ┴ ⎝ old_distinct(i) ⎠ +// i in +// {constrained +// columns} +// +// (2) If we instead assume there is some correlation between columns, we +// calculate the selectivity using multi-column statistics. 
+// +// ⎛ new_distinct({constrained columns}) ⎞ +// selectivity = ⎜ ----------------------------------- ⎟ +// ⎝ old_distinct({constrained columns}) ⎠ +// +// This formula looks simple, but the challenge is that it is difficult +// to determine the correct value for new_distinct({constrained columns}) +// if each column is not constrained to a single value. For example, if +// new_distinct(x)=2 and new_distinct(y)=2, new_distinct({x,y}) could be 2, +// 3 or 4. We estimate the new distinct count as follows, using the concept +// of "soft functional dependency (FD) strength" as defined in [1]: +// +// new_distinct({x,y}) = min_value + range * (1 - FD_strength_scaled) +// +// where +// +// min_value = max(new_distinct(x), new_distinct(y)) +// max_value = new_distinct(x) * new_distinct(y) +// range = max_value - min_value +// +// ⎛ max(old_distinct(x),old_distinct(y)) ⎞ +// FD_strength = ⎜ ------------------------------------ ⎟ +// ⎝ old_distinct({x,y}) ⎠ +// +// ⎛ max(old_distinct(x), old_distinct(y)) ⎞ +// min_FD_strength = ⎜ ------------------------------------- ⎟ +// ⎝ old_distinct(x) * old_distinct(y) ⎠ +// +// ⎛ FD_strength - min_FD_strength ⎞ // scales FD_strength +// FD_strength_scaled = ⎜ ----------------------------- ⎟ // to be between +// ⎝ 1 - min_FD_strength ⎠ // 0 and 1 +// +// Suppose that old_distinct(x)=100 and old_distinct(y)=10. If x and y are +// perfectly correlated, old_distinct({x,y})=100. Using the example from +// above, new_distinct(x)=2 and new_distinct(y)=2. Plugging in the values +// into the equation, we get: +// +// FD_strength_scaled = 1 +// new_distinct({x,y}) = 2 + (4 - 2) * (1 - 1) = 2 +// +// If x and y are completely independent, however, old_distinct({x,y})=1000. 
+// In this case, we get: +// +// FD_strength_scaled = 0 +// new_distinct({x,y}) = 2 + (4 - 2) * (1 - 0) = 4 // -// ┬-┬ ⎛ new distinct(i) ⎞ -// selectivity = │ │ ⎜ --------------- ⎟ -// ┴ ┴ ⎝ old distinct(i) ⎠ -// i in -// {constrained -// columns} +// Note that even if we calculate the selectivity based on equation (2) above, +// we still want to take equation (1) into account. This is because it is +// possible that there are two predicates that each have selectivity s, but the +// multi-column selectivity is also s. In order to ensure that the cost model +// considers the two predicates combined to be more selective than either one +// individually, we must give some weight to equation (1). Therefore, instead +// of equation (2) we actually return the following selectivity: +// +// selectivity = (1 - w) * (equation 1) + w * (equation 2) +// +// where w is the constant multiColWeight. // // This selectivity will be used later to update the row count and the // distinct count for the unconstrained columns. // -// This algorithm assumes the columns are completely independent. +// [1] Ilyas, Ihab F., et al. "CORDS: automatic discovery of correlations and +// soft functional dependencies." SIGMOD 2004. // -func (sb *statisticsBuilder) selectivityFromDistinctCounts( +func (sb *statisticsBuilder) selectivityFromMultiColDistinctCounts( cols opt.ColSet, e RelExpr, s *props.Statistics, ) (selectivity float64) { - selectivity = 1.0 + // Respect the session setting OptimizerUseMultiColStats. + if !sb.evalCtx.SessionData.OptimizerUseMultiColStats { + return sb.selectivityFromSingleColDistinctCounts(cols, e, s) + } + + // Make a copy of cols so we can remove columns that are not constrained. + multiColSet := cols.Copy() + + // First calculate the selectivity from equation (1) (see function comment), + // and collect the inputs to equation (2). 
+ singleColSelectivity := 1.0 + newDistinctProduct, oldDistinctProduct := 1.0, 1.0 + maxNewDistinct, maxOldDistinct := float64(0), float64(0) + multiColNullCount := -1.0 + minLocalSel := math.MaxFloat64 for col, ok := cols.Next(0); ok; col, ok = cols.Next(col + 1) { colStat, ok := s.ColStats.Lookup(opt.MakeColSet(col)) if !ok { + multiColSet.Remove(col) continue } inputColStat, inputStats := sb.colStatFromInput(colStat.Cols, e) - newDistinct := colStat.DistinctCount - oldDistinct := inputColStat.DistinctCount + localSel := sb.selectivityFromDistinctCount(colStat, inputColStat, inputStats.RowCount) + singleColSelectivity *= localSel + + // Don't bother including columns in the multi-column calculation that + // don't contribute to the selectivity. + if localSel == 1 { + multiColSet.Remove(col) + continue + } - // Nulls are included in the distinct count, so remove 1 from the - // distinct counts if needed. - if inputColStat.NullCount > 0 { - oldDistinct = max(oldDistinct-1, 0) + // Calculate values needed for the multi-column stats calculation below. + newDistinctProduct *= colStat.DistinctCount + oldDistinctProduct *= inputColStat.DistinctCount + if colStat.DistinctCount > maxNewDistinct { + maxNewDistinct = colStat.DistinctCount } - if colStat.NullCount > 0 { - newDistinct = max(newDistinct-1, 0) + if inputColStat.DistinctCount > maxOldDistinct { + maxOldDistinct = inputColStat.DistinctCount } + if localSel < minLocalSel { + minLocalSel = localSel + } + if multiColNullCount < 0 { + multiColNullCount = inputStats.RowCount + } + // Multiply by the expected chance of collisions with nulls already + // collected. + multiColNullCount *= colStat.NullCount / inputStats.RowCount + } - // Calculate the selectivity of the predicate. 
- nonNullSelectivity := fraction(newDistinct, oldDistinct) - nullSelectivity := fraction(colStat.NullCount, inputColStat.NullCount) - selectivity *= sb.predicateSelectivity( - nonNullSelectivity, nullSelectivity, inputColStat.NullCount, inputStats.RowCount, - ) + // If we don't need to use a multi-column statistic, we're done. + if multiColSet.Len() <= 1 { + return singleColSelectivity + } + + // Otherwise, calculate the selectivity using multi-column stats from + // equation (2). See the comment above the function definition for details + // about the formula. + inputColStat, inputStats := sb.colStatFromInput(multiColSet, e) + fdStrength := min(maxOldDistinct/inputColStat.DistinctCount, 1.0) + maxMutiColOldDistinct := min(oldDistinctProduct, inputStats.RowCount) + minFdStrength := min(maxOldDistinct/maxMutiColOldDistinct, fdStrength) + if minFdStrength < 1 { + // Scale the fdStrength so it ranges between 0 and 1. + fdStrength = (fdStrength - minFdStrength) / (1 - minFdStrength) + } + distinctCountRange := max(newDistinctProduct-maxNewDistinct, 0) + + colStat, _ := s.ColStats.Add(multiColSet) + colStat.DistinctCount = maxNewDistinct + distinctCountRange*(1-fdStrength) + colStat.NullCount = multiColNullCount + multiColSelectivity := sb.selectivityFromDistinctCount(colStat, inputColStat, inputStats.RowCount) + + // Now, we must adjust multiColSelectivity so that it is not greater than + // the selectivity of any subset of the columns in multiColSet. This would + // be internally inconsistent and could lead to bad plans. For example, + // x=1 AND y=1 should always be considered more selective (i.e., with lower + // selectivity) than x=1 alone. + // + // We have already found the minimum selectivity of all the individual + // columns (subsets of size 1) above and stored it in minLocalSel. It's not + // practical, however, to calculate the minimum selectivity for all subsets + // larger than size 1. 
+ // + // Instead, we focus on a specific case known to occasionally have this + // problem: when multiColSet contains 3 or more columns and at least one has + // distinct count greater than 1, the subset of columns that have distinct + // count less than or equal to 1 may have a smaller selectivity according to + // equation (2). + // + // In this case, update minLocalSel and adjust multiColSelectivity as needed. + // + if maxNewDistinct > 1 && multiColSet.Len() > 2 { + var lowDistinctCountCols opt.ColSet + multiColSet.ForEach(func(col opt.ColumnID) { + // We already know the column stat exists if it's in multiColSet. + colStat, _ := s.ColStats.Lookup(opt.MakeColSet(col)) + if colStat.DistinctCount <= 1 { + lowDistinctCountCols.Add(col) + } + }) + + if lowDistinctCountCols.Len() > 1 { + selLowDistinctCountCols := sb.selectivityFromMultiColDistinctCounts( + lowDistinctCountCols, e, s, + ) + if selLowDistinctCountCols < minLocalSel { + minLocalSel = selLowDistinctCountCols + } + } + } + multiColSelectivity = min(multiColSelectivity, minLocalSel) + + // As described in the function comment, we actually return a weighted sum + // of multi-column and single-column selectivity estimates. + return (1-multiColWeight)*singleColSelectivity + multiColWeight*multiColSelectivity +} + +// selectivityFromSingleColDistinctCounts calculates the selectivity of a +// filter by using estimated distinct counts of each constrained column before +// and after the filter was applied. It assumes independence between columns, +// so it uses equation (1) from selectivityFromMultiColDistinctCounts. See the +// comment above that function for details. 
+func (sb *statisticsBuilder) selectivityFromSingleColDistinctCounts( + cols opt.ColSet, e RelExpr, s *props.Statistics, +) (selectivity float64) { + selectivity = 1.0 + for col, ok := cols.Next(0); ok; col, ok = cols.Next(col + 1) { + colStat, ok := s.ColStats.Lookup(opt.MakeColSet(col)) + if !ok { + continue + } + + inputColStat, inputStats := sb.colStatFromInput(colStat.Cols, e) + selectivity *= sb.selectivityFromDistinctCount(colStat, inputColStat, inputStats.RowCount) } return selectivity } -// selectivityFromHistograms is similar to selectivityFromDistinctCounts, in -// that it calculates the selectivity of a filter by taking the product of +// selectivityFromDistinctCount calculates the selectivity of a filter by using +// the estimated distinct count of a single constrained column or set of +// columns before and after the filter was applied. +func (sb *statisticsBuilder) selectivityFromDistinctCount( + colStat, inputColStat *props.ColumnStatistic, inputRowCount float64, +) float64 { + newDistinct := colStat.DistinctCount + oldDistinct := inputColStat.DistinctCount + + // Nulls are included in the distinct count, so remove 1 from the + // distinct counts if needed. + if inputColStat.NullCount > 0 { + oldDistinct = max(oldDistinct-1, 0) + } + if colStat.NullCount > 0 { + newDistinct = max(newDistinct-1, 0) + } + + // Calculate the selectivity of the predicate. + nonNullSelectivity := fraction(newDistinct, oldDistinct) + nullSelectivity := fraction(colStat.NullCount, inputColStat.NullCount) + return sb.predicateSelectivity( + nonNullSelectivity, nullSelectivity, inputColStat.NullCount, inputRowCount, + ) +} + +// selectivityFromHistograms is similar to selectivityFromSingleColDistinctCounts, +// in that it calculates the selectivity of a filter by taking the product of // selectivities of each constrained column. 
// // For histograms, the selectivity of a constrained column is calculated as @@ -3141,7 +3366,7 @@ func (sb *statisticsBuilder) selectivityFromHistograms( } // selectivityFromNullsRemoved calculates the selectivity from null-rejecting -// filters that were not already accounted for in selectivityFromDistinctCounts +// filters that were not already accounted for in selectivityFromMultiColDistinctCounts // or selectivityFromHistograms. The columns for filters already accounted for // should be designated by ignoreCols. func (sb *statisticsBuilder) selectivityFromNullsRemoved( @@ -3362,7 +3587,7 @@ func (sb *statisticsBuilder) numConjunctsInConstraint( // Cases of NULL in a constraint should be ignored. For example, // without knowledge of the data distribution, /a: (/NULL - /10] should // have the same estimated selectivity as /a: [/10 - ]. Selectivity - // of NULL constraints is handled in selectivityFromDistinctCounts, + // of NULL constraints is handled in selectivityFromMultiColDistinctCounts, // selectivityFromHistograms, and selectivityFromNullsRemoved. if c.Columns.Get(nth).Descending() || span.StartKey().Value(nth) != tree.DNull { diff --git a/pkg/sql/opt/memo/statistics_builder_test.go b/pkg/sql/opt/memo/statistics_builder_test.go index 75717e8eafda..8ff73fe549c4 100644 --- a/pkg/sql/opt/memo/statistics_builder_test.go +++ b/pkg/sql/opt/memo/statistics_builder_test.go @@ -30,6 +30,7 @@ import ( // by the optimizer. func TestGetStatsFromConstraint(t *testing.T) { evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings()) + evalCtx.SessionData.OptimizerUseMultiColStats = true catalog := testcat.New() if _, err := catalog.ExecuteDDL( @@ -93,8 +94,8 @@ func TestGetStatsFromConstraint(t *testing.T) { tabID := mem.Metadata().AddTable(tab, tn) // Test that applyConstraintSet correctly updates the statistics from - // constraint set cs, and selectivity is calculated correctly. 
- statsFunc := func(cs *constraint.Set, expectedStats string, expectedSelectivity float64) { + // constraint set cs. + statsFunc := func(cs *constraint.Set, expectedStats string) { t.Helper() var cols opt.ColSet @@ -121,13 +122,13 @@ func TestGetStatsFromConstraint(t *testing.T) { // Calculate row count and selectivity. s.RowCount = scan.Relational().Stats.RowCount - s.ApplySelectivity(sb.selectivityFromDistinctCounts(cols, sel, s)) + s.ApplySelectivity(sb.selectivityFromMultiColDistinctCounts(cols, sel, s)) // Update null counts. sb.updateNullCountsFromProps(sel, relProps) // Check if the statistics match the expected value. - testStats(t, s, expectedStats, expectedSelectivity) + testStats(t, s, expectedStats) } c1 := constraint.ParseConstraint(&evalCtx, "/1: [/2 - /5] [/8 - /10]") @@ -155,105 +156,86 @@ func TestGetStatsFromConstraint(t *testing.T) { statsFunc( cs1, "[rows=140000000, distinct(1)=7, null(1)=0]", - 7.0/500, ) cs2 := constraint.SingleConstraint(&c2) statsFunc( cs2, "[rows=3.33333333e+09, distinct(2)=166.666667, null(2)=0]", - 1.0/3, ) cs3 := constraint.SingleConstraint(&c3) statsFunc( cs3, "[rows=20000000, distinct(3)=1, null(3)=0]", - 1.0/500, ) cs12 := constraint.SingleConstraint(&c12) statsFunc( cs12, "[rows=20000000, distinct(1)=1, null(1)=0]", - 1.0/500, ) cs123 := constraint.SingleConstraint(&c123) statsFunc( cs123, - "[rows=400, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=5, null(3)=0]", - 5.0/125000000, + "[rows=36040, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=5, null(3)=0, distinct(1,2)=1, null(1,2)=0, distinct(1-3)=5, null(1-3)=0]", ) cs123n := constraint.SingleConstraint(&c123n) statsFunc( cs123n, - "[rows=40000, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0]", - 1.0/250000, + "[rows=40000, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=1, null(1,2)=0]", ) cs32 := constraint.SingleConstraint(&c32) statsFunc( cs32, - "[rows=80000, distinct(2)=2, null(2)=0, distinct(3)=1, 
null(3)=0]", - 2.0/250000, + "[rows=80000, distinct(2)=2, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=2, null(2,3)=0]", ) cs321 := constraint.SingleConstraint(&c321) statsFunc( cs321, - "[rows=160000, distinct(2)=2, null(2)=0, distinct(3)=2, null(3)=0]", - 4.0/250000, + "[rows=160000, distinct(2)=2, null(2)=0, distinct(3)=2, null(3)=0, distinct(2,3)=4, null(2,3)=0]", ) cs312 := constraint.SingleConstraint(&c312) statsFunc( cs312, - "[rows=2240, distinct(1)=2, null(1)=0, distinct(2)=7, null(2)=0, distinct(3)=2, null(3)=0]", - 28.0/125000000, + "[rows=24490654.6, distinct(1)=2, null(1)=0, distinct(2)=7, null(2)=0, distinct(3)=2, null(3)=0, distinct(1-3)=26.9394737, null(1-3)=0]", ) cs312n := constraint.SingleConstraint(&c312n) statsFunc( cs312n, - "[rows=160000, distinct(1)=2, null(1)=0, distinct(3)=2, null(3)=0]", - 1.0/62500, + "[rows=160000, distinct(1)=2, null(1)=0, distinct(3)=2, null(3)=0, distinct(1,3)=4, null(1,3)=0]", ) cs := cs3.Intersect(&evalCtx, cs123) statsFunc( cs, - "[rows=80, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0]", - 1.0/125000000, + "[rows=909098.909, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(1-3)=1, null(1-3)=0]", ) cs = cs32.Intersect(&evalCtx, cs123) statsFunc( cs, - "[rows=80, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0]", - 1.0/125000000, + "[rows=909098.909, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(1-3)=1, null(1-3)=0]", ) cs45 := constraint.SingleSpanConstraint(&keyCtx45, &sp45) statsFunc( cs45, "[rows=1e+09, distinct(4)=1, null(4)=0]", - 1.0/10, ) } -func testStats( - t *testing.T, s *props.Statistics, expectedStats string, expectedSelectivity float64, -) { +func testStats(t *testing.T, s *props.Statistics, expectedStats string) { t.Helper() actual := s.String() if actual != expectedStats { t.Fatalf("\nexpected: %s\nactual : %s", expectedStats, actual) } - - if 
s.Selectivity != expectedSelectivity { - t.Fatalf("\nexpected: %f\nactual : %f", expectedSelectivity, s.Selectivity) - } } diff --git a/pkg/sql/opt/memo/testdata/logprops/scan b/pkg/sql/opt/memo/testdata/logprops/scan index afbbd3c7adbe..050c1d9d8e99 100644 --- a/pkg/sql/opt/memo/testdata/logprops/scan +++ b/pkg/sql/opt/memo/testdata/logprops/scan @@ -114,7 +114,7 @@ values └── prune: (5) opt -SELECT * FROM t WHERE b IN ('a', 'b') AND c IN (1, 2) AND a IN (2, 3) +SELECT * FROM t@bc WHERE b IN ('a', 'b') AND c IN (1, 2) AND a IN (2, 3) ---- index-join t ├── columns: a:1(int!null) b:2(char!null) c:3(int!null) d:4(char) @@ -129,6 +129,7 @@ index-join t │ ├── [/'a'/2/2 - /'a'/2/3] │ ├── [/'b'/1/2 - /'b'/1/3] │ └── [/'b'/2/2 - /'b'/2/3] + ├── flags: force-index=bc ├── cardinality: [0 - 8] ├── key: (1,2) ├── fd: (1,2)-->(3) diff --git a/pkg/sql/opt/memo/testdata/stats/join b/pkg/sql/opt/memo/testdata/stats/join index 71f26b76bb1d..156d4ad7cf8f 100644 --- a/pkg/sql/opt/memo/testdata/stats/join +++ b/pkg/sql/opt/memo/testdata/stats/join @@ -1483,3 +1483,75 @@ anti-join (cross) │ ├── key: (5) │ └── fd: (5)-->(3,4) └── filters (true) + +exec-ddl +ALTER TABLE xysd INJECT STATISTICS '[ + { + "columns": ["x"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 5000, + "distinct_count": 5000 + }, + { + "columns": ["y"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 5000, + "distinct_count": 400 + }, + { + "columns": ["s"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 5000, + "distinct_count": 10 + } +]' +---- + +exec-ddl +ALTER TABLE uv INJECT STATISTICS '[ + { + "columns": ["u"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 10000, + "distinct_count": 500 + }, + { + "columns": ["v"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 10000, + "distinct_count": 100 + }, + { + "columns": ["u","v"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 10000, + "distinct_count": 550 + 
}, + { + "columns": ["rowid"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 10000, + "distinct_count": 10000 + } +]' +---- + +# We use multi-column stats split across the join to estimate the selectivity +# here. +opt +SELECT * FROM xysd, uv WHERE (s = 'foo' AND u = 3 AND v = 4) OR (s = 'bar' AND u = 5 AND v = 6) +---- +inner-join (cross) + ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) u:5(int!null) v:6(int!null) + ├── stats: [rows=11979.6897, distinct(3)=2, null(3)=0, distinct(5)=2, null(5)=0, distinct(6)=2, null(6)=0, distinct(5,6)=2.19138756, null(5,6)=0] + ├── fd: (1)-->(2-4), (3,4)-->(1,2) + ├── scan uv + │ ├── columns: u:5(int) v:6(int!null) + │ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0, distinct(5,6)=550, null(5,6)=0] + ├── scan xysd + │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) + │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(3)=10, null(3)=0, distinct(4)=500, null(4)=0] + │ ├── key: (1) + │ └── fd: (1)-->(2-4), (3,4)~~>(1,2) + └── filters + └── (((s:3 = 'foo') AND (u:5 = 3)) AND (v:6 = 4)) OR (((s:3 = 'bar') AND (u:5 = 5)) AND (v:6 = 6)) [type=bool, outer=(3,5,6), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; /5: [/3 - /3] [/5 - /5]; /6: [/4 - /4] [/6 - /6])] diff --git a/pkg/sql/opt/memo/testdata/stats/scan b/pkg/sql/opt/memo/testdata/stats/scan index da894004aac9..5fdce31f07ee 100644 --- a/pkg/sql/opt/memo/testdata/stats/scan +++ b/pkg/sql/opt/memo/testdata/stats/scan @@ -71,6 +71,12 @@ ALTER TABLE a INJECT STATISTICS '[ "created_at": "2018-01-01 2:00:00.00000+00:00", "row_count": 3000, "distinct_count": 2 + }, + { + "columns": ["d"], + "created_at": "2018-01-01 2:00:00.00000+00:00", + "row_count": 3000, + "distinct_count": 2000 } ]' ---- @@ -196,12 +202,12 @@ SELECT * FROM a WHERE ((s >= 'bar' AND s <= 'foo') OR (s >= 'foobar')) AND d > 5 ---- select ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) 
b:5(bool) - ├── stats: [rows=500, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0] + ├── stats: [rows=650, distinct(3)=1, null(3)=0, distinct(4)=650, null(4)=0, distinct(3,4)=650, null(3,4)=0] ├── key: (1) ├── fd: (1)-->(2-5), (3,4)-->(1,2,5) ├── scan a │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) b:5(bool) - │ ├── stats: [rows=3000, distinct(1)=2000, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=300, null(4)=0] + │ ├── stats: [rows=3000, distinct(1)=2000, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=2000, null(4)=0, distinct(3,4)=3000, null(3,4)=0] │ ├── key: (1) │ └── fd: (1)-->(2-5), (3,4)~~>(1,2,5) └── filters @@ -213,12 +219,12 @@ SELECT * FROM a WHERE ((s >= 'bar' AND s <= 'foo') OR (s >= 'foobar')) AND d <= ---- select ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) b:5(bool) - ├── stats: [rows=500, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0] + ├── stats: [rows=650, distinct(3)=1, null(3)=0, distinct(4)=650, null(4)=0, distinct(3,4)=650, null(3,4)=0] ├── key: (1) ├── fd: (1)-->(2-5), (3,4)-->(1,2,5) ├── scan a │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) b:5(bool) - │ ├── stats: [rows=3000, distinct(1)=2000, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=300, null(4)=0] + │ ├── stats: [rows=3000, distinct(1)=2000, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=2000, null(4)=0, distinct(3,4)=3000, null(3,4)=0] │ ├── key: (1) │ └── fd: (1)-->(2-5), (3,4)~~>(1,2,5) └── filters @@ -353,7 +359,7 @@ SELECT * FROM a WHERE ((s >= 'bar' AND s <= 'foo') OR (s >= 'foobar')) AND d <= ---- index-join a ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) b:5(bool) - ├── stats: [rows=333.333333, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0] + ├── stats: [rows=333.333333, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0, distinct(3,4)=100, null(3,4)=0] ├── key: (1) ├── fd: (1)-->(2-5), (3,4)-->(1,2,5) └── select @@ -390,13 +396,13 @@ SELECT * FROM 
abcde WHERE b = 1 AND c LIKE '+1-1000%' ---- index-join abcde ├── columns: a:1(int!null) b:2(int!null) c:3(string!null) d:4(int) e:5(int) - ├── stats: [rows=1.11111111, distinct(2)=1, null(2)=0, distinct(3)=1.11111111, null(3)=0] + ├── stats: [rows=9.11111111, distinct(2)=1, null(2)=0, distinct(3)=9.11111111, null(3)=0, distinct(2,3)=9.11111111, null(2,3)=0] ├── key: (1) ├── fd: ()-->(2), (1)-->(3-5) └── scan abcde@good ├── columns: a:1(int!null) b:2(int!null) c:3(string!null) d:4(int) ├── constraint: /2/3/4/1: [/1/'+1-1000' - /1/'+1-1001') - ├── stats: [rows=1.11111111, distinct(2)=1, null(2)=0, distinct(3)=1.11111111, null(3)=0] + ├── stats: [rows=9.11111111, distinct(2)=1, null(2)=0, distinct(3)=9.11111111, null(3)=0, distinct(2,3)=9.11111111, null(2,3)=0] ├── key: (1) └── fd: ()-->(2), (1)-->(3,4) @@ -821,10 +827,10 @@ SELECT * FROM hist WHERE (a = 10 OR a = 20) AND (b = '2018-08-31'::DATE OR b = ' ---- select ├── columns: a:1(int!null) b:2(date!null) c:3(decimal) d:4(float) e:5(timestamp) f:6(timestamptz) g:7(string) - ├── stats: [rows=1.5, distinct(1)=1.5, null(1)=0, distinct(2)=1.5, null(2)=0] - │ histogram(1)= 0 0.5 0 1 + ├── stats: [rows=6.63, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0, distinct(1,2)=4, null(1,2)=0] + │ histogram(1)= 0 2.21 0 4.42 │ <--- 10 --- 20 - │ histogram(2)= 0 0.6 0 0.9 + │ histogram(2)= 0 2.652 0 3.978 │ <--- '2018-08-31' --- '2018-09-30' ├── index-join hist │ ├── columns: a:1(int) b:2(date) c:3(decimal) d:4(float) e:5(timestamp) f:6(timestamptz) g:7(string) @@ -847,10 +853,10 @@ SELECT * FROM hist WHERE (a = 30 OR a = 40) AND (b = '2018-06-30'::DATE OR b = ' ---- select ├── columns: a:1(int!null) b:2(date!null) c:3(decimal) d:4(float) e:5(timestamp) f:6(timestamptz) g:7(string) - ├── stats: [rows=0.7, distinct(1)=0.7, null(1)=0, distinct(2)=0.7, null(2)=0] - │ histogram(1)= 0 0.3 0 0.4 - │ <--- 30 --- 40 - │ histogram(2)= 0 0.7 + ├── stats: [rows=3.094, distinct(1)=2, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=2, 
null(1,2)=0] + │ histogram(1)= 0 1.326 0 1.768 + │ <--- 30 ---- 40 - + │ histogram(2)= 0 3.094 │ <--- '2018-07-31' ├── index-join hist │ ├── columns: a:1(int) b:2(date) c:3(decimal) d:4(float) e:5(timestamp) f:6(timestamptz) g:7(string) @@ -925,16 +931,16 @@ SELECT * FROM xyz WHERE x=1 AND z>990 ---- index-join xyz ├── columns: x:1(int!null) y:2(int!null) z:3(int!null) other:4(int) - ├── stats: [rows=0.18018018, distinct(1)=0.18018018, null(1)=0, distinct(3)=0.18018018, null(3)=0] - │ histogram(3)= 0 0 0.18018 0 + ├── stats: [rows=0.828828829, distinct(1)=0.828828829, null(1)=0, distinct(3)=0.828828829, null(3)=0, distinct(1,3)=0.828828829, null(1,3)=0] + │ histogram(3)= 0 0 0.82883 0 │ <--- 990 --------- 1000 ├── key: (2) ├── fd: ()-->(1), (2)-->(3,4), (3)-->(2,4) └── scan xyz@xyz_x_z_key ├── columns: x:1(int!null) y:2(int!null) z:3(int!null) ├── constraint: /1/3: [/1/991 - /1] - ├── stats: [rows=0.18018018, distinct(1)=0.18018018, null(1)=0, distinct(3)=0.18018018, null(3)=0] - │ histogram(3)= 0 0 0.18018 0 + ├── stats: [rows=0.828828829, distinct(1)=0.828828829, null(1)=0, distinct(3)=0.828828829, null(3)=0, distinct(1,3)=0.828828829, null(1,3)=0] + │ histogram(3)= 0 0 0.82883 0 │ <--- 990 --------- 1000 ├── key: (2) └── fd: ()-->(1), (2)-->(3), (3)-->(2) @@ -944,16 +950,16 @@ SELECT * FROM xyz WHERE x=1 AND z<990 AND (other=11 OR other=13) ---- select ├── columns: x:1(int!null) y:2(int!null) z:3(int!null) other:4(int!null) - ├── stats: [rows=0.395995996, distinct(1)=0.395995996, null(1)=0, distinct(3)=0.395995996, null(3)=0, distinct(4)=0.395995996, null(4)=0] - │ histogram(3)= 0 0 0.3956 0.0004004 + ├── stats: [rows=1.8395996, distinct(1)=1, null(1)=0, distinct(3)=1.8395996, null(3)=0, distinct(4)=1.8395996, null(4)=0, distinct(1,3,4)=1.8395996, null(1,3,4)=0] + │ histogram(3)= 0 0 1.8377 0.0018601 │ <--- 0 ---------- 989 -- - │ histogram(4)= 0 0.198 0 0.198 - │ <--- 11 ---- 13 - + │ histogram(4)= 0 0.9198 0 0.9198 + │ <---- 11 ----- 13 - ├── key: (2) ├── fd: 
()-->(1), (2)-->(3,4), (3)-->(2,4) ├── index-join xyz │ ├── columns: x:1(int!null) y:2(int!null) z:3(int) other:4(int) - │ ├── stats: [rows=0.4] + │ ├── stats: [rows=0.58] │ ├── key: (2) │ ├── fd: ()-->(1), (2)-->(4), (2)-->(3,4), (1,3)~~>(2,4) │ └── scan xyz@xyz_x_other_z @@ -961,8 +967,8 @@ select │ ├── constraint: /1/-4/2 │ │ ├── [/1/13 - /1/13] │ │ └── [/1/11 - /1/11] - │ ├── stats: [rows=0.4, distinct(1)=0.4, null(1)=0, distinct(4)=0.4, null(4)=0] - │ │ histogram(4)= 0 0.2 0 0.2 + │ ├── stats: [rows=0.58, distinct(1)=0.58, null(1)=0, distinct(4)=0.58, null(4)=0, distinct(1,4)=0.58, null(1,4)=0] + │ │ histogram(4)= 0 0.29 0 0.29 │ │ <--- 11 --- 13 │ ├── key: (2) │ └── fd: ()-->(1), (2)-->(4) @@ -1024,3 +1030,973 @@ project │ └── fd: ()-->(2) └── projections └── t47742.b:2::STRING [as=b:4, type=string, outer=(2)] + +# Multi-column stats tests. +exec-ddl +CREATE TABLE multi_col ( + a UUID, + b BOOL, + c INT, + d STRING, + e INT, + f FLOAT, + INDEX abcde_idx (a, b, c DESC, d, e), + INDEX ce_idx (c, e), + INDEX bad_idx (b, a DESC, d), + INDEX def_idx (d, e, f), + INDEX bef_idx (b, e, f) +) +---- + +opt +SELECT * FROM multi_col +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = true +AND c = 5 +AND d = 'foo' +AND e > 10 AND e <= 20 +AND f > 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=0.810857003, distinct(1)=0.810857003, null(1)=0, distinct(2)=0.810857003, null(2)=0, distinct(3)=0.810857003, null(3)=0, distinct(4)=0.810857003, null(4)=0, distinct(5)=0.810857003, null(5)=0, distinct(6)=0.810857003, null(6)=0, distinct(1-4)=0.810857003, null(1-4)=0, distinct(1-6)=0.810857003, null(1-6)=0] + ├── fd: ()-->(1-4) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=0.810860303] + │ ├── fd: ()-->(1-4) + │ └── scan multi_col@abcde_idx + │ ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) 
d:4(string!null) e:5(int!null) rowid:7(int!null) + │ ├── constraint: /1/2/-3/4/5/7: [/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/true/5/'foo'/11 - /'37685f26-4b07-40ba-9bbf-42916ed9bc61'/true/5/'foo'/20] + │ ├── stats: [rows=0.810860303, distinct(1)=0.810860303, null(1)=0, distinct(2)=0.810860303, null(2)=0, distinct(3)=0.810860303, null(3)=0, distinct(4)=0.810860303, null(4)=0, distinct(5)=0.810860303, null(5)=0, distinct(1-4)=0.810860303, null(1-4)=0, distinct(1-5)=0.810860303, null(1-5)=0] + │ ├── key: (7) + │ └── fd: ()-->(1-4), (7)-->(5) + └── filters + └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + +# Make sure stats estimates are as expected when forcing the other indexes. +opt +SELECT * FROM multi_col@ce_idx +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = true +AND c = 5 +AND d = 'foo' +AND e > 10 AND e <= 20 +AND f > 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=0.810857003, distinct(1)=0.810857003, null(1)=0, distinct(2)=0.810857003, null(2)=0, distinct(3)=0.810857003, null(3)=0, distinct(4)=0.810857003, null(4)=0, distinct(5)=0.810857003, null(5)=0, distinct(6)=0.810857003, null(6)=0, distinct(1-4)=0.810857003, null(1-4)=0, distinct(1-6)=0.810857003, null(1-6)=0] + ├── fd: ()-->(1-4) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=9.1] + │ ├── fd: ()-->(3) + │ └── scan multi_col@ce_idx + │ ├── columns: c:3(int!null) e:5(int!null) rowid:7(int!null) + │ ├── constraint: /3/5/7: [/5/11 - /5/20] + │ ├── flags: force-index=ce_idx + │ ├── stats: [rows=9.1, distinct(3)=1, null(3)=0, distinct(5)=9.1, null(5)=0, distinct(3,5)=9.1, null(3,5)=0] + │ ├── key: (7) + │ └── fd: ()-->(3), (7)-->(5) + └── filters + ├── a:1 = '37685f26-4b07-40ba-9bbf-42916ed9bc61' [type=bool, outer=(1), constraints=(/1: [/'37685f26-4b07-40ba-9bbf-42916ed9bc61' - 
/'37685f26-4b07-40ba-9bbf-42916ed9bc61']; tight), fd=()-->(1)] + ├── b:2 = true [type=bool, outer=(2), constraints=(/2: [/true - /true]; tight), fd=()-->(2)] + ├── d:4 = 'foo' [type=bool, outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] + └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + +opt +SELECT * FROM multi_col@bad_idx +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = true +AND c = 5 +AND d = 'foo' +AND e > 10 AND e <= 20 +AND f > 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=0.810857003, distinct(1)=0.810857003, null(1)=0, distinct(2)=0.810857003, null(2)=0, distinct(3)=0.810857003, null(3)=0, distinct(4)=0.810857003, null(4)=0, distinct(5)=0.810857003, null(5)=0, distinct(6)=0.810857003, null(6)=0, distinct(1-4)=0.810857003, null(1-4)=0, distinct(1-6)=0.810857003, null(1-6)=0] + ├── fd: ()-->(1-4) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=0.90585] + │ ├── fd: ()-->(1,2,4) + │ └── scan multi_col@bad_idx + │ ├── columns: a:1(uuid!null) b:2(bool!null) d:4(string!null) rowid:7(int!null) + │ ├── constraint: /2/-1/4/7: [/true/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/'foo' - /true/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/'foo'] + │ ├── flags: force-index=bad_idx + │ ├── stats: [rows=0.90585, distinct(1)=0.90585, null(1)=0, distinct(2)=0.90585, null(2)=0, distinct(4)=0.90585, null(4)=0, distinct(1,2,4)=0.90585, null(1,2,4)=0] + │ ├── key: (7) + │ └── fd: ()-->(1,2,4) + └── filters + ├── (e:5 > 10) AND (e:5 <= 20) [type=bool, outer=(5), constraints=(/5: [/11 - /20]; tight)] + ├── c:3 = 5 [type=bool, outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)] + └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + +opt +SELECT * FROM multi_col@def_idx +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = true 
+AND c = 5 +AND d = 'foo' +AND e > 10 AND e <= 20 +AND f > 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=0.810857003, distinct(1)=0.810857003, null(1)=0, distinct(2)=0.810857003, null(2)=0, distinct(3)=0.810857003, null(3)=0, distinct(4)=0.810857003, null(4)=0, distinct(5)=0.810857003, null(5)=0, distinct(6)=0.810857003, null(6)=0, distinct(1-4)=0.810857003, null(1-4)=0, distinct(1-6)=0.810857003, null(1-6)=0] + ├── fd: ()-->(1-4) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=3.39117229] + │ ├── fd: ()-->(4) + │ └── select + │ ├── columns: d:4(string!null) e:5(int!null) f:6(float!null) rowid:7(int!null) + │ ├── stats: [rows=3.39117229, distinct(6)=2.91208514, null(6)=0] + │ ├── key: (7) + │ ├── fd: ()-->(4), (7)-->(5,6) + │ ├── scan multi_col@def_idx + │ │ ├── columns: d:4(string!null) e:5(int!null) f:6(float) rowid:7(int!null) + │ │ ├── constraint: /4/5/6/7: [/'foo'/11/5e-324 - /'foo'/20] + │ │ ├── flags: force-index=def_idx + │ │ ├── stats: [rows=9.1, distinct(4)=1, null(4)=0, distinct(5)=9.1, null(5)=0, distinct(6)=8.73625541, null(6)=0.091, distinct(7)=9.1, null(7)=0, distinct(4,5)=9.1, null(4,5)=0] + │ │ ├── key: (7) + │ │ └── fd: ()-->(4), (7)-->(5,6) + │ └── filters + │ └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + └── filters + ├── a:1 = '37685f26-4b07-40ba-9bbf-42916ed9bc61' [type=bool, outer=(1), constraints=(/1: [/'37685f26-4b07-40ba-9bbf-42916ed9bc61' - /'37685f26-4b07-40ba-9bbf-42916ed9bc61']; tight), fd=()-->(1)] + ├── b:2 = true [type=bool, outer=(2), constraints=(/2: [/true - /true]; tight), fd=()-->(2)] + └── c:3 = 5 [type=bool, outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)] + +# A different combination of predicates. 
+opt +SELECT * FROM multi_col +WHERE b = true +AND c = 5 +AND e IN (1, 3, 5, 7, 9) +AND f > 0 +---- +select + ├── columns: a:1(uuid) b:2(bool!null) c:3(int!null) d:4(string) e:5(int!null) f:6(float!null) + ├── stats: [rows=3.1625092, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=3.1625092, null(5)=0, distinct(6)=3.1625092, null(6)=0, distinct(2,3)=1, null(2,3)=0, distinct(2,3,5,6)=3.1625092, null(2,3,5,6)=0] + ├── fd: ()-->(2,3) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=4.55405405] + │ ├── fd: ()-->(3) + │ └── scan multi_col@ce_idx + │ ├── columns: c:3(int!null) e:5(int!null) rowid:7(int!null) + │ ├── constraint: /3/5/7 + │ │ ├── [/5/1 - /5/1] + │ │ ├── [/5/3 - /5/3] + │ │ ├── [/5/5 - /5/5] + │ │ ├── [/5/7 - /5/7] + │ │ └── [/5/9 - /5/9] + │ ├── stats: [rows=4.55405405, distinct(3)=1, null(3)=0, distinct(5)=4.55405405, null(5)=0, distinct(3,5)=4.55405405, null(3,5)=0] + │ ├── key: (7) + │ └── fd: ()-->(3), (7)-->(5) + └── filters + ├── b:2 = true [type=bool, outer=(2), constraints=(/2: [/true - /true]; tight), fd=()-->(2)] + └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + +# Force the alternate index. 
+opt +SELECT * FROM multi_col@bef_idx +WHERE b = true +AND c = 5 +AND e IN (1, 3, 5, 7, 9) +AND f > 0 +---- +select + ├── columns: a:1(uuid) b:2(bool!null) c:3(int!null) d:4(string) e:5(int!null) f:6(float!null) + ├── stats: [rows=3.1625092, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=3.1625092, null(5)=0, distinct(6)=3.1625092, null(6)=0, distinct(2,3)=1, null(2,3)=0, distinct(2,3,5,6)=3.1625092, null(2,3,5,6)=0] + ├── fd: ()-->(2,3) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=45.825] + │ ├── fd: ()-->(2) + │ └── scan multi_col@bef_idx + │ ├── columns: b:2(bool!null) e:5(int!null) f:6(float!null) rowid:7(int!null) + │ ├── constraint: /2/5/6/7 + │ │ ├── [/true/1/5e-324 - /true/1] + │ │ ├── [/true/3/5e-324 - /true/3] + │ │ ├── [/true/5/5e-324 - /true/5] + │ │ ├── [/true/7/5e-324 - /true/7] + │ │ └── [/true/9/5e-324 - /true/9] + │ ├── flags: force-index=bef_idx + │ ├── stats: [rows=45.825, distinct(2)=1, null(2)=0, distinct(5)=5, null(5)=0, distinct(6)=33.3333333, null(6)=0, distinct(2,5,6)=45.825, null(2,5,6)=0] + │ ├── key: (7) + │ └── fd: ()-->(2), (7)-->(5,6) + └── filters + └── c:3 = 5 [type=bool, outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)] + +# Now inject some stats and see how the estimates change. 
+exec-ddl +ALTER TABLE multi_col INJECT STATISTICS '[ + { + "columns": [ + "a" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "a", + "b" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "a", + "b", + "c" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "a", + "b", + "c", + "d" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "a", + "b", + "c", + "d", + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "c" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 100, + "histo_col_type": "", + "name": "__auto__", + "null_count": 1000, + "row_count": 10000 + }, + { + "columns": [ + "c", + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 1000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 100, + "row_count": 10000 + }, + { + "columns": [ + "b" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 3, + "histo_col_type": "", + "name": "__auto__", + "null_count": 5000, + "row_count": 10000 + }, + { + "columns": [ + "b", + "a", + "d" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "b", + "e" + ], + "created_at": 
"2020-05-14 22:50:19.864085+00:00", + "distinct_count": 200, + "histo_col_type": "", + "name": "__auto__", + "null_count": 1000, + "row_count": 10000 + }, + { + "columns": [ + "b", + "e", + "f" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 9000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 50, + "row_count": 10000 + }, + { + "columns": [ + "d" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "d", + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 200, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "d", + "e", + "f" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 100, + "histo_col_type": "", + "name": "__auto__", + "null_count": 1000, + "row_count": 10000 + }, + { + "columns": [ + "f" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 5000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 100, + "row_count": 10000 + } +]' +---- + +opt +SELECT * FROM multi_col +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = true +AND c = 5 +AND d = 'foo' +AND e > 10 AND e <= 20 +AND f > 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=0.810021137, distinct(1)=0.810021137, null(1)=0, distinct(2)=0.810021137, null(2)=0, distinct(3)=0.810021137, null(3)=0, distinct(4)=0.810021137, null(4)=0, distinct(5)=0.810021137, null(5)=0, distinct(6)=0.810021137, null(6)=0, distinct(1-4)=0.810021137, null(1-4)=0, distinct(1-6)=0.810021137, 
null(1-6)=0] + ├── fd: ()-->(1-4) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=0.810022521] + │ ├── fd: ()-->(1-4) + │ └── scan multi_col@abcde_idx + │ ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) d:4(string!null) e:5(int!null) rowid:7(int!null) + │ ├── constraint: /1/2/-3/4/5/7: [/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/true/5/'foo'/11 - /'37685f26-4b07-40ba-9bbf-42916ed9bc61'/true/5/'foo'/20] + │ ├── stats: [rows=0.810022521, distinct(1)=0.810022521, null(1)=0, distinct(2)=0.810022521, null(2)=0, distinct(3)=0.810022521, null(3)=0, distinct(4)=0.810022521, null(4)=0, distinct(5)=0.810022521, null(5)=0, distinct(1-4)=0.810022521, null(1-4)=0, distinct(1-5)=0.810022521, null(1-5)=0] + │ ├── key: (7) + │ └── fd: ()-->(1-4), (7)-->(5) + └── filters + └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + +# A different combination of predicates. +opt +SELECT * FROM multi_col +WHERE b = true +AND c = 5 +AND e IN (1, 3, 5, 7, 9) +AND f > 0 +---- +select + ├── columns: a:1(uuid) b:2(bool!null) c:3(int!null) d:4(string) e:5(int!null) f:6(float!null) + ├── stats: [rows=27.8153382, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=5, null(5)=0, distinct(6)=27.8153382, null(6)=0, distinct(2,3)=1, null(2,3)=0, distinct(2,3,5,6)=27.8153382, null(2,3,5,6)=0] + ├── fd: ()-->(2,3) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=45.0078177] + │ ├── fd: ()-->(3) + │ └── scan multi_col@ce_idx + │ ├── columns: c:3(int!null) e:5(int!null) rowid:7(int!null) + │ ├── constraint: /3/5/7 + │ │ ├── [/5/1 - /5/1] + │ │ ├── [/5/3 - /5/3] + │ │ ├── [/5/5 - /5/5] + │ │ ├── [/5/7 - /5/7] + │ │ └── [/5/9 - /5/9] + │ ├── stats: [rows=45.0078177, distinct(3)=1, null(3)=0, distinct(5)=5, null(5)=0, distinct(3,5)=5, null(3,5)=0] + │ ├── key: (7) + │ └── fd: ()-->(3), (7)-->(5) + └── filters + ├── 
b:2 = true [type=bool, outer=(2), constraints=(/2: [/true - /true]; tight), fd=()-->(2)] + └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + +# Force the alternate index. +opt +SELECT * FROM multi_col@bef_idx +WHERE b = true +AND c = 5 +AND e IN (1, 3, 5, 7, 9) +AND f > 0 +---- +select + ├── columns: a:1(uuid) b:2(bool!null) c:3(int!null) d:4(string) e:5(int!null) f:6(float!null) + ├── stats: [rows=27.8153382, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=5, null(5)=0, distinct(6)=27.8153382, null(6)=0, distinct(2,3)=1, null(2,3)=0, distinct(2,3,5,6)=27.8153382, null(2,3,5,6)=0] + ├── fd: ()-->(2,3) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=412.841659] + │ ├── fd: ()-->(2) + │ └── scan multi_col@bef_idx + │ ├── columns: b:2(bool!null) e:5(int!null) f:6(float!null) rowid:7(int!null) + │ ├── constraint: /2/5/6/7 + │ │ ├── [/true/1/5e-324 - /true/1] + │ │ ├── [/true/3/5e-324 - /true/3] + │ │ ├── [/true/5/5e-324 - /true/5] + │ │ ├── [/true/7/5e-324 - /true/7] + │ │ └── [/true/9/5e-324 - /true/9] + │ ├── flags: force-index=bef_idx + │ ├── stats: [rows=412.841659, distinct(2)=1, null(2)=0, distinct(5)=5, null(5)=0, distinct(6)=412.841659, null(6)=0, distinct(2,5,6)=412.841659, null(2,5,6)=0] + │ ├── key: (7) + │ └── fd: ()-->(2), (7)-->(5,6) + └── filters + └── c:3 = 5 [type=bool, outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)] + +# Include histograms. 
+exec-ddl +ALTER TABLE multi_col INJECT STATISTICS '[ + { + "columns": [ + "a" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "a", + "b" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "a", + "b", + "c" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "a", + "b", + "c", + "d" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "a", + "b", + "c", + "d", + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "c" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 100, + "histo_col_type": "", + "name": "__auto__", + "null_count": 1000, + "row_count": 10000 + }, + { + "columns": [ + "c", + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 1000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 100, + "row_count": 10000 + }, + { + "columns": [ + "b" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 3, + "histo_col_type": "BOOL", + "histo_buckets":[{ + "num_eq":1, + "num_range":0, + "distinct_range":0, + "upper_bound":"false" + }, + { + "num_eq":4999, + "num_range":0, + "distinct_range":0, + "upper_bound":"true" + }], + "name": "__auto__", + "null_count": 5000, + "row_count": 10000 + }, + { + "columns": [ + "b", + "a", + "d" + ], + "created_at": "2020-05-14 
22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "b", + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 200, + "histo_col_type": "", + "name": "__auto__", + "null_count": 1000, + "row_count": 10000 + }, + { + "columns": [ + "b", + "e", + "f" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 9000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 50, + "row_count": 10000 + }, + { + "columns": [ + "d" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10, + "histo_col_type": "STRING", + "histo_buckets":[{ + "num_eq":1, + "num_range":0, + "distinct_range":0, + "upper_bound":"bar" + }, + { + "num_eq":1, + "num_range":2, + "distinct_range":2, + "upper_bound":"baz" + }, + { + "num_eq":1, + "num_range":1, + "distinct_range":1, + "upper_bound":"boo" + }, + { + "num_eq":9990, + "num_range":2, + "distinct_range":1, + "upper_bound":"foo" + }, + { + "num_eq":1, + "num_range":1, + "distinct_range":1, + "upper_bound":"foobar" + }], + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "d", + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 200, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "d", + "e", + "f" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 10000, + "histo_col_type": "", + "name": "__auto__", + "null_count": 0, + "row_count": 10000 + }, + { + "columns": [ + "e" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 100, + "histo_col_type": "", + "name": "__auto__", + "null_count": 1000, + "row_count": 10000 + }, + { + "columns": [ + "f" + ], + "created_at": "2020-05-14 22:50:19.864085+00:00", + "distinct_count": 5000, + "histo_col_type": "", + "name": "__auto__", + 
"null_count": 100, + "row_count": 10000 + } +]' +---- + +opt +SELECT * FROM multi_col +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = true +AND c = 5 +AND d = 'foo' +AND e > 10 AND e <= 20 +AND f > 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=8.09111244, distinct(1)=1, null(1)=0, distinct(2)=2, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0, distinct(5)=8.09111244, null(5)=0, distinct(6)=8.09111244, null(6)=0, distinct(1,3,4)=1, null(1,3,4)=0, distinct(1-6)=8.09111244, null(1-6)=0] + │ histogram(2)= 0 8.0911 + │ <--- true + │ histogram(4)= 0 8.0911 + │ <--- 'foo' + ├── fd: ()-->(1-4) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=8.09114009] + │ ├── fd: ()-->(1-4) + │ └── scan multi_col@abcde_idx + │ ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int!null) d:4(string!null) e:5(int!null) rowid:7(int!null) + │ ├── constraint: /1/2/-3/4/5/7: [/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/true/5/'foo'/11 - /'37685f26-4b07-40ba-9bbf-42916ed9bc61'/true/5/'foo'/20] + │ ├── stats: [rows=8.09114009, distinct(1)=1, null(1)=0, distinct(2)=2, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0, distinct(5)=8.09114009, null(5)=0, distinct(1,3,4)=1, null(1,3,4)=0, distinct(1-5)=8.09114009, null(1-5)=0] + │ │ histogram(2)= 0 8.0911 + │ │ <--- true + │ │ histogram(4)= 0 8.0911 + │ │ <--- 'foo' + │ ├── key: (7) + │ └── fd: ()-->(1-4), (7)-->(5) + └── filters + └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + +# A different combination of predicates. 
+opt +SELECT * FROM multi_col +WHERE b = true +AND c = 5 +AND e IN (1, 3, 5, 7, 9) +AND f > 0 +---- +select + ├── columns: a:1(uuid) b:2(bool!null) c:3(int!null) d:4(string) e:5(int!null) f:6(float!null) + ├── stats: [rows=81.87, distinct(2)=2, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=5, null(5)=0, distinct(6)=81.87, null(6)=0, distinct(2,3,5,6)=81.87, null(2,3,5,6)=0] + │ histogram(2)= 0 81.87 + │ <--- true + ├── fd: ()-->(2,3) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=45.0078177] + │ ├── fd: ()-->(3) + │ └── scan multi_col@ce_idx + │ ├── columns: c:3(int!null) e:5(int!null) rowid:7(int!null) + │ ├── constraint: /3/5/7 + │ │ ├── [/5/1 - /5/1] + │ │ ├── [/5/3 - /5/3] + │ │ ├── [/5/5 - /5/5] + │ │ ├── [/5/7 - /5/7] + │ │ └── [/5/9 - /5/9] + │ ├── stats: [rows=45.0078177, distinct(3)=1, null(3)=0, distinct(5)=5, null(5)=0, distinct(3,5)=5, null(3,5)=0] + │ ├── key: (7) + │ └── fd: ()-->(3), (7)-->(5) + └── filters + ├── b:2 = true [type=bool, outer=(2), constraints=(/2: [/true - /true]; tight), fd=()-->(2)] + └── f:6 > 0.0 [type=bool, outer=(6), constraints=(/6: [/5e-324 - ]; tight)] + +# Force the alternate index. 
+opt +SELECT * FROM multi_col@bef_idx +WHERE b = true +AND c = 5 +AND e IN (1, 3, 5, 7, 9) +AND f > 0 +---- +select + ├── columns: a:1(uuid) b:2(bool!null) c:3(int!null) d:4(string) e:5(int!null) f:6(float!null) + ├── stats: [rows=81.87, distinct(2)=2, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=5, null(5)=0, distinct(6)=81.87, null(6)=0, distinct(2,3,5,6)=81.87, null(2,3,5,6)=0] + │ histogram(2)= 0 81.87 + │ <--- true + ├── fd: ()-->(2,3) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=416.509091] + │ ├── fd: ()-->(2) + │ └── scan multi_col@bef_idx + │ ├── columns: b:2(bool!null) e:5(int!null) f:6(float!null) rowid:7(int!null) + │ ├── constraint: /2/5/6/7 + │ │ ├── [/true/1/5e-324 - /true/1] + │ │ ├── [/true/3/5e-324 - /true/3] + │ │ ├── [/true/5/5e-324 - /true/5] + │ │ ├── [/true/7/5e-324 - /true/7] + │ │ └── [/true/9/5e-324 - /true/9] + │ ├── flags: force-index=bef_idx + │ ├── stats: [rows=416.509091, distinct(2)=2, null(2)=0, distinct(5)=5, null(5)=0, distinct(6)=416.509091, null(6)=0, distinct(2,5,6)=416.509091, null(2,5,6)=0] + │ │ histogram(2)= 0 416.51 + │ │ <--- true + │ ├── key: (7) + │ └── fd: ()-->(2), (7)-->(5,6) + └── filters + └── c:3 = 5 [type=bool, outer=(3), constraints=(/3: [/5 - /5]; tight), fd=()-->(3)] + +# A different combination of predicates, with four different combinations of +# values for b and d. 
+ +opt +SELECT * FROM multi_col +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = true +AND d = 'foo' +AND e = 5 +AND f = 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=8.09028187, distinct(1)=1, null(1)=0, distinct(2)=2, null(2)=0, distinct(4)=1, null(4)=0, distinct(5)=1, null(5)=0, distinct(6)=1, null(6)=0, distinct(1,4-6)=1, null(1,4-6)=0, distinct(1,2,4-6)=2, null(1,2,4-6)=0] + │ histogram(2)= 0 8.0903 + │ <--- true + │ histogram(4)= 0 8.0903 + │ <--- 'foo' + ├── fd: ()-->(1,2,4-6) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=0.811629073] + │ ├── fd: ()-->(2,5,6) + │ └── scan multi_col@bef_idx + │ ├── columns: b:2(bool!null) e:5(int!null) f:6(float!null) rowid:7(int!null) + │ ├── constraint: /2/5/6/7: [/true/5/0.0 - /true/5/0.0] + │ ├── stats: [rows=0.811629073, distinct(2)=0.811629073, null(2)=0, distinct(5)=0.811629073, null(5)=0, distinct(6)=0.811629073, null(6)=0, distinct(5,6)=0.811629073, null(5,6)=0, distinct(2,5,6)=0.811629073, null(2,5,6)=0] + │ │ histogram(2)= 0 0.81163 + │ │ <--- true - + │ ├── key: (7) + │ └── fd: ()-->(2,5,6) + └── filters + ├── a:1 = '37685f26-4b07-40ba-9bbf-42916ed9bc61' [type=bool, outer=(1), constraints=(/1: [/'37685f26-4b07-40ba-9bbf-42916ed9bc61' - /'37685f26-4b07-40ba-9bbf-42916ed9bc61']; tight), fd=()-->(1)] + └── d:4 = 'foo' [type=bool, outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] + +opt +SELECT * FROM multi_col +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = true +AND d = 'bar' +AND e = 5 +AND f = 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=0.000809838025, distinct(1)=0.000809838025, null(1)=0, distinct(2)=0.000809838025, null(2)=0, distinct(4)=0.000809838025, null(4)=0, distinct(5)=0.000809838025, null(5)=0, 
distinct(6)=0.000809838025, null(6)=0, distinct(1,4-6)=0.000809838025, null(1,4-6)=0, distinct(1,2,4-6)=0.000809838025, null(1,2,4-6)=0] + │ histogram(2)= 0 0.00080984 + │ <----- true -- + │ histogram(4)= 0 0.00080984 + │ <---- 'bar' -- + ├── fd: ()-->(1,2,4-6) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=0.0008238352] + │ ├── fd: ()-->(1,2,4) + │ └── scan multi_col@bad_idx + │ ├── columns: a:1(uuid!null) b:2(bool!null) d:4(string!null) rowid:7(int!null) + │ ├── constraint: /2/-1/4/7: [/true/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/'bar' - /true/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/'bar'] + │ ├── stats: [rows=0.0008238352, distinct(1)=0.0008238352, null(1)=0, distinct(2)=0.0008238352, null(2)=0, distinct(4)=0.0008238352, null(4)=0, distinct(1,4)=0.0008238352, null(1,4)=0, distinct(1,2,4)=0.0008238352, null(1,2,4)=0] + │ │ histogram(2)= 0 0.00082384 + │ │ <----- true -- + │ │ histogram(4)= 0 0.00082384 + │ │ <---- 'bar' -- + │ ├── key: (7) + │ └── fd: ()-->(1,2,4) + └── filters + ├── e:5 = 5 [type=bool, outer=(5), constraints=(/5: [/5 - /5]; tight), fd=()-->(5)] + └── f:6 = 0.0 [type=bool, outer=(6), constraints=(/6: [/0.0 - /0.0]; tight), fd=()-->(6)] + +opt +SELECT * FROM multi_col +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = false +AND d = 'bar' +AND e = 5 +AND f = 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=1.62000005e-07, distinct(1)=1.62000005e-07, null(1)=0, distinct(2)=1.62000005e-07, null(2)=0, distinct(4)=1.62000005e-07, null(4)=0, distinct(5)=1.62000005e-07, null(5)=0, distinct(6)=1.62000005e-07, null(6)=0, distinct(1,4-6)=1.62000005e-07, null(1,4-6)=0, distinct(1,2,4-6)=1.62000005e-07, null(1,2,4-6)=0] + │ histogram(2)= 0 1.62e-07 + │ <--- false - + │ histogram(4)= 0 1.62e-07 + │ <--- 'bar' - + ├── fd: ()-->(1,2,4-6) + ├── index-join multi_col + │ ├── columns: a:1(uuid) 
b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=1.648e-07] + │ ├── fd: ()-->(1,2,4) + │ └── scan multi_col@bad_idx + │ ├── columns: a:1(uuid!null) b:2(bool!null) d:4(string!null) rowid:7(int!null) + │ ├── constraint: /2/-1/4/7: [/false/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/'bar' - /false/'37685f26-4b07-40ba-9bbf-42916ed9bc61'/'bar'] + │ ├── stats: [rows=1.648e-07, distinct(1)=1.648e-07, null(1)=0, distinct(2)=1.648e-07, null(2)=0, distinct(4)=1.648e-07, null(4)=0, distinct(1,4)=1.648e-07, null(1,4)=0, distinct(1,2,4)=1.648e-07, null(1,2,4)=0] + │ │ histogram(2)= 0 1.648e-07 + │ │ <---- false - + │ │ histogram(4)= 0 1.648e-07 + │ │ <---- 'bar' - + │ ├── key: (7) + │ └── fd: ()-->(1,2,4) + └── filters + ├── e:5 = 5 [type=bool, outer=(5), constraints=(/5: [/5 - /5]; tight), fd=()-->(5)] + └── f:6 = 0.0 [type=bool, outer=(6), constraints=(/6: [/0.0 - /0.0]; tight), fd=()-->(6)] + +opt +SELECT * FROM multi_col +WHERE a = '37685f26-4b07-40ba-9bbf-42916ed9bc61' +AND b = false +AND d = 'foo' +AND e = 5 +AND f = 0 +---- +select + ├── columns: a:1(uuid!null) b:2(bool!null) c:3(int) d:4(string!null) e:5(int!null) f:6(float!null) + ├── stats: [rows=0.00161838005, distinct(1)=0.00161838005, null(1)=0, distinct(2)=0.00161838005, null(2)=0, distinct(4)=0.00161838005, null(4)=0, distinct(5)=0.00161838005, null(5)=0, distinct(6)=0.00161838005, null(6)=0, distinct(1,4-6)=0.00161838005, null(1,4-6)=0, distinct(1,2,4-6)=0.00161838005, null(1,2,4-6)=0] + │ histogram(2)= 0 0.0016184 + │ <---- false - + │ histogram(4)= 0 0.0016184 + │ <---- 'foo' - + ├── fd: ()-->(1,2,4-6) + ├── index-join multi_col + │ ├── columns: a:1(uuid) b:2(bool) c:3(int) d:4(string) e:5(int) f:6(float) + │ ├── stats: [rows=0.000162358286] + │ ├── fd: ()-->(2,5,6) + │ └── scan multi_col@bef_idx + │ ├── columns: b:2(bool!null) e:5(int!null) f:6(float!null) rowid:7(int!null) + │ ├── constraint: /2/5/6/7: [/false/5/0.0 - /false/5/0.0] + │ ├── stats: [rows=0.000162358286, 
distinct(2)=0.000162358286, null(2)=0, distinct(5)=0.000162358286, null(5)=0, distinct(6)=0.000162358286, null(6)=0, distinct(5,6)=0.000162358286, null(5,6)=0, distinct(2,5,6)=0.000162358286, null(2,5,6)=0] + │ │ histogram(2)= 0 0.00016236 + │ │ <---- false -- + │ ├── key: (7) + │ └── fd: ()-->(2,5,6) + └── filters + ├── a:1 = '37685f26-4b07-40ba-9bbf-42916ed9bc61' [type=bool, outer=(1), constraints=(/1: [/'37685f26-4b07-40ba-9bbf-42916ed9bc61' - /'37685f26-4b07-40ba-9bbf-42916ed9bc61']; tight), fd=()-->(1)] + └── d:4 = 'foo' [type=bool, outer=(4), constraints=(/4: [/'foo' - /'foo']; tight), fd=()-->(4)] diff --git a/pkg/sql/opt/memo/testdata/stats/select b/pkg/sql/opt/memo/testdata/stats/select index 5824b6d92c26..dc263290cee9 100644 --- a/pkg/sql/opt/memo/testdata/stats/select +++ b/pkg/sql/opt/memo/testdata/stats/select @@ -72,17 +72,17 @@ SELECT * FROM b WHERE x = 1 AND z = 2 AND rowid >= 5 AND rowid <= 8 project ├── columns: x:1(int!null) z:2(int!null) ├── cardinality: [0 - 4] - ├── stats: [rows=8e-06] + ├── stats: [rows=0.8118008] ├── fd: ()-->(1,2) └── select ├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null) ├── cardinality: [0 - 4] - ├── stats: [rows=8e-06, distinct(1)=8e-06, null(1)=0, distinct(2)=8e-06, null(2)=0, distinct(3)=8e-06, null(3)=0] + ├── stats: [rows=0.8118008, distinct(1)=0.8118008, null(1)=0, distinct(2)=0.8118008, null(2)=0, distinct(3)=0.8118008, null(3)=0, distinct(1,2)=0.8118008, null(1,2)=0, distinct(1-3)=0.8118008, null(1-3)=0] ├── key: (3) ├── fd: ()-->(1,2) ├── scan b │ ├── columns: x:1(int) z:2(int!null) rowid:3(int!null) - │ ├── stats: [rows=10000, distinct(1)=5000, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0] + │ ├── stats: [rows=10000, distinct(1)=5000, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0, distinct(1,2)=10000, null(1,2)=0, distinct(1-3)=10000, null(1-3)=0] │ ├── key: (3) │ └── fd: (3)-->(1,2) └── filters @@ -113,12 +113,12 @@ SELECT * FROM a WHERE y = 5 AND x + y < 
10 ---- select ├── columns: x:1(int!null) y:2(int!null) - ├── stats: [rows=3.33333333, distinct(1)=3.33333333, null(1)=0, distinct(2)=1, null(2)=0] + ├── stats: [rows=9.33333333, distinct(1)=9.33333333, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=9.33333333, null(1,2)=0] ├── key: (1) ├── fd: ()-->(2) ├── scan a │ ├── columns: x:1(int!null) y:2(int) - │ ├── stats: [rows=4000, distinct(1)=4000, null(1)=0, distinct(2)=400, null(2)=0] + │ ├── stats: [rows=4000, distinct(1)=4000, null(1)=0, distinct(2)=400, null(2)=0, distinct(1,2)=4000, null(1,2)=0] │ ├── key: (1) │ └── fd: (1)-->(2) └── filters @@ -178,10 +178,10 @@ SELECT y FROM idx WHERE y < 5 AND z < 10 ---- project ├── columns: y:2(int!null) - ├── stats: [rows=111.111111] + ├── stats: [rows=311.111111] └── select ├── columns: y:2(int!null) z:3(int!null) - ├── stats: [rows=111.111111, distinct(2)=33.3333333, null(2)=0, distinct(3)=33.3333333, null(3)=0] + ├── stats: [rows=311.111111, distinct(2)=33.3333333, null(2)=0, distinct(3)=33.3333333, null(3)=0, distinct(2,3)=311.111111, null(2,3)=0] ├── scan idx@yz │ ├── columns: y:2(int!null) z:3(int) │ ├── constraint: /-2/3/1: (/4/NULL - /NULL) @@ -299,12 +299,12 @@ SELECT * FROM district WHERE d_id = 1 AND d_name='bobs_burgers' ---- select ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null) - ├── stats: [rows=0.1, distinct(1)=0.1, null(1)=0, distinct(3)=0.1, null(3)=0] + ├── stats: [rows=0.91, distinct(1)=0.91, null(1)=0, distinct(3)=0.91, null(3)=0, distinct(1,3)=0.91, null(1,3)=0] ├── key: (2) ├── fd: ()-->(1,3) ├── scan district │ ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string) - │ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0] + │ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,3)=100, null(1,3)=0] │ ├── key: (1,2) │ └── fd: (1,2)-->(3) └── filters @@ -316,12 +316,12 @@ SELECT * FROM district WHERE 
d_id = 1 and d_name LIKE 'bob' ---- select ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null) - ├── stats: [rows=0.1, distinct(1)=0.1, null(1)=0, distinct(3)=0.1, null(3)=0] + ├── stats: [rows=0.91, distinct(1)=0.91, null(1)=0, distinct(3)=0.91, null(3)=0, distinct(1,3)=0.91, null(1,3)=0] ├── key: (2) ├── fd: ()-->(1,3) ├── scan district │ ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string) - │ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0] + │ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,3)=100, null(1,3)=0] │ ├── key: (1,2) │ └── fd: (1,2)-->(3) └── filters @@ -339,12 +339,12 @@ SELECT * FROM district WHERE d_id > 1 AND d_id < 10 AND d_w_id=10 AND d_name='bo select ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null) ├── cardinality: [0 - 8] - ├── stats: [rows=0.08, distinct(1)=0.08, null(1)=0, distinct(2)=0.08, null(2)=0, distinct(3)=0.08, null(3)=0] + ├── stats: [rows=0.827, distinct(1)=0.827, null(1)=0, distinct(2)=0.827, null(2)=0, distinct(3)=0.827, null(3)=0, distinct(2,3)=0.827, null(2,3)=0, distinct(1-3)=0.827, null(1-3)=0] ├── key: (1) ├── fd: ()-->(2,3) ├── scan district │ ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string) - │ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0] + │ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(2,3)=100, null(2,3)=0, distinct(1-3)=100, null(1-3)=0] │ ├── key: (1,2) │ └── fd: (1,2)-->(3) └── filters @@ -360,12 +360,12 @@ SELECT * FROM district WHERE d_id = 1 AND d_w_id=10 AND d_name='hello' select ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string!null) ├── cardinality: [0 - 1] - ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0] + ├── stats: [rows=1, 
distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(1,2)=1, null(1,2)=0] ├── key: () ├── fd: ()-->(1-3) ├── scan district │ ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_name:3(string) - │ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0] + │ ├── stats: [rows=100, distinct(1)=10, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0, distinct(1,2)=100, null(1,2)=0] │ ├── key: (1,2) │ └── fd: (1,2)-->(3) └── filters @@ -492,11 +492,11 @@ SELECT * FROM order_history WHERE item_id = order_id AND item_id = customer_id A ---- select ├── columns: order_id:1(int!null) item_id:2(int!null) customer_id:3(int!null) year:4(int) - ├── stats: [rows=0.001, distinct(1)=0.001, null(1)=0, distinct(2)=0.001, null(2)=0, distinct(3)=0.001, null(3)=0] + ├── stats: [rows=0.901, distinct(1)=0.901, null(1)=0, distinct(2)=0.901, null(2)=0, distinct(3)=0.901, null(3)=0, distinct(1-3)=0.901, null(1-3)=0] ├── fd: ()-->(1-3) ├── scan order_history │ ├── columns: order_id:1(int) item_id:2(int) customer_id:3(int) year:4(int) - │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(3)=100, null(3)=10] + │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(3)=100, null(3)=10, distinct(1-3)=1000, null(1-3)=0.001] └── filters ├── order_id:1 = 5 [type=bool, outer=(1), constraints=(/1: [/5 - /5]; tight), fd=()-->(1)] ├── item_id:2 = 5 [type=bool, outer=(2), constraints=(/2: [/5 - /5]; tight), fd=()-->(2)] @@ -581,11 +581,11 @@ SELECT * FROM uvw WHERE u=v AND u=10 ---- select ├── columns: u:1(int!null) v:2(int!null) w:3(int) - ├── stats: [rows=0.1, distinct(1)=0.1, null(1)=0, distinct(2)=0.1, null(2)=0] + ├── stats: [rows=0.910810811, distinct(1)=0.910810811, null(1)=0, distinct(2)=0.910810811, null(2)=0, distinct(1,2)=0.910810811, null(1,2)=0] ├── fd: ()-->(1,2) ├── scan uvw │ ├── columns: u:1(int) v:2(int) w:3(int) - │ └── 
stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10] + │ └── stats: [rows=1000, distinct(1)=100, null(1)=10, distinct(2)=100, null(2)=10, distinct(1,2)=1000, null(1,2)=0.1] └── filters ├── v:2 = 10 [type=bool, outer=(2), constraints=(/2: [/10 - /10]; tight), fd=()-->(2)] └── u:1 = 10 [type=bool, outer=(1), constraints=(/1: [/10 - /10]; tight), fd=()-->(1)] @@ -722,7 +722,7 @@ inner-join (zigzag lineitem@l_sd lineitem@l_cd) ├── eq columns: [1 4] = [1 4] ├── left fixed columns: [11] = ['1995-09-01'] ├── right fixed columns: [12] = ['1995-08-01'] - ├── stats: [rows=0.1, distinct(11)=0.1, null(11)=0, distinct(12)=0.1, null(12)=0, distinct(11,12)=0.1, null(11,12)=0] + ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] ├── key: (1,4) ├── fd: ()-->(11,12) └── filters @@ -738,7 +738,7 @@ WHERE ---- select ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) - ├── stats: [rows=0.1, distinct(11)=0.1, null(11)=0, distinct(12)=0.1, null(12)=0, distinct(11,12)=0.1, null(11,12)=0] + ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] ├── key: (1,4) ├── fd: ()-->(11,12) ├── scan lineitem @@ -767,7 +767,7 @@ inner-join (lookup lineitem) ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) ├── key columns: [1 4] = [1 4] ├── lookup columns are key - ├── stats: [rows=0.1, distinct(11)=0.1, null(11)=0, distinct(12)=0.1, null(12)=0, distinct(11,12)=0.1, null(11,12)=0] + ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] ├── key: (1,4) ├── fd: ()-->(11,12), (1,4)-->(5) ├── inner-join (zigzag lineitem@l_sd lineitem@l_cd) @@ -775,7 +775,7 @@ inner-join (lookup lineitem) │ ├── eq columns: [1 4] = [1 4] │ ├── left 
fixed columns: [11] = ['1995-09-01'] │ ├── right fixed columns: [12] = ['1995-08-01'] - │ ├── stats: [rows=0.1, distinct(11)=0.1, null(11)=0, distinct(12)=0.1, null(12)=0] + │ ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] │ ├── fd: ()-->(11,12) │ └── filters │ ├── l_shipdate:11 = '1995-09-01' [type=bool, outer=(11), constraints=(/11: [/'1995-09-01' - /'1995-09-01']; tight), fd=()-->(11)] @@ -791,7 +791,7 @@ WHERE ---- select ├── columns: l_shipdate:11(date!null) l_commitdate:12(date!null) l_orderkey:1(int!null) l_linenumber:4(int!null) l_quantity:5(float!null) - ├── stats: [rows=0.1, distinct(11)=0.1, null(11)=0, distinct(12)=0.1, null(12)=0, distinct(11,12)=0.1, null(11,12)=0] + ├── stats: [rows=0.91, distinct(11)=0.91, null(11)=0, distinct(12)=0.91, null(12)=0, distinct(11,12)=0.91, null(11,12)=0] ├── key: (1,4) ├── fd: ()-->(11,12), (1,4)-->(5) ├── scan lineitem @@ -973,17 +973,17 @@ SELECT * FROM b WHERE x = 1 AND z = 2 AND rowid >= 5 AND rowid <= 8 project ├── columns: x:1(int!null) z:2(int!null) ├── cardinality: [0 - 4] - ├── stats: [rows=6.40128026e-06] + ├── stats: [rows=0.811440928] ├── fd: ()-->(1,2) └── select ├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null) ├── cardinality: [0 - 4] - ├── stats: [rows=6.40128026e-06, distinct(1)=6.40128026e-06, null(1)=0, distinct(2)=6.40128026e-06, null(2)=0, distinct(3)=6.40128026e-06, null(3)=0] + ├── stats: [rows=0.811440928, distinct(1)=0.811440928, null(1)=0, distinct(2)=0.811440928, null(2)=0, distinct(3)=0.811440928, null(3)=0, distinct(1,2)=0.811440928, null(1,2)=0, distinct(1-3)=0.811440928, null(1-3)=0] ├── key: (3) ├── fd: ()-->(1,2) ├── scan b │ ├── columns: x:1(int) z:2(int!null) rowid:3(int!null) - │ ├── stats: [rows=10000, distinct(1)=5000, null(1)=2000, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0] + │ ├── stats: [rows=10000, distinct(1)=5000, null(1)=2000, distinct(2)=100, null(2)=0, distinct(3)=10000, 
null(3)=0, distinct(1,2)=10000, null(1,2)=0, distinct(1-3)=10000, null(1-3)=0] │ ├── key: (3) │ └── fd: (3)-->(1,2) └── filters @@ -1014,12 +1014,12 @@ SELECT * FROM a WHERE y = 5 AND x + y < 10 ---- select ├── columns: x:1(int!null) y:2(int!null) - ├── stats: [rows=3.34168755, distinct(1)=3.34168755, null(1)=0, distinct(2)=1, null(2)=0] + ├── stats: [rows=9.35672515, distinct(1)=9.35672515, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=9.35672515, null(1,2)=0] ├── key: (1) ├── fd: ()-->(2) ├── scan a │ ├── columns: x:1(int!null) y:2(int) - │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=1000] + │ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(2)=400, null(2)=1000, distinct(1,2)=5000, null(1,2)=0] │ ├── key: (1) │ └── fd: (1)-->(2) └── filters @@ -1611,13 +1611,13 @@ SELECT * FROM hist_and_distinct WHERE a = 10 AND b = 10 AND c = 10 AND d >= 10 A ---- select ├── columns: a:1(int!null) b:2(int!null) c:3(int!null) d:4(int!null) - ├── stats: [rows=0.3, distinct(1)=0.3, null(1)=0, distinct(2)=0.3, null(2)=0, distinct(3)=0.3, null(3)=0, distinct(4)=0.3, null(4)=0] - │ histogram(1)= 0 0.3 + ├── stats: [rows=0.39, distinct(1)=0.39, null(1)=0, distinct(2)=0.39, null(2)=0, distinct(3)=0.39, null(3)=0, distinct(4)=0.39, null(4)=0, distinct(1-3)=0.39, null(1-3)=0, distinct(1-4)=0.39, null(1-4)=0] + │ histogram(1)= 0 0.39 │ <--- 10 ├── fd: ()-->(1-3) ├── scan hist_and_distinct │ ├── columns: a:1(int) b:2(int) c:3(int) d:4(int) - │ └── stats: [rows=1000, distinct(1)=40, null(1)=0, distinct(2)=5, null(2)=0, distinct(3)=5, null(3)=0, distinct(4)=120, null(4)=0] + │ └── stats: [rows=1000, distinct(1)=40, null(1)=0, distinct(2)=5, null(2)=0, distinct(3)=5, null(3)=0, distinct(4)=120, null(4)=0, distinct(1-3)=1000, null(1-3)=0, distinct(1-4)=1000, null(1-4)=0] │ histogram(1)= 0 0 90 10 180 20 270 30 360 40 │ <--- 0 ---- 10 ----- 20 ----- 30 ----- 40 └── filters @@ -1948,3 +1948,47 @@ select │ └── fd: (1)-->(2) └── filters └── ((x:1 
>= 25) AND (x:1 <= 50)) OR ((x:1 >= 50) AND (x:1 <= 25)) [type=bool, outer=(1), constraints=(/1: [/25 - /50]; tight)] + +exec-ddl +ALTER TABLE b INJECT STATISTICS '[ + { + "columns": ["x"], + "created_at": "2020-01-28 03:02:57.841772+00:00", + "row_count": 10000, + "distinct_count": 1000 + }, + { + "columns": ["z"], + "created_at": "2020-01-28 03:02:57.841772+00:00", + "row_count": 10000, + "distinct_count": 100 + } , + { + "columns": ["x","z"], + "created_at": "2020-01-28 03:02:57.841772+00:00", + "row_count": 10000, + "distinct_count": 1500 + } +]' +---- + +# Multi-column stats test. +build +SELECT * FROM b WHERE x = 1 AND z = 2 +---- +project + ├── columns: x:1(int!null) z:2(int!null) + ├── stats: [rows=6.01] + ├── fd: ()-->(1,2) + └── select + ├── columns: x:1(int!null) z:2(int!null) rowid:3(int!null) + ├── stats: [rows=6.01, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=1, null(1,2)=0] + ├── key: (3) + ├── fd: ()-->(1,2) + ├── scan b + │ ├── columns: x:1(int) z:2(int!null) rowid:3(int!null) + │ ├── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=0, distinct(3)=10000, null(3)=0, distinct(1,2)=1500, null(1,2)=0] + │ ├── key: (3) + │ └── fd: (3)-->(1,2) + └── filters + └── (x:1 = 1) AND (z:2 = 2) [type=bool, outer=(1,2), constraints=(/1: [/1 - /1]; /2: [/2 - /2]; tight), fd=()-->(1,2)] diff --git a/pkg/sql/opt/memo/testdata/stats/values b/pkg/sql/opt/memo/testdata/stats/values index a99b55dc938e..f5d44af20964 100644 --- a/pkg/sql/opt/memo/testdata/stats/values +++ b/pkg/sql/opt/memo/testdata/stats/values @@ -4,12 +4,12 @@ SELECT * FROM (VALUES (1, 2), (1, 2), (1, 3), (2, 3)) AS q(x, y) WHERE x = 5 AND select ├── columns: x:1(int!null) y:2(int!null) ├── cardinality: [0 - 4] - ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0] + ├── stats: [rows=1.3, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(1,2)=1, null(1,2)=0] ├── fd: ()-->(1,2) ├── values │ ├── columns: column1:1(int!null) 
column2:2(int!null) │ ├── cardinality: [4 - 4] - │ ├── stats: [rows=4, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0] + │ ├── stats: [rows=4, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0, distinct(1,2)=3, null(1,2)=0] │ ├── (1, 2) [type=tuple{int, int}] │ ├── (1, 2) [type=tuple{int, int}] │ ├── (1, 3) [type=tuple{int, int}] diff --git a/pkg/sql/opt/memo/testdata/stats_quality/tpcc b/pkg/sql/opt/memo/testdata/stats_quality/tpcc index efd0b69ac58b..9d269421dc03 100644 --- a/pkg/sql/opt/memo/testdata/stats_quality/tpcc +++ b/pkg/sql/opt/memo/testdata/stats_quality/tpcc @@ -61,7 +61,7 @@ project ├── columns: c_id:1(int!null) c_d_id:2(int!null) c_w_id:3(int!null) c_last:6(varchar) c_credit:14(char) c_discount:16(decimal) ├── constraint: /3/2/1: [/1/1/50 - /1/1/50] ├── cardinality: [0 - 1] - ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(6)=0.999501832, null(6)=0, distinct(14)=0.786939691, null(14)=0, distinct(16)=0.999901673, null(16)=0] + ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(6)=0.999501832, null(6)=0, distinct(14)=0.786939691, null(14)=0, distinct(16)=0.999901673, null(16)=0, distinct(1-3)=1, null(1-3)=0] ├── key: () └── fd: ()-->(1-3,6,14,16) @@ -149,7 +149,7 @@ project │ ├── [/4/1400 - /4/1400] │ └── [/4/1500 - /4/1500] ├── cardinality: [0 - 5] - ├── stats: [rows=5, distinct(1)=5, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=4.86513081, null(3)=0, distinct(8)=4.8037108, null(8)=0, distinct(14)=0.993262137, null(14)=0, distinct(15)=0.993262137, null(15)=0, distinct(16)=0.993262137, null(16)=0, distinct(17)=4.99972957, null(17)=0] + ├── stats: [rows=5, distinct(1)=5, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=4.86513081, null(3)=0, distinct(8)=4.8037108, null(8)=0, distinct(14)=0.993262137, null(14)=0, distinct(15)=0.993262137, null(15)=0, distinct(16)=0.993262137, null(16)=0, distinct(17)=4.99972957, null(17)=0, 
distinct(1,2)=5, null(1,2)=0] ├── key: (1) ├── fd: ()-->(2), (1)-->(3,8,14-17) └── ordering: +1 opt(2) [actual: +1] @@ -202,7 +202,7 @@ project ├── save-table-name: payment_01_scan_2 ├── columns: c_id:1(int!null) c_d_id:2(int!null) c_w_id:3(int!null) c_first:4(varchar) c_last:6(varchar!null) ├── constraint: /3/2/6/4/1: [/1/1/'ANTIABLEABLE' - /1/1/'ANTIABLEABLE'] - ├── stats: [rows=3.165, distinct(1)=3.16334717, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=3.15605754, null(4)=0, distinct(6)=1, null(6)=0] + ├── stats: [rows=3.165, distinct(1)=3.16334717, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=3.15605754, null(4)=0, distinct(6)=1, null(6)=0, distinct(2,3,6)=1, null(2,3,6)=0] │ histogram(3)= 0 3.165 │ <---- 1 - ├── key: (1) @@ -251,7 +251,7 @@ project ├── columns: c_id:1(int!null) c_d_id:2(int!null) c_w_id:3(int!null) c_first:4(varchar) c_middle:5(char) c_last:6(varchar) c_balance:17(decimal) ├── constraint: /3/2/1: [/1/1/50 - /1/1/50] ├── cardinality: [0 - 1] - ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=0.999106141, null(4)=0, distinct(5)=0.632121172, null(5)=0, distinct(6)=0.999501832, null(6)=0, distinct(17)=0.632121172, null(17)=0] + ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=0.999106141, null(4)=0, distinct(5)=0.632121172, null(5)=0, distinct(6)=0.999501832, null(6)=0, distinct(17)=0.632121172, null(17)=0, distinct(1-3)=1, null(1-3)=0] ├── key: () └── fd: ()-->(1-6,17) @@ -291,7 +291,7 @@ project └── index-join customer ├── save-table-name: order_status_02_index_join_2 ├── columns: c_id:1(int!null) c_d_id:2(int!null) c_w_id:3(int!null) c_first:4(varchar) c_middle:5(char) c_last:6(varchar!null) c_balance:17(decimal) - ├── stats: [rows=2.853, distinct(1)=2.85165693, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=2.84573236, null(4)=0, 
distinct(5)=0.942329734, null(5)=0, distinct(6)=1, null(6)=0, distinct(17)=0.942329734, null(17)=0] + ├── stats: [rows=2.853, distinct(1)=2.85165693, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=2.84573236, null(4)=0, distinct(5)=0.942329734, null(5)=0, distinct(6)=1, null(6)=0, distinct(17)=0.942329734, null(17)=0, distinct(2,3,6)=1, null(2,3,6)=0] │ histogram(3)= 0 2.853 │ <---- 2 - ├── key: (1) @@ -301,7 +301,7 @@ project ├── save-table-name: order_status_02_scan_3 ├── columns: c_id:1(int!null) c_d_id:2(int!null) c_w_id:3(int!null) c_first:4(varchar) c_last:6(varchar!null) ├── constraint: /3/2/6/4/1: [/2/2/'ANTIBARESE' - /2/2/'ANTIBARESE'] - ├── stats: [rows=2.853, distinct(1)=2.85165693, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=2.84573236, null(4)=0, distinct(6)=1, null(6)=0] + ├── stats: [rows=2.853, distinct(1)=2.85165693, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=2.84573236, null(4)=0, distinct(6)=1, null(6)=0, distinct(2,3,6)=1, null(2,3,6)=0] │ histogram(3)= 0 2.853 │ <---- 2 - ├── key: (1) @@ -339,7 +339,7 @@ project ├── save-table-name: order_status_03_project_1 ├── columns: o_id:1(int!null) o_entry_d:5(timestamp) o_carrier_id:6(int) ├── cardinality: [0 - 1] - ├── stats: [rows=0.974324775, distinct(1)=0.974168103, null(1)=0, distinct(5)=0.622553468, null(5)=0, distinct(6)=0.932422121, null(6)=0.292297432] + ├── stats: [rows=0.986644468, distinct(1)=0.986483809, null(1)=0, distinct(5)=0.627174982, null(5)=0, distinct(6)=0.943691252, null(6)=0.29599334] ├── key: () ├── fd: ()-->(1,5,6) └── scan "order"@order_idx @@ -347,7 +347,7 @@ project ├── columns: o_id:1(int!null) o_d_id:2(int!null) o_w_id:3(int!null) o_c_id:4(int!null) o_entry_d:5(timestamp) o_carrier_id:6(int) ├── constraint: /3/2/4/-1: [/4/3/10 - /4/3/10] ├── limit: 1 - ├── stats: [rows=0.974324775, distinct(1)=0.974168103, null(1)=0, distinct(2)=0.974324775, null(2)=0, distinct(3)=0.974324775, 
null(3)=0, distinct(4)=0.974324775, null(4)=0, distinct(5)=0.622553468, null(5)=0, distinct(6)=0.932422121, null(6)=0.292297432] + ├── stats: [rows=0.986644468, distinct(1)=0.986483809, null(1)=0, distinct(2)=0.986644468, null(2)=0, distinct(3)=0.986644468, null(3)=0, distinct(4)=0.986644468, null(4)=0, distinct(5)=0.627174982, null(5)=0, distinct(6)=0.943691252, null(6)=0.29599334] ├── key: () └── fd: ()-->(1-6) @@ -359,13 +359,13 @@ WHERE ol_w_id = 1 AND ol_d_id = 1 AND ol_o_id = 1000 project ├── save-table-name: order_status_04_project_1 ├── columns: ol_i_id:5(int!null) ol_supply_w_id:6(int) ol_quantity:8(int) ol_amount:9(decimal) ol_delivery_d:7(timestamp) - ├── stats: [rows=9.28687776, distinct(5)=9.28645943, null(5)=0, distinct(6)=6.04928775, null(6)=0, distinct(7)=1.98075105, null(7)=2.78224921, distinct(8)=0.99990737, null(8)=0, distinct(9)=9.2868189, null(9)=0] + ├── stats: [rows=9.40430419, distinct(5)=9.40387521, null(5)=0, distinct(6)=6.09540837, null(6)=0, distinct(7)=1.98184869, null(7)=2.81742891, distinct(8)=0.999917632, null(8)=0, distinct(9)=9.40424383, null(9)=0] └── scan order_line ├── save-table-name: order_status_04_scan_2 ├── columns: ol_o_id:1(int!null) ol_d_id:2(int!null) ol_w_id:3(int!null) ol_i_id:5(int!null) ol_supply_w_id:6(int) ol_delivery_d:7(timestamp) ol_quantity:8(int) ol_amount:9(decimal) ├── constraint: /3/2/-1/4: [/1/1/1000 - /1/1/1000] - ├── stats: [rows=9.28687776, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=9.28645943, null(5)=0, distinct(6)=6.04928775, null(6)=0, distinct(7)=1.98075105, null(7)=2.78224921, distinct(8)=0.99990737, null(8)=0, distinct(9)=9.2868189, null(9)=0] - │ histogram(3)= 0 9.2869 + ├── stats: [rows=9.40430419, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=9.40387521, null(5)=0, distinct(6)=6.09540837, null(6)=0, distinct(7)=1.98184869, null(7)=2.81742891, distinct(8)=0.999917632, null(8)=0, distinct(9)=9.40424383, 
null(9)=0, distinct(1-3)=1, null(1-3)=0] + │ histogram(3)= 0 9.4043 │ <---- 1 -- └── fd: ()-->(1-3) @@ -458,8 +458,8 @@ scalar-group-by │ ├── save-table-name: delivery_02_scan_2 │ ├── columns: ol_o_id:1(int!null) ol_d_id:2(int!null) ol_w_id:3(int!null) ol_amount:9(decimal) │ ├── constraint: /3/2/-1/4: [/8/6/1000 - /8/6/1000] - │ ├── stats: [rows=9.96737239, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(9)=9.96730459, null(9)=0] - │ │ histogram(3)= 0 9.9674 + │ ├── stats: [rows=10.0934032, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(9)=10.0933337, null(9)=0, distinct(1-3)=1, null(1-3)=0] + │ │ histogram(3)= 0 10.093 │ │ <---- 8 -- │ └── fd: ()-->(1-3) └── aggregations @@ -513,7 +513,7 @@ project ├── columns: d_id:1(int!null) d_w_id:2(int!null) d_next_o_id:11(int) ├── constraint: /2/1: [/4/9 - /4/9] ├── cardinality: [0 - 1] - ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(11)=0.633967659, null(11)=0] + ├── stats: [rows=1, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(11)=0.633967659, null(11)=0, distinct(1,2)=1, null(1,2)=0] ├── key: () └── fd: ()-->(1,2,11) @@ -550,21 +550,21 @@ scalar-group-by │ ├── save-table-name: stock_level_02_distinct_on_2 │ ├── columns: s_i_id:11(int!null) │ ├── grouping columns: s_i_id:11(int!null) - │ ├── stats: [rows=185.570315, distinct(11)=185.570315, null(11)=0] + │ ├── stats: [rows=187.914589, distinct(11)=187.914589, null(11)=0] │ ├── key: (11) │ └── inner-join (lookup stock) │ ├── save-table-name: stock_level_02_lookup_join_3 │ ├── columns: ol_o_id:1(int!null) ol_d_id:2(int!null) ol_w_id:3(int!null) ol_i_id:5(int!null) s_i_id:11(int!null) s_w_id:12(int!null) s_quantity:13(int!null) │ ├── key columns: [3 5] = [12 11] │ ├── lookup columns are key - │ ├── stats: [rows=216.137889, distinct(1)=19.9995949, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=185.570315, null(5)=0, 
distinct(11)=185.570315, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=30.3089364, null(13)=0] + │ ├── stats: [rows=218.87081, distinct(1)=19.9996466, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=187.914589, null(5)=0, distinct(11)=187.914589, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=30.3110384, null(13)=0] │ ├── fd: ()-->(2,3,12), (11)-->(13), (5)==(11), (11)==(5), (3)==(12), (12)==(3) │ ├── scan order_line │ │ ├── save-table-name: stock_level_02_scan_4 │ │ ├── columns: ol_o_id:1(int!null) ol_d_id:2(int!null) ol_w_id:3(int!null) ol_i_id:5(int!null) │ │ ├── constraint: /3/2/-1/4: [/1/1/999 - /1/1/980] - │ │ ├── stats: [rows=185.737555, distinct(1)=20, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=185.570315, null(5)=0] - │ │ │ histogram(3)= 0 185.74 + │ │ ├── stats: [rows=188.086084, distinct(1)=20, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(5)=187.914589, null(5)=0, distinct(2,3)=1, null(2,3)=0, distinct(1-3)=20, null(1-3)=0] + │ │ │ histogram(3)= 0 188.09 │ │ │ <---- 1 -- │ │ └── fd: ()-->(2,3) │ └── filters @@ -582,10 +582,10 @@ column_names row_count distinct_count null_count {ol_w_id} 193 1 0 ~~~~ column_names row_count_est row_count_err distinct_count_est distinct_count_err null_count_est null_count_err -{ol_d_id} 186.00 1.04 1.00 1.00 0.00 1.00 -{ol_i_id} 186.00 1.04 186.00 1.04 0.00 1.00 -{ol_o_id} 186.00 1.04 20.00 1.00 0.00 1.00 -{ol_w_id} 186.00 1.04 1.00 1.00 0.00 1.00 +{ol_d_id} 188.00 1.03 1.00 1.00 0.00 1.00 +{ol_i_id} 188.00 1.03 188.00 1.03 0.00 1.00 +{ol_o_id} 188.00 1.03 20.00 1.00 0.00 1.00 +{ol_w_id} 188.00 1.03 1.00 1.00 0.00 1.00 stats table=stock_level_02_lookup_join_3 ---- @@ -599,13 +599,13 @@ column_names row_count distinct_count null_count {s_w_id} 15 1 0 ~~~~ column_names row_count_est row_count_err distinct_count_est distinct_count_err null_count_est null_count_err -{ol_d_id} 216.00 14.40 <== 1.00 1.00 0.00 1.00 -{ol_i_id} 216.00 
14.40 <== 186.00 12.40 <== 0.00 1.00 -{ol_o_id} 216.00 14.40 <== 20.00 1.82 0.00 1.00 -{ol_w_id} 216.00 14.40 <== 1.00 1.00 0.00 1.00 -{s_i_id} 216.00 14.40 <== 186.00 12.40 <== 0.00 1.00 -{s_quantity} 216.00 14.40 <== 30.00 6.00 <== 0.00 1.00 -{s_w_id} 216.00 14.40 <== 1.00 1.00 0.00 1.00 +{ol_d_id} 219.00 14.60 <== 1.00 1.00 0.00 1.00 +{ol_i_id} 219.00 14.60 <== 188.00 12.53 <== 0.00 1.00 +{ol_o_id} 219.00 14.60 <== 20.00 1.82 0.00 1.00 +{ol_w_id} 219.00 14.60 <== 1.00 1.00 0.00 1.00 +{s_i_id} 219.00 14.60 <== 188.00 12.53 <== 0.00 1.00 +{s_quantity} 219.00 14.60 <== 30.00 6.00 <== 0.00 1.00 +{s_w_id} 219.00 14.60 <== 1.00 1.00 0.00 1.00 # TODO(radu): add stock_level_02_distinct_on_2. diff --git a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q06 b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q06 index b7debeb97224..52ce7daf007e 100644 --- a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q06 +++ b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q06 @@ -44,7 +44,7 @@ scalar-group-by │ ├── select │ │ ├── save-table-name: q6_select_3 │ │ ├── columns: l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_shipdate:11(date!null) - │ │ ├── stats: [rows=34745.8339, distinct(5)=16.6666667, null(5)=0, distinct(6)=34199.2567, null(6)=0, distinct(7)=1.22222222, null(7)=0, distinct(11)=365, null(11)=0, distinct(6,7)=34745.8339, null(6,7)=0] + │ │ ├── stats: [rows=34745.8339, distinct(5)=16.6666667, null(5)=0, distinct(6)=34199.2567, null(6)=0, distinct(7)=1.22222222, null(7)=0, distinct(11)=365, null(11)=0, distinct(6,7)=34745.8339, null(6,7)=0, distinct(5,7,11)=7435.18519, null(5,7,11)=0] │ │ │ histogram(11)= 0 0 116.69 155.56 955.78 133.34 955.78 133.34 978 133.34 1066.9 44.446 1022.4 88.892 1000.2 133.34 1066.9 44.446 1000.2 177.78 1044.7 111.12 933.55 177.78 1066.9 177.78 1066.9 22.223 1044.7 44.446 911.29 244.49 911.29 177.78 1044.7 133.34 1022.4 133.34 1000.2 155.56 1044.7 88.892 1044.7 155.56 1066.9 44.446 1044.7 66.669 1044.7 44.446 1044.7 111.12 
1000.2 177.78 1066.9 22.223 1000.2 133.34 955.78 200.04 1000.2 88.892 1044.7 111.12 426.02 85.204 │ │ │ <--- '1993-12-31' -------- '1994-01-02' -------- '1994-01-16' -------- '1994-01-30' ----- '1994-02-08' -------- '1994-02-18' -------- '1994-03-04' -------- '1994-03-15' -------- '1994-03-29' -------- '1994-04-10' -------- '1994-04-24' -------- '1994-05-03' -------- '1994-05-13' -------- '1994-05-24' -------- '1994-06-04' -------- '1994-06-13' -------- '1994-06-22' -------- '1994-07-05' -------- '1994-07-16' -------- '1994-07-29' -------- '1994-08-09' -------- '1994-08-22' -------- '1994-09-04' -------- '1994-09-15' -------- '1994-09-29' -------- '1994-10-10' -------- '1994-10-20' -------- '1994-11-03' -------- '1994-11-16' -------- '1994-11-29' -------- '1994-12-11' -------- '1994-12-25' -------- '1994-12-31' │ │ ├── index-join lineitem diff --git a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q12 b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q12 index a9d1c8f16a5a..1b601a77959a 100644 --- a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q12 +++ b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q12 @@ -76,7 +76,7 @@ sort │ │ ├── select │ │ │ ├── save-table-name: q12_select_5 │ │ │ ├── columns: l_orderkey:10(int!null) l_shipdate:20(date!null) l_commitdate:21(date!null) l_receiptdate:22(date!null) l_shipmode:24(char!null) - │ │ │ ├── stats: [rows=29822.8417, distinct(10)=29606.4627, null(10)=0, distinct(20)=2526, null(20)=0, distinct(21)=2466, null(21)=0, distinct(22)=365, null(22)=0, distinct(24)=2, null(24)=0] + │ │ │ ├── stats: [rows=29822.8417, distinct(10)=29606.4627, null(10)=0, distinct(20)=2526, null(20)=0, distinct(21)=2466, null(21)=0, distinct(22)=365, null(22)=0, distinct(24)=2, null(24)=0, distinct(22,24)=730, null(22,24)=0] │ │ │ │ histogram(22)= 0 0 826.55 57.145 876.38 95.242 895.43 38.097 895.43 76.193 876.38 95.242 914.48 57.145 838.29 133.34 914.48 57.145 781.11 152.39 914.48 38.097 857.33 95.242 838.29 228.61 819.24 152.39 914.48 133.34 914.48 
114.29 800.19 133.34 857.33 152.39 876.38 114.29 895.43 133.34 914.48 57.145 857.33 76.193 914.48 57.145 857.33 95.242 895.43 76.193 895.43 76.193 857.33 76.193 819.24 114.29 838.29 114.29 914.48 114.29 819.24 133.34 609.65 76.207 │ │ │ │ <--- '1993-12-31' -------- '1994-01-13' -------- '1994-01-26' -------- '1994-02-08' -------- '1994-02-19' -------- '1994-03-04' -------- '1994-03-14' -------- '1994-03-26' -------- '1994-04-08' -------- '1994-04-21' -------- '1994-05-03' -------- '1994-05-14' -------- '1994-05-24' -------- '1994-06-03' -------- '1994-06-15' -------- '1994-06-27' -------- '1994-07-07' -------- '1994-07-16' -------- '1994-07-26' -------- '1994-08-10' -------- '1994-08-22' -------- '1994-09-03' -------- '1994-09-15' -------- '1994-09-27' -------- '1994-10-11' -------- '1994-10-22' -------- '1994-11-02' -------- '1994-11-14' -------- '1994-11-26' -------- '1994-12-10' -------- '1994-12-22' -------- '1994-12-31' │ │ │ ├── index-join lineitem diff --git a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q16 b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q16 index e32cfc653058..0b0c7e3f7df2 100644 --- a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q16 +++ b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q16 @@ -109,13 +109,13 @@ sort │ ├── select │ │ ├── save-table-name: q16_select_8 │ │ ├── columns: p_partkey:6(int!null) p_brand:9(char!null) p_type:10(varchar!null) p_size:11(int!null) - │ │ ├── stats: [rows=3555.55556, distinct(6)=3555.43444, null(6)=0, distinct(9)=8.33333333, null(9)=0, distinct(10)=150, null(10)=0, distinct(11)=8, null(11)=0, distinct(9-11)=3553.4368, null(9-11)=0] + │ │ ├── stats: [rows=3555.55556, distinct(6)=3555.43444, null(6)=0, distinct(9)=8.33333333, null(9)=0, distinct(10)=150, null(10)=0, distinct(11)=8, null(11)=0, distinct(9,11)=66.6666667, null(9,11)=0, distinct(9-11)=3553.4368, null(9-11)=0] │ │ ├── key: (6) │ │ ├── fd: (6)-->(9-11) │ │ ├── scan part │ │ │ ├── save-table-name: q16_scan_9 │ │ │ ├── columns: p_partkey:6(int!null) 
p_brand:9(char!null) p_type:10(varchar!null) p_size:11(int!null) - │ │ │ ├── stats: [rows=200000, distinct(6)=199241, null(6)=0, distinct(9)=25, null(9)=0, distinct(10)=150, null(10)=0, distinct(11)=50, null(11)=0, distinct(9-11)=187500, null(9-11)=0] + │ │ │ ├── stats: [rows=200000, distinct(6)=199241, null(6)=0, distinct(9)=25, null(9)=0, distinct(10)=150, null(10)=0, distinct(11)=50, null(11)=0, distinct(9,11)=1250, null(9,11)=0, distinct(9-11)=187500, null(9-11)=0] │ │ │ │ histogram(6)= 0 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 │ │ │ │ <--- 27 ----- 1110 ----- 2241 ----- 3086 ----- 
4134 ----- 5302 ----- 6222 ----- 7308 ----- 8249 ----- 9171 ----- 10049 ----- 10958 ----- 11832 ----- 13025 ----- 14063 ----- 14953 ----- 16249 ----- 17419 ----- 18363 ----- 19412 ----- 20257 ----- 21190 ----- 22110 ----- 23045 ----- 23956 ----- 24962 ----- 25942 ----- 26990 ----- 27934 ----- 28876 ----- 29513 ----- 30326 ----- 31259 ----- 32300 ----- 33577 ----- 34550 ----- 35562 ----- 36498 ----- 37475 ----- 38584 ----- 39641 ----- 40548 ----- 41605 ----- 42527 ----- 43612 ----- 44702 ----- 45701 ----- 46726 ----- 47795 ----- 48935 ----- 50152 ----- 51183 ----- 52001 ----- 52904 ----- 53868 ----- 54808 ----- 55986 ----- 57155 ----- 58516 ----- 59526 ----- 60557 ----- 61547 ----- 62369 ----- 63672 ----- 64583 ----- 65360 ----- 66147 ----- 67201 ----- 68142 ----- 69145 ----- 70209 ----- 71141 ----- 71923 ----- 73031 ----- 73987 ----- 74974 ----- 76170 ----- 77138 ----- 77849 ----- 78931 ----- 79832 ----- 80761 ----- 81843 ----- 82834 ----- 84032 ----- 85072 ----- 86287 ----- 87302 ----- 88422 ----- 89432 ----- 90550 ----- 91463 ----- 92249 ----- 93385 ----- 94789 ----- 96013 ----- 96893 ----- 98000 ----- 99008 ----- 100166 ----- 101263 ----- 102351 ----- 103236 ----- 104121 ----- 105363 ----- 106329 ----- 107325 ----- 108231 ----- 109054 ----- 110019 ----- 111185 ----- 112112 ----- 112908 ----- 113904 ----- 114785 ----- 115410 ----- 116526 ----- 117559 ----- 118310 ----- 119073 ----- 120034 ----- 120817 ----- 121744 ----- 122566 ----- 123720 ----- 124813 ----- 125835 ----- 126622 ----- 127651 ----- 128328 ----- 129315 ----- 130244 ----- 131450 ----- 132439 ----- 133288 ----- 134164 ----- 135298 ----- 136347 ----- 137243 ----- 138256 ----- 139427 ----- 140374 ----- 141371 ----- 142302 ----- 143322 ----- 144335 ----- 145333 ----- 146212 ----- 147321 ----- 148591 ----- 149594 ------ 150514 ------ 151361 ------ 152059 ------ 153070 ------ 154059 ------ 155259 ------ 156473 ------ 157690 ------ 158703 ------ 159675 ------ 160597 ------ 161668 ------ 162737 ------ 163955 
------ 164942 ------ 165924 ------ 167059 ------ 167866 ------ 169034 ------ 169935 ------ 170712 ------ 171806 ------ 172841 ------ 174078 ------ 175347 ------ 176430 ------ 177346 ------ 178566 ------ 179515 ------ 180677 ------ 181729 ------ 182983 ------ 183814 ------ 184892 ------ 185696 ------ 186611 ------ 187744 ------ 188974 ------ 189911 ------ 190671 ------ 191607 ------ 192820 ------ 193789 ------ 195057 ------ 196224 ------ 197231 ------ 198281 ------ 199119 ------ 199999 │ │ │ ├── key: (6) diff --git a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q17 b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q17 index d8bc06f56631..c1bda172c749 100644 --- a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q17 +++ b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q17 @@ -100,14 +100,14 @@ project │ │ │ │ │ │ │ ├── select │ │ │ │ │ │ │ │ ├── save-table-name: q17_select_9 │ │ │ │ │ │ │ │ ├── columns: p_partkey:17(int!null) p_brand:20(char!null) p_container:23(char!null) - │ │ │ │ │ │ │ │ ├── stats: [rows=200, distinct(17)=199.999619, null(17)=0, distinct(20)=1, null(20)=0, distinct(23)=1, null(23)=0] + │ │ │ │ │ │ │ │ ├── stats: [rows=200, distinct(17)=199.999619, null(17)=0, distinct(20)=1, null(20)=0, distinct(23)=1, null(23)=0, distinct(20,23)=1, null(20,23)=0] │ │ │ │ │ │ │ │ ├── key: (17) │ │ │ │ │ │ │ │ ├── fd: ()-->(20,23) │ │ │ │ │ │ │ │ ├── ordering: +17 opt(20,23) [actual: +17] │ │ │ │ │ │ │ │ ├── scan part │ │ │ │ │ │ │ │ │ ├── save-table-name: q17_scan_10 │ │ │ │ │ │ │ │ │ ├── columns: p_partkey:17(int!null) p_brand:20(char!null) p_container:23(char!null) - │ │ │ │ │ │ │ │ │ ├── stats: [rows=200000, distinct(17)=199241, null(17)=0, distinct(20)=25, null(20)=0, distinct(23)=40, null(23)=0] + │ │ │ │ │ │ │ │ │ ├── stats: [rows=200000, distinct(17)=199241, null(17)=0, distinct(20)=25, null(20)=0, distinct(23)=40, null(23)=0, distinct(20,23)=1000, null(20,23)=0] │ │ │ │ │ │ │ │ │ │ histogram(17)= 0 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 
980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 │ │ │ │ │ │ │ │ │ │ <--- 27 ----- 1110 ----- 2241 ----- 3086 ----- 4134 ----- 5302 ----- 6222 ----- 7308 ----- 8249 ----- 9171 ----- 10049 ----- 10958 ----- 11832 ----- 13025 ----- 14063 ----- 14953 ----- 16249 ----- 17419 ----- 18363 ----- 19412 ----- 20257 ----- 21190 ----- 22110 ----- 23045 ----- 23956 ----- 24962 ----- 25942 ----- 26990 ----- 27934 ----- 28876 ----- 29513 ----- 30326 ----- 31259 ----- 32300 ----- 33577 ----- 34550 ----- 35562 ----- 36498 ----- 37475 ----- 38584 ----- 39641 ----- 40548 ----- 41605 ----- 42527 ----- 43612 ----- 44702 ----- 45701 ----- 46726 ----- 47795 ----- 48935 ----- 50152 
----- 51183 ----- 52001 ----- 52904 ----- 53868 ----- 54808 ----- 55986 ----- 57155 ----- 58516 ----- 59526 ----- 60557 ----- 61547 ----- 62369 ----- 63672 ----- 64583 ----- 65360 ----- 66147 ----- 67201 ----- 68142 ----- 69145 ----- 70209 ----- 71141 ----- 71923 ----- 73031 ----- 73987 ----- 74974 ----- 76170 ----- 77138 ----- 77849 ----- 78931 ----- 79832 ----- 80761 ----- 81843 ----- 82834 ----- 84032 ----- 85072 ----- 86287 ----- 87302 ----- 88422 ----- 89432 ----- 90550 ----- 91463 ----- 92249 ----- 93385 ----- 94789 ----- 96013 ----- 96893 ----- 98000 ----- 99008 ----- 100166 ----- 101263 ----- 102351 ----- 103236 ----- 104121 ----- 105363 ----- 106329 ----- 107325 ----- 108231 ----- 109054 ----- 110019 ----- 111185 ----- 112112 ----- 112908 ----- 113904 ----- 114785 ----- 115410 ----- 116526 ----- 117559 ----- 118310 ----- 119073 ----- 120034 ----- 120817 ----- 121744 ----- 122566 ----- 123720 ----- 124813 ----- 125835 ----- 126622 ----- 127651 ----- 128328 ----- 129315 ----- 130244 ----- 131450 ----- 132439 ----- 133288 ----- 134164 ----- 135298 ----- 136347 ----- 137243 ----- 138256 ----- 139427 ----- 140374 ----- 141371 ----- 142302 ----- 143322 ----- 144335 ----- 145333 ----- 146212 ----- 147321 ----- 148591 ----- 149594 ------ 150514 ------ 151361 ------ 152059 ------ 153070 ------ 154059 ------ 155259 ------ 156473 ------ 157690 ------ 158703 ------ 159675 ------ 160597 ------ 161668 ------ 162737 ------ 163955 ------ 164942 ------ 165924 ------ 167059 ------ 167866 ------ 169034 ------ 169935 ------ 170712 ------ 171806 ------ 172841 ------ 174078 ------ 175347 ------ 176430 ------ 177346 ------ 178566 ------ 179515 ------ 180677 ------ 181729 ------ 182983 ------ 183814 ------ 184892 ------ 185696 ------ 186611 ------ 187744 ------ 188974 ------ 189911 ------ 190671 ------ 191607 ------ 192820 ------ 193789 ------ 195057 ------ 196224 ------ 197231 ------ 198281 ------ 199119 ------ 199999 │ │ │ │ │ │ │ │ │ ├── key: (17) diff --git 
a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q19 b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q19 index d57a1f505490..02b173b16e0e 100644 --- a/pkg/sql/opt/memo/testdata/stats_quality/tpch/q19 +++ b/pkg/sql/opt/memo/testdata/stats_quality/tpch/q19 @@ -67,17 +67,17 @@ scalar-group-by │ ├── inner-join (hash) │ │ ├── save-table-name: q19_inner_join_3 │ │ ├── columns: l_partkey:2(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) p_partkey:17(int!null) p_brand:20(char!null) p_size:22(int!null) p_container:23(char!null) - │ │ ├── stats: [rows=71.4087386, distinct(2)=71.4087386, null(2)=0, distinct(5)=5.55555556, null(5)=0, distinct(6)=71.4015197, null(6)=0, distinct(7)=10.9833244, null(7)=0, distinct(14)=1, null(14)=0, distinct(15)=2, null(15)=0, distinct(17)=71.4087386, null(17)=0, distinct(20)=3, null(20)=0, distinct(22)=5.55555556, null(22)=0, distinct(23)=12, null(23)=0, distinct(6,7)=71.402791, null(6,7)=0] + │ │ ├── stats: [rows=71.4087386, distinct(2)=71.4087386, null(2)=0, distinct(5)=5.55555556, null(5)=0, distinct(6)=71.4015197, null(6)=0, distinct(7)=10.9833244, null(7)=0, distinct(14)=1, null(14)=0, distinct(15)=2, null(15)=0, distinct(17)=71.4087386, null(17)=0, distinct(20)=3, null(20)=0, distinct(22)=5.55555556, null(22)=0, distinct(23)=12, null(23)=0, distinct(6,7)=71.402791, null(6,7)=0, distinct(20,22,23)=71.4087386, null(20,22,23)=0] │ │ ├── fd: ()-->(14), (17)-->(20,22,23), (2)==(17), (17)==(2) │ │ ├── select │ │ │ ├── save-table-name: q19_select_4 │ │ │ ├── columns: l_partkey:2(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) - │ │ │ ├── stats: [rows=428658.214, distinct(2)=177863.163, null(2)=0, distinct(5)=50, null(5)=0, distinct(6)=353162.332, null(6)=0, distinct(7)=11, null(7)=0, distinct(14)=1, null(14)=0, distinct(15)=2, null(15)=0, 
distinct(6,7)=428658.214, null(6,7)=0] + │ │ │ ├── stats: [rows=428658.214, distinct(2)=177863.163, null(2)=0, distinct(5)=50, null(5)=0, distinct(6)=353162.332, null(6)=0, distinct(7)=11, null(7)=0, distinct(14)=1, null(14)=0, distinct(15)=2, null(15)=0, distinct(6,7)=428658.214, null(6,7)=0, distinct(14,15)=2, null(14,15)=0] │ │ │ ├── fd: ()-->(14) │ │ │ ├── scan lineitem │ │ │ │ ├── save-table-name: q19_scan_5 │ │ │ │ ├── columns: l_partkey:2(int!null) l_quantity:5(float!null) l_extendedprice:6(float!null) l_discount:7(float!null) l_shipinstruct:14(char!null) l_shipmode:15(char!null) - │ │ │ │ └── stats: [rows=6001215, distinct(2)=199241, null(2)=0, distinct(5)=50, null(5)=0, distinct(6)=925955, null(6)=0, distinct(7)=11, null(7)=0, distinct(14)=4, null(14)=0, distinct(15)=7, null(15)=0, distinct(6,7)=6001215, null(6,7)=0] + │ │ │ │ └── stats: [rows=6001215, distinct(2)=199241, null(2)=0, distinct(5)=50, null(5)=0, distinct(6)=925955, null(6)=0, distinct(7)=11, null(7)=0, distinct(14)=4, null(14)=0, distinct(15)=7, null(15)=0, distinct(6,7)=6001215, null(6,7)=0, distinct(14,15)=28, null(14,15)=0] │ │ │ │ histogram(2)= 0 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 1200 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 1200 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 28805 1200 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 28805 1200 29405 600 29405 600 29405 600 29405 600 29405 1200 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 
29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 1200 29405 600 29405 600 29405 600 28805 1200 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 28805 1200 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 28805 1200 29405 600 29405 600 29405 600 29405 600 29405 600 29405 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 1200 29405 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 30006 600 │ │ │ │ <--- 29 ------- 959 ------- 1845 ------- 3339 ------- 4395 ------- 5525 ------- 6349 ------- 7298 ------- 8340 ------- 9659 ------- 10756 ------- 11642 ------- 12662 ------- 13609 ------- 14663 ------- 15886 ------- 16905 ------- 17902 ------- 18705 ------- 19722 ------- 20827 ------- 21789 ------- 23026 ------- 24201 ------- 25338 ------- 26224 ------- 27182 ------- 28202 ------- 29058 ------- 29919 ------- 31031 ------- 32058 ------- 33058 ------- 34089 ------- 34972 ------- 35929 ------- 36995 ------- 37901 ------- 38814 ------- 39884 ------- 41044 ------- 42034 ------- 43257 ------- 44224 ------- 45196 ------- 46284 ------- 47373 ------- 48352 ------- 49175 ------- 50212 ------- 51359 ------- 52449 ------- 53225 ------- 54295 ------- 55556 ------- 56736 ------- 57984 ------- 59102 ------- 60117 ------- 61111 ------- 62007 ------- 
63054 ------- 63923 ------- 64971 ------- 66018 ------- 67114 ------- 67979 ------- 69163 ------- 70178 ------- 71421 ------- 72412 ------- 73432 ------- 74444 ------- 75421 ------- 76307 ------- 77194 ------- 78249 ------- 79229 ------- 80220 ------- 81103 ------- 81841 ------- 83133 ------- 84398 ------- 85526 ------- 86382 ------- 87333 ------- 88313 ------- 89185 ------- 90240 ------- 91121 ------- 91927 ------- 92840 ------- 93562 ------- 94443 ------- 95372 ------- 96282 ------- 97084 ------- 97969 ------- 99105 ------- 100036 ------- 100911 ------- 101928 ------- 102725 ------- 103625 ------- 104599 ------- 105568 ------- 106366 ------- 107318 ------- 108221 ------- 109084 ------- 110150 ------- 111130 ------- 112290 ------- 113258 ------- 114514 ------- 115361 ------- 116860 ------- 118141 ------- 119083 ------- 120205 ------- 121265 ------- 122280 ------- 123210 ------- 124316 ------- 125393 ------- 126419 ------- 127466 ------- 128467 ------- 129550 ------- 130729 ------- 131956 ------- 133065 ------- 134173 ------- 135106 ------- 136339 ------- 137237 ------- 138221 ------- 139139 ------- 140103 ------- 141250 ------- 142059 ------- 142984 ------- 144048 ------- 145243 ------- 146186 ------- 147153 ------- 148099 ------- 149103 ------- 149945 ------- 150918 ------- 151860 ------- 152830 ------- 153863 ------- 154681 ------- 156041 ------- 156862 ------- 157688 ------- 158743 ------- 159676 ------- 160684 ------- 161325 ------- 162394 ------- 163558 ------- 164576 ------- 165819 ------- 166748 ------- 167734 ------- 168986 ------- 170087 ------- 170931 ------- 171933 ------- 172836 ------- 174038 ------- 175011 ------- 175836 ------- 176680 ------- 177741 ------- 178899 ------- 179745 ------- 180631 ------- 181664 ------- 182624 ------- 183639 ------- 184414 ------- 185145 ------- 186261 ------- 187090 ------- 188033 ------- 189049 ------- 190063 ------- 191040 ------- 192115 ------- 193408 ------- 194360 ------- 195506 ------- 196582 ------- 197367 
------- 198465 ------- 199096 ------- 199970 │ │ │ └── filters @@ -86,13 +86,13 @@ scalar-group-by │ │ ├── select │ │ │ ├── save-table-name: q19_select_6 │ │ │ ├── columns: p_partkey:17(int!null) p_brand:20(char!null) p_size:22(int!null) p_container:23(char!null) - │ │ │ ├── stats: [rows=66666.6667, distinct(17)=66618.6736, null(17)=0, distinct(20)=25, null(20)=0, distinct(22)=16.6666667, null(22)=0, distinct(23)=40, null(23)=0] + │ │ │ ├── stats: [rows=66666.6667, distinct(17)=66618.6736, null(17)=0, distinct(20)=25, null(20)=0, distinct(22)=16.6666667, null(22)=0, distinct(23)=40, null(23)=0, distinct(20,22,23)=16666.6667, null(20,22,23)=0] │ │ │ ├── key: (17) │ │ │ ├── fd: (17)-->(20,22,23) │ │ │ ├── scan part │ │ │ │ ├── save-table-name: q19_scan_7 │ │ │ │ ├── columns: p_partkey:17(int!null) p_brand:20(char!null) p_size:22(int!null) p_container:23(char!null) - │ │ │ │ ├── stats: [rows=200000, distinct(17)=199241, null(17)=0, distinct(20)=25, null(20)=0, distinct(22)=50, null(22)=0, distinct(23)=40, null(23)=0] + │ │ │ │ ├── stats: [rows=200000, distinct(17)=199241, null(17)=0, distinct(20)=25, null(20)=0, distinct(22)=50, null(22)=0, distinct(23)=40, null(23)=0, distinct(20,22,23)=50000, null(20,22,23)=0] │ │ │ │ │ histogram(17)= 0 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 
20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 980 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 1000 20 │ │ │ │ │ <--- 27 ----- 1110 ----- 2241 ----- 3086 ----- 4134 ----- 5302 ----- 6222 ----- 7308 ----- 8249 ----- 9171 ----- 10049 ----- 10958 ----- 11832 ----- 13025 ----- 14063 ----- 14953 ----- 16249 ----- 17419 ----- 18363 ----- 19412 ----- 20257 ----- 21190 ----- 22110 ----- 23045 ----- 23956 ----- 24962 ----- 25942 ----- 26990 ----- 27934 ----- 28876 ----- 29513 ----- 30326 ----- 31259 ----- 32300 ----- 33577 ----- 34550 ----- 35562 ----- 36498 ----- 37475 ----- 38584 ----- 39641 ----- 40548 ----- 41605 ----- 42527 ----- 43612 ----- 44702 ----- 45701 ----- 46726 ----- 47795 ----- 48935 ----- 50152 ----- 51183 ----- 52001 ----- 52904 ----- 53868 ----- 54808 ----- 55986 ----- 57155 ----- 58516 ----- 59526 ----- 60557 ----- 61547 ----- 62369 ----- 63672 ----- 64583 ----- 65360 ----- 66147 ----- 67201 ----- 68142 ----- 69145 ----- 70209 ----- 71141 ----- 71923 ----- 73031 ----- 73987 ----- 74974 ----- 76170 ----- 77138 ----- 77849 ----- 78931 ----- 79832 ----- 80761 ----- 81843 ----- 82834 ----- 84032 ----- 85072 ----- 86287 ----- 87302 ----- 88422 ----- 89432 ----- 90550 ----- 91463 ----- 92249 ----- 93385 ----- 94789 ----- 96013 ----- 96893 ----- 98000 ----- 99008 ----- 100166 ----- 101263 ----- 102351 ----- 103236 ----- 104121 ----- 105363 ----- 106329 ----- 107325 ----- 
108231 ----- 109054 ----- 110019 ----- 111185 ----- 112112 ----- 112908 ----- 113904 ----- 114785 ----- 115410 ----- 116526 ----- 117559 ----- 118310 ----- 119073 ----- 120034 ----- 120817 ----- 121744 ----- 122566 ----- 123720 ----- 124813 ----- 125835 ----- 126622 ----- 127651 ----- 128328 ----- 129315 ----- 130244 ----- 131450 ----- 132439 ----- 133288 ----- 134164 ----- 135298 ----- 136347 ----- 137243 ----- 138256 ----- 139427 ----- 140374 ----- 141371 ----- 142302 ----- 143322 ----- 144335 ----- 145333 ----- 146212 ----- 147321 ----- 148591 ----- 149594 ------ 150514 ------ 151361 ------ 152059 ------ 153070 ------ 154059 ------ 155259 ------ 156473 ------ 157690 ------ 158703 ------ 159675 ------ 160597 ------ 161668 ------ 162737 ------ 163955 ------ 164942 ------ 165924 ------ 167059 ------ 167866 ------ 169034 ------ 169935 ------ 170712 ------ 171806 ------ 172841 ------ 174078 ------ 175347 ------ 176430 ------ 177346 ------ 178566 ------ 179515 ------ 180677 ------ 181729 ------ 182983 ------ 183814 ------ 184892 ------ 185696 ------ 186611 ------ 187744 ------ 188974 ------ 189911 ------ 190671 ------ 191607 ------ 192820 ------ 193789 ------ 195057 ------ 196224 ------ 197231 ------ 198281 ------ 199119 ------ 199999 │ │ │ │ ├── key: (17) diff --git a/pkg/sql/opt/norm/testdata/rules/combo b/pkg/sql/opt/norm/testdata/rules/combo index 2dadbf2389c6..e3ca0cf4d126 100644 --- a/pkg/sql/opt/norm/testdata/rules/combo +++ b/pkg/sql/opt/norm/testdata/rules/combo @@ -489,7 +489,7 @@ SELECT s, k FROM a WHERE s='foo' AND f>100 ---- ================================================================================ Initial expression - Cost: 1110.07 + Cost: 1110.13 ================================================================================ project ├── columns: s:4!null k:1!null @@ -507,7 +507,7 @@ Initial expression └── (s:4 = 'foo') AND (f:3 > 100.0) [outer=(3,4), constraints=(/3: [/100.00000000000001 - ]; /4: [/'foo' - /'foo']; tight), fd=()-->(4)] 
================================================================================ SimplifySelectFilters - Cost: 1110.07 + Cost: 1110.13 ================================================================================ project ├── columns: s:4!null k:1!null @@ -527,7 +527,7 @@ SimplifySelectFilters + └── f:3 > 100.0 [outer=(3), constraints=(/3: [/100.00000000000001 - ]; tight)] ================================================================================ PruneSelectCols - Cost: 1090.07 + Cost: 1090.13 ================================================================================ project ├── columns: s:4!null k:1!null @@ -550,7 +550,7 @@ PruneSelectCols └── f:3 > 100.0 [outer=(3), constraints=(/3: [/100.00000000000001 - ]; tight)] ================================================================================ GenerateIndexScans - Cost: 1080.07 + Cost: 1080.13 ================================================================================ project ├── columns: s:4!null k:1!null @@ -573,7 +573,7 @@ GenerateZigzagJoins (no changes) -------------------------------------------------------------------------------- ================================================================================ GenerateConstrainedScans - Cost: 3.62 + Cost: 10.10 ================================================================================ project ├── columns: s:4!null k:1!null @@ -595,7 +595,7 @@ GenerateConstrainedScans + └── fd: ()-->(4), (1)-->(3), (3)-->(1) ================================================================================ Final best expression - Cost: 3.62 + Cost: 10.10 ================================================================================ project ├── columns: s:4!null k:1!null diff --git a/pkg/sql/opt/xform/testdata/coster/join b/pkg/sql/opt/xform/testdata/coster/join index 69b90a6e8fa1..7a919ec223aa 100644 --- a/pkg/sql/opt/xform/testdata/coster/join +++ b/pkg/sql/opt/xform/testdata/coster/join @@ -531,23 +531,23 @@ WHERE w = 'foo' AND x = 
'2AB23800-06B1-4E19-A3BB-DF3768B808D2' AND (i,j,k,l,m,n) ---- project ├── columns: w:1!null x:2!null y:3!null z:4!null - ├── stats: [rows=1.25e-15] - ├── cost: 12200.07 + ├── stats: [rows=4.50439883] + ├── cost: 12227.475 ├── fd: ()-->(1,2) └── inner-join (lookup abcde@idx_abcd) ├── columns: w:1!null x:2!null y:3!null z:4!null i:5!null j:6!null k:7!null l:8!null m:9!null n:10!null a:12!null b:13!null c:14!null ├── key columns: [1 2 3] = [12 13 14] - ├── stats: [rows=1.25e-15, distinct(1)=1e-20, null(1)=0, distinct(2)=1e-20, null(2)=0, distinct(3)=1e-20, null(3)=0, distinct(12)=1e-20, null(12)=0, distinct(13)=1e-20, null(13)=0, distinct(14)=1e-20, null(14)=0] - ├── cost: 12200.06 + ├── stats: [rows=4.50439883, distinct(1)=0.9, null(1)=0, distinct(2)=0.9, null(2)=0, distinct(3)=0.884031733, null(3)=0, distinct(12)=0.9, null(12)=0, distinct(13)=0.9, null(13)=0, distinct(14)=0.884031733, null(14)=0] + ├── cost: 12227.42 ├── fd: ()-->(1,2,5-10,12,13), (1)==(12), (12)==(1), (2)==(13), (13)==(2), (3)==(14), (14)==(3) ├── select │ ├── columns: w:1!null x:2!null y:3!null z:4!null i:5!null j:6!null k:7!null l:8!null m:9!null n:10!null - │ ├── stats: [rows=1e-20, distinct(1)=1e-20, null(1)=0, distinct(2)=1e-20, null(2)=0, distinct(3)=1e-20, null(3)=0, distinct(4)=1e-20, null(4)=0, distinct(5)=1e-20, null(5)=0, distinct(6)=1e-20, null(6)=0, distinct(7)=1e-20, null(7)=0, distinct(8)=1e-20, null(8)=0, distinct(9)=1e-20, null(9)=0, distinct(10)=1e-20, null(10)=0] + │ ├── stats: [rows=0.9, distinct(1)=0.9, null(1)=0, distinct(2)=0.9, null(2)=0, distinct(3)=0.884031733, null(3)=0, distinct(4)=0.899635587, null(4)=0, distinct(5)=0.9, null(5)=0, distinct(6)=0.9, null(6)=0, distinct(7)=0.9, null(7)=0, distinct(8)=0.9, null(8)=0, distinct(9)=0.9, null(9)=0, distinct(10)=0.9, null(10)=0, distinct(5-10)=0.9, null(5-10)=0] │ ├── cost: 12200.03 │ ├── fd: ()-->(1,2,5-10) │ ├── scan wxyzijklmn │ │ ├── columns: w:1!null x:2!null y:3!null z:4!null i:5 j:6 k:7 l:8 m:9 n:10 - │ │ ├── stats: 
[rows=10000, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=25, null(3)=0, distinct(4)=1000, null(4)=0, distinct(5)=10000, null(5)=0, distinct(6)=10000, null(6)=0, distinct(7)=10000, null(7)=0, distinct(8)=10000, null(8)=0, distinct(9)=10000, null(9)=0, distinct(10)=10000, null(10)=0] + │ │ ├── stats: [rows=10000, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=25, null(3)=0, distinct(4)=1000, null(4)=0, distinct(5)=10000, null(5)=0, distinct(6)=10000, null(6)=0, distinct(7)=10000, null(7)=0, distinct(8)=10000, null(8)=0, distinct(9)=10000, null(9)=0, distinct(10)=10000, null(10)=0, distinct(5-10)=10000, null(5-10)=0] │ │ └── cost: 12100.02 │ └── filters │ ├── w:1 = 'foo' [outer=(1), constraints=(/1: [/'foo' - /'foo']; tight), fd=()-->(1)] diff --git a/pkg/sql/opt/xform/testdata/coster/scan b/pkg/sql/opt/xform/testdata/coster/scan index 7a82ac57da68..3492d8f04382 100644 --- a/pkg/sql/opt/xform/testdata/coster/scan +++ b/pkg/sql/opt/xform/testdata/coster/scan @@ -38,13 +38,13 @@ limit ├── columns: k:1!null i:2!null s:3 d:4!null ├── cardinality: [0 - 20] ├── stats: [rows=20] - ├── cost: 1197.56333 + ├── cost: 642.134762 ├── key: (1) ├── fd: (1)-->(2-4) ├── select │ ├── columns: k:1!null i:2!null s:3 d:4!null - │ ├── stats: [rows=1666.66667, distinct(1)=1666.66667, null(1)=0, distinct(2)=5, null(2)=0] - │ ├── cost: 1197.35333 + │ ├── stats: [rows=4666.66667, distinct(1)=4666.66667, null(1)=0, distinct(2)=5, null(2)=0, distinct(1,2)=4666.66667, null(1,2)=0] + │ ├── cost: 641.924762 │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ ├── limit hint: 20.00 @@ -52,10 +52,10 @@ limit │ │ ├── columns: k:1!null i:2 s:3 d:4!null │ │ ├── constraint: /1: [/6 - ] │ │ ├── stats: [rows=33333.3333, distinct(1)=33333.3333, null(1)=0] - │ │ ├── cost: 864.01 + │ │ ├── cost: 308.581429 │ │ ├── key: (1) │ │ ├── fd: (1)-->(2-4) - │ │ └── limit hint: 400.00 + │ │ └── limit hint: 142.86 │ └── filters │ └── i:2 IN (1, 3, 5, 7, 9) [outer=(2), constraints=(/2: [/1 - /1] 
[/3 - /3] [/5 - /5] [/7 - /7] [/9 - /9]; tight)] └── 20 @@ -69,14 +69,14 @@ limit ├── columns: k:1!null i:2!null s:3 d:4!null ├── cardinality: [0 - 20] ├── stats: [rows=20] - ├── cost: 490.73 + ├── cost: 52.4648066 ├── key: (1) ├── fd: (1)-->(2-4) ├── select │ ├── columns: k:1!null i:2!null s:3 d:4!null │ ├── cardinality: [0 - 450] - │ ├── stats: [rows=22.5, distinct(1)=22.5, null(1)=0, distinct(2)=5, null(2)=0] - │ ├── cost: 490.52 + │ ├── stats: [rows=407.25, distinct(1)=407.25, null(1)=0, distinct(2)=5, null(2)=0, distinct(1,2)=407.25, null(1,2)=0] + │ ├── cost: 52.2548066 │ ├── key: (1) │ ├── fd: (1)-->(2-4) │ ├── limit hint: 20.00 @@ -85,10 +85,10 @@ limit │ │ ├── constraint: /1: [/1 - /450] │ │ ├── cardinality: [0 - 450] │ │ ├── stats: [rows=450, distinct(1)=450, null(1)=0] - │ │ ├── cost: 486.01 + │ │ ├── cost: 47.7448066 │ │ ├── key: (1) │ │ ├── fd: (1)-->(2-4) - │ │ └── limit hint: 400.00 + │ │ └── limit hint: 22.10 │ └── filters │ └── i:2 IN (1, 3, 5, 7, 9) [outer=(2), constraints=(/2: [/1 - /1] [/3 - /3] [/5 - /5] [/7 - /7] [/9 - /9]; tight)] └── 20 diff --git a/pkg/sql/opt/xform/testdata/external/customer b/pkg/sql/opt/xform/testdata/external/customer index c896fb1476ca..f8341ca0a115 100644 --- a/pkg/sql/opt/xform/testdata/external/customer +++ b/pkg/sql/opt/xform/testdata/external/customer @@ -182,7 +182,7 @@ limit │ │ ├── key: (1) │ │ ├── fd: (1)-->(9) │ │ ├── ordering: +1 opt(9) [actual: +1] - │ │ └── limit hint: 10.10 + │ │ └── limit hint: 5.27 │ └── filters │ └── NOT read:9 [outer=(9), constraints=(/9: [/false - /false]; tight), fd=()-->(9)] └── 5 diff --git a/pkg/sql/opt/xform/testdata/external/tpcc-no-stats b/pkg/sql/opt/xform/testdata/external/tpcc-no-stats index 0f14b86b6fcd..998bd0217802 100644 --- a/pkg/sql/opt/xform/testdata/external/tpcc-no-stats +++ b/pkg/sql/opt/xform/testdata/external/tpcc-no-stats @@ -680,22 +680,26 @@ FROM customer WHERE c_w_id = 10 AND c_d_id = 100 AND c_last = 'Smith' ORDER BY c_first ASC ---- -project +sort ├── 
columns: c_id:1!null c_balance:17 c_first:4 c_middle:5 ├── key: (1) ├── fd: (1)-->(4,5,17) ├── ordering: +4 - └── index-join customer - ├── columns: c_id:1!null c_d_id:2!null c_w_id:3!null c_first:4 c_middle:5 c_last:6!null c_balance:17 + └── project + ├── columns: c_id:1!null c_first:4 c_middle:5 c_balance:17 ├── key: (1) - ├── fd: ()-->(2,3,6), (1)-->(4,5,17) - ├── ordering: +4 opt(2,3,6) [actual: +4] - └── scan customer@customer_idx - ├── columns: c_id:1!null c_d_id:2!null c_w_id:3!null c_first:4 c_last:6!null - ├── constraint: /3/2/6/4/1: [/10/100/'Smith' - /10/100/'Smith'] + ├── fd: (1)-->(4,5,17) + └── select + ├── columns: c_id:1!null c_d_id:2!null c_w_id:3!null c_first:4 c_middle:5 c_last:6!null c_balance:17 ├── key: (1) - ├── fd: ()-->(2,3,6), (1)-->(4) - └── ordering: +4 opt(2,3,6) [actual: +4] + ├── fd: ()-->(2,3,6), (1)-->(4,5,17) + ├── scan customer + │ ├── columns: c_id:1!null c_d_id:2!null c_w_id:3!null c_first:4 c_middle:5 c_last:6 c_balance:17 + │ ├── constraint: /3/2/1: [/10/100 - /10/100] + │ ├── key: (1) + │ └── fd: ()-->(2,3), (1)-->(4-6,17) + └── filters + └── c_last:6 = 'Smith' [outer=(6), constraints=(/6: [/'Smith' - /'Smith']; tight), fd=()-->(6)] opt format=hide-qual SELECT o_id, o_entry_d, o_carrier_id diff --git a/pkg/sql/opt/xform/testdata/external/trading b/pkg/sql/opt/xform/testdata/external/trading index 504fa6a68385..625daff58511 100644 --- a/pkg/sql/opt/xform/testdata/external/trading +++ b/pkg/sql/opt/xform/testdata/external/trading @@ -556,22 +556,22 @@ project ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null dealerid:7!null cardid:8!null buyprice:9!null sellprice:10!null discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null version:15!null ├── key columns: [8] = [1] ├── lookup columns are key - ├── stats: [rows=1, distinct(1)=0.00184973785, null(1)=0, distinct(8)=0.00184973785, null(8)=0] + ├── stats: [rows=1, distinct(1)=0.0201621393, null(1)=0, 
distinct(8)=0.0201621393, null(8)=0] ├── key: (8) ├── fd: ()-->(7), (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(9-15), (15)-->(8-14), (1)==(8), (8)==(1) ├── index-join cardsinfo │ ├── columns: dealerid:7!null cardid:8!null buyprice:9!null sellprice:10!null discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null version:15!null - │ ├── stats: [rows=0.00184973785, distinct(7)=0.00184973785, null(7)=0, distinct(8)=0.00184973785, null(8)=0, distinct(9)=0.00184973785, null(9)=0, distinct(10)=0.00184973785, null(10)=0, distinct(11)=0.00184973785, null(11)=0, distinct(12)=0.00184973785, null(12)=0, distinct(13)=0.00184973785, null(13)=0, distinct(14)=0.00184973785, null(14)=0, distinct(15)=0.00184973785, null(15)=0] - │ │ histogram(15)= 0 0 0.0018497 0 - │ │ <--- 1584421773604892000.0000000000 ----------- 1584421778604892000 + │ ├── stats: [rows=0.0201621426, distinct(7)=0.0201621426, null(7)=0, distinct(8)=0.0201621393, null(8)=0, distinct(9)=0.02016214, null(9)=0, distinct(10)=0.02016214, null(10)=0, distinct(11)=0.02016214, null(11)=0, distinct(12)=0.02016214, null(12)=0, distinct(13)=0.02016214, null(13)=0, distinct(14)=0.02016214, null(14)=0, distinct(15)=0.0201621426, null(15)=0, distinct(7,15)=0.0201621426, null(7,15)=0] + │ │ histogram(15)= 0 0 0.020162 0 + │ │ <--- 1584421773604892000.0000000000 ---------- 1584421778604892000 │ ├── key: (8) │ ├── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14) │ └── scan cardsinfo@cardsinfoversionindex │ ├── columns: dealerid:7!null cardid:8!null version:15!null │ ├── constraint: /7/15: (/1/1584421773604892000.0000000000 - /1] - │ ├── stats: [rows=0.00184973785, distinct(7)=0.00184973785, null(7)=0, distinct(15)=0.00184973785, null(15)=0] - │ │ histogram(15)= 0 0 0.0018497 0 - │ │ <--- 1584421773604892000.0000000000 ----------- 1584421778604892000 + │ ├── stats: [rows=0.0201621426, distinct(7)=0.0201621426, null(7)=0, distinct(15)=0.0201621426, null(15)=0, distinct(7,15)=0.0201621426, null(7,15)=0] + │ │ 
histogram(15)= 0 0 0.020162 0 + │ │ <--- 1584421773604892000.0000000000 ---------- 1584421778604892000 │ ├── key: (8) │ └── fd: ()-->(7), (8)-->(15), (15)-->(8) └── filters (true) @@ -699,21 +699,21 @@ project │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null sellprice:6!null buyprice:7!null transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null │ ├── key columns: [16 17 3] = [9 10 11] │ ├── lookup columns are key - │ ├── stats: [rows=157.894737, distinct(3)=157.894737, null(3)=0, distinct(11)=157.894737, null(11)=0] + │ ├── stats: [rows=478.646617, distinct(3)=478.646617, null(3)=0, distinct(11)=478.646617, null(11)=0] │ ├── key: (5,11) │ ├── fd: ()-->(1,2,4,9,10), (3,5)-->(6,7), (11)-->(12,13), (3)==(11), (11)==(3) │ ├── ordering: -(3|11) opt(1,2,4,9,10) [actual: -3] │ ├── limit hint: 20.00 │ ├── project │ │ ├── columns: "project_const_col_@9":16!null "project_const_col_@10":17!null transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null sellprice:6!null buyprice:7!null - │ │ ├── stats: [rows=157.894737] + │ │ ├── stats: [rows=478.646617] │ │ ├── key: (3,5) │ │ ├── fd: ()-->(1,2,4,16,17), (3,5)-->(6,7) │ │ ├── ordering: -3 opt(1,2,4) [actual: -3] │ │ ├── limit hint: 100.00 │ │ ├── index-join transactiondetails │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null sellprice:6!null buyprice:7!null - │ │ │ ├── stats: [rows=157.894737, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=157.894737, null(3)=0, distinct(4)=1, null(4)=0, distinct(5)=157.894114, null(5)=0, distinct(6)=157.894114, null(6)=0, distinct(7)=157.894114, null(7)=0] + │ │ │ ├── stats: [rows=478.646617, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=478.646617, null(3)=0, 
distinct(4)=1, null(4)=0, distinct(5)=478.640889, null(5)=0, distinct(6)=478.640889, null(6)=0, distinct(7)=478.640889, null(7)=0, distinct(1,2,4)=1, null(1,2,4)=0] │ │ │ ├── key: (3,5) │ │ │ ├── fd: ()-->(1,2,4), (3,5)-->(6,7) │ │ │ ├── ordering: -3 opt(1,2,4) [actual: -3] @@ -721,7 +721,7 @@ project │ │ │ └── scan transactiondetails@detailscardidindex,rev │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null │ │ │ ├── constraint: /1/2/4/3/5: [/1/false/21953 - /1/false/21953] - │ │ │ ├── stats: [rows=157.894737, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=1, null(4)=0] + │ │ │ ├── stats: [rows=478.646617, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=1, null(4)=0, distinct(1,2,4)=1, null(1,2,4)=0] │ │ │ ├── key: (3,5) │ │ │ ├── fd: ()-->(1,2,4) │ │ │ ├── ordering: -3 opt(1,2,4) [actual: -3] @@ -762,9 +762,10 @@ project # 1. The TransactionDate comparisons should be the last 2 days from the # current timestamp. However, the current timestamp is not treated as a # constant as it should be. -# 2. Missing rule to push "LIMIT 50" into GroupBy->LeftJoin complex. This +# 2. Missing rule to push "LIMIT 50" into GroupBy->RightJoin complex. This # would need to be an exploration rule since it involves an ordering. -# Or we could push down the "limit hint" into GroupBy->LeftJoin. +# Or we could push down the "limit hint" into GroupBy->RightJoin (and +# further into the InnerJoin). # 3. Wrong join-type (probably due to #2 above). Should be LookupJoin. 
# opt format=show-stats @@ -818,13 +819,13 @@ project │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(1-6,9-15,24), (15)-->(8-14), (1)==(8), (8)==(1) │ │ ├── right-join (hash) │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null transactiondetails.dealerid:16 isbuy:17 transactiondate:18 transactiondetails.cardid:19 quantity:20 - │ │ │ ├── stats: [rows=519622.136, distinct(8)=19000, null(8)=0, distinct(19)=19000, null(19)=0] + │ │ │ ├── stats: [rows=5523583.18, distinct(8)=19000, null(8)=0, distinct(19)=19000, null(19)=0] │ │ │ ├── key: (8,18-20) │ │ │ ├── fd: ()-->(7), (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(9-15), (15)-->(8-14), (1)==(8), (8)==(1), (8,18-20)-->(16,17) │ │ │ ├── scan transactiondetails │ │ │ │ ├── columns: transactiondetails.dealerid:16!null isbuy:17!null transactiondate:18!null transactiondetails.cardid:19!null quantity:20!null │ │ │ │ ├── constraint: /16/17/18/19/20: [/1/false/'2020-02-28 00:00:00+00:00' - /1/false/'2020-03-01 00:00:00+00:00'] - │ │ │ │ ├── stats: [rows=1000000, distinct(16)=1, null(16)=0, distinct(17)=1, null(17)=0, distinct(18)=1000000, null(18)=0, distinct(19)=56999.9987, null(19)=0] + │ │ │ │ ├── stats: [rows=10630000, distinct(16)=1, null(16)=0, distinct(17)=1, null(17)=0, distinct(18)=10630000, null(18)=0, distinct(19)=57000, null(19)=0, distinct(16,17)=1, null(16,17)=0, distinct(16-18)=10630000, null(16-18)=0] │ │ │ │ ├── key: (18-20) │ │ │ │ └── fd: ()-->(16,17) │ │ │ ├── inner-join (merge) @@ -902,6 +903,10 @@ project # 3. The Date comparisons should be the last 7 days from the current # timestamp. However, the current timestamp is not treated as a constant as # it should be. +# 4. 
The row count estimate for the constrained scan of transactions is too +# large, causing us to choose the incorrect join algorithm (we should +# choose a lookup join). Collecting small histograms on all columns would +# fix the issue. # opt format=show-stats SELECT @@ -921,86 +926,84 @@ WHERE GROUP BY extract(day from d.TransactionDate) ORDER BY extract(day from d.TransactionDate) ---- -group-by +sort ├── columns: extract:37 totalsell:32!null totalbuy:34!null totalprofit:36!null - ├── grouping columns: column37:37 - ├── stats: [rows=12345.679, distinct(37)=12345.679, null(37)=0] + ├── stats: [rows=1171234.57, distinct(37)=1171234.57, null(37)=0] ├── key: (37) ├── fd: (37)-->(32,34,36) ├── ordering: +37 - ├── sort - │ ├── columns: column31:31!null column33:33!null column35:35!null column37:37 - │ ├── stats: [rows=12634.4671, distinct(37)=12345.679, null(37)=0] - │ ├── ordering: +37 - │ └── project - │ ├── columns: column31:31!null column33:33!null column35:35!null column37:37 - │ ├── stats: [rows=12634.4671, distinct(37)=12345.679, null(37)=0] - │ ├── inner-join (hash) - │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null id:16!null cardsinfo.dealerid:22!null cardsinfo.cardid:23!null - │ │ ├── stats: [rows=12634.4671, distinct(3)=12345.679, null(3)=0, distinct(4)=12634.4671, null(4)=0, distinct(11)=12345.679, null(11)=0, distinct(16)=12634.4671, null(16)=0] - │ │ ├── key: (5,11,23) - │ │ ├── fd: ()-->(1,2,9,10,22), (3-5)-->(6,7), (11)-->(12,13), (16)==(4,23), (23)==(4,16), (3)==(11), (11)==(3), (4)==(16,23) - │ │ ├── scan cardsinfo@cardsinfoversionindex - │ │ │ ├── columns: cardsinfo.dealerid:22!null cardsinfo.cardid:23!null - │ │ │ ├── constraint: /22/30: [/1 - /1] - │ │ │ ├── stats: 
[rows=58333.3333, distinct(22)=1, null(22)=0, distinct(23)=37420.3552, null(23)=0] - │ │ │ ├── key: (23) - │ │ │ └── fd: ()-->(22) - │ │ ├── inner-join (hash) - │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null id:16!null - │ │ │ ├── stats: [rows=12345.679, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=12345.679, null(3)=0, distinct(4)=12345.679, null(4)=0, distinct(5)=12267.872, null(5)=0, distinct(6)=12267.872, null(6)=0, distinct(7)=12267.872, null(7)=0, distinct(9)=1, null(9)=0, distinct(10)=1, null(10)=0, distinct(11)=12345.679, null(11)=0, distinct(12)=7803.95639, null(12)=0, distinct(13)=7803.95639, null(13)=0, distinct(16)=12345.679, null(16)=0] - │ │ │ ├── key: (5,11,16) - │ │ │ ├── fd: ()-->(1,2,9,10), (11)-->(12,13), (3-5)-->(6,7), (3)==(11), (11)==(3), (4)==(16), (16)==(4) - │ │ │ ├── scan cards@cardsnamesetnumber - │ │ │ │ ├── columns: id:16!null - │ │ │ │ ├── stats: [rows=57000, distinct(16)=57000, null(16)=0] - │ │ │ │ └── key: (16) - │ │ │ ├── inner-join (lookup transactiondetails) - │ │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null - │ │ │ │ ├── key columns: [92 93 11] = [1 2 3] - │ │ │ │ ├── stats: [rows=12345.679, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=12345.679, null(3)=0, distinct(4)=11100.2211, null(4)=0, distinct(5)=12267.8812, null(5)=0, distinct(6)=12267.8812, null(6)=0, distinct(7)=12267.8812, null(7)=0, distinct(9)=1, 
null(9)=0, distinct(10)=1, null(10)=0, distinct(11)=12345.679, null(11)=0, distinct(12)=7803.95979, null(12)=0, distinct(13)=7803.95979, null(13)=0] - │ │ │ │ ├── key: (4,5,11) - │ │ │ │ ├── fd: ()-->(1,2,9,10), (11)-->(12,13), (3-5)-->(6,7), (3)==(11), (11)==(3) - │ │ │ │ ├── project - │ │ │ │ │ ├── columns: "project_const_col_@1":92!null "project_const_col_@2":93!null transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null - │ │ │ │ │ ├── stats: [rows=12345.679, distinct(9)=1, null(9)=0, distinct(10)=1, null(10)=0, distinct(11)=12345.679, null(11)=0, distinct(12)=12345.679, null(12)=0, distinct(13)=12345.679, null(13)=0, distinct(92)=1, null(92)=0, distinct(93)=1, null(93)=0] - │ │ │ │ │ ├── key: (11) - │ │ │ │ │ ├── fd: ()-->(9,10,92,93), (11)-->(12,13) - │ │ │ │ │ ├── select - │ │ │ │ │ │ ├── columns: transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null - │ │ │ │ │ │ ├── stats: [rows=12345.679, distinct(9)=1, null(9)=0, distinct(10)=1, null(10)=0, distinct(11)=12345.679, null(11)=0, distinct(12)=12345.679, null(12)=0, distinct(13)=12345.679, null(13)=0] - │ │ │ │ │ │ ├── key: (11) - │ │ │ │ │ │ ├── fd: ()-->(9,10), (11)-->(12,13) - │ │ │ │ │ │ ├── scan transactions - │ │ │ │ │ │ │ ├── columns: transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null - │ │ │ │ │ │ │ ├── constraint: /9/10/11: [/1/false/'2020-02-23 00:00:00+00:00' - /1/false/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ │ │ ├── stats: [rows=111111.111, distinct(9)=1, null(9)=0, distinct(10)=1, null(10)=0, distinct(11)=111111.111, null(11)=0] - │ │ │ │ │ │ │ ├── key: (11) - │ │ │ │ │ │ │ └── fd: ()-->(9,10), (11)-->(12,13) - │ │ │ │ │ │ └── filters - │ │ │ │ │ │ ├── accountname:12 != 'someaccount' [outer=(12), constraints=(/12: (/NULL - /'someaccount') [/e'someaccount\x00' - ]; tight)] - │ │ │ │ │ │ └── customername:13 != 'somecustomer' 
[outer=(13), constraints=(/13: (/NULL - /'somecustomer') [/e'somecustomer\x00' - ]; tight)] - │ │ │ │ │ └── projections - │ │ │ │ │ ├── 1 [as="project_const_col_@1":92] - │ │ │ │ │ └── false [as="project_const_col_@2":93] - │ │ │ │ └── filters - │ │ │ │ └── (transactiondate:3 >= '2020-02-23 00:00:00+00:00') AND (transactiondate:3 <= '2020-03-01 00:00:00+00:00') [outer=(3), constraints=(/3: [/'2020-02-23 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] - │ │ │ └── filters - │ │ │ └── id:16 = transactiondetails.cardid:4 [outer=(4,16), constraints=(/4: (/NULL - ]; /16: (/NULL - ]), fd=(4)==(16), (16)==(4)] - │ │ └── filters - │ │ └── id:16 = cardsinfo.cardid:23 [outer=(16,23), constraints=(/16: (/NULL - ]; /23: (/NULL - ]), fd=(16)==(23), (23)==(16)] - │ └── projections - │ ├── transactiondetails.sellprice:6 * quantity:5 [as=column31:31, outer=(5,6)] - │ ├── transactiondetails.buyprice:7 * quantity:5 [as=column33:33, outer=(5,7)] - │ ├── quantity:5 * (transactiondetails.sellprice:6 - transactiondetails.buyprice:7) [as=column35:35, outer=(5-7)] - │ └── extract('day', transactiondate:3) [as=column37:37, outer=(3)] - └── aggregations - ├── sum [as=sum:32, outer=(31)] - │ └── column31:31 - ├── sum [as=sum:34, outer=(33)] - │ └── column33:33 - └── sum [as=sum:36, outer=(35)] - └── column35:35 + └── group-by + ├── columns: sum:32!null sum:34!null sum:36!null column37:37 + ├── grouping columns: column37:37 + ├── stats: [rows=1171234.57, distinct(37)=1171234.57, null(37)=0] + ├── key: (37) + ├── fd: (37)-->(32,34,36) + ├── project + │ ├── columns: column31:31!null column33:33!null column35:35!null column37:37 + │ ├── stats: [rows=1198631.87, distinct(37)=1171234.57, null(37)=0] + │ ├── inner-join (hash) + │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:9!null 
transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null id:16!null cardsinfo.dealerid:22!null cardsinfo.cardid:23!null + │ │ ├── stats: [rows=1198631.87, distinct(3)=1171234.57, null(3)=0, distinct(4)=37420.3552, null(4)=0, distinct(11)=1171234.57, null(11)=0, distinct(16)=37420.3552, null(16)=0] + │ │ ├── key: (5,11,23) + │ │ ├── fd: ()-->(1,2,9,10,22), (3-5)-->(6,7), (11)-->(12,13), (16)==(4,23), (23)==(4,16), (3)==(11), (11)==(3), (4)==(16,23) + │ │ ├── inner-join (hash) + │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null + │ │ │ ├── stats: [rows=1171234.57, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1171234.57, null(3)=0, distinct(4)=56999.9999, null(4)=0, distinct(5)=1091498.71, null(5)=0, distinct(6)=1091498.71, null(6)=0, distinct(7)=1091498.71, null(7)=0, distinct(9)=1, null(9)=0, distinct(10)=1, null(10)=0, distinct(11)=1171234.57, null(11)=0, distinct(12)=551608.449, null(12)=0, distinct(13)=551608.449, null(13)=0] + │ │ │ ├── key: (4,5,11) + │ │ │ ├── fd: ()-->(1,2,9,10), (11)-->(12,13), (3-5)-->(6,7), (3)==(11), (11)==(3) + │ │ │ ├── scan transactiondetails + │ │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null + │ │ │ │ ├── constraint: /1/2/3/4/5: [/1/false/'2020-02-23 00:00:00+00:00' - /1/false/'2020-03-01 00:00:00+00:00'] + │ │ │ │ ├── stats: [rows=10630000, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=10630000, null(3)=0, distinct(4)=57000, null(4)=0, distinct(5)=8207077.23, null(5)=0, distinct(6)=8207077.23, null(6)=0, 
distinct(7)=8207077.23, null(7)=0, distinct(1,2)=1, null(1,2)=0, distinct(1-3)=10630000, null(1-3)=0] + │ │ │ │ ├── key: (3-5) + │ │ │ │ └── fd: ()-->(1,2), (3-5)-->(6,7) + │ │ │ ├── select + │ │ │ │ ├── columns: transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null + │ │ │ │ ├── stats: [rows=1171234.57, distinct(9)=1, null(9)=0, distinct(10)=1, null(10)=0, distinct(11)=1171234.57, null(11)=0, distinct(12)=666666.667, null(12)=0, distinct(13)=666666.667, null(13)=0, distinct(9,10)=1, null(9,10)=0, distinct(9-13)=1171234.57, null(9-13)=0] + │ │ │ │ ├── key: (11) + │ │ │ │ ├── fd: ()-->(9,10), (11)-->(12,13) + │ │ │ │ ├── scan transactions + │ │ │ │ │ ├── columns: transactions.dealerid:9!null transactions.isbuy:10!null date:11!null accountname:12!null customername:13!null + │ │ │ │ │ ├── constraint: /9/10/11: [/1/false/'2020-02-23 00:00:00+00:00' - /1/false/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── stats: [rows=1181111.11, distinct(9)=1, null(9)=0, distinct(10)=1, null(10)=0, distinct(11)=1181111.11, null(11)=0, distinct(9,10)=1, null(9,10)=0, distinct(9-11)=1181111.11, null(9-11)=0] + │ │ │ │ │ ├── key: (11) + │ │ │ │ │ └── fd: ()-->(9,10), (11)-->(12,13) + │ │ │ │ └── filters + │ │ │ │ ├── accountname:12 != 'someaccount' [outer=(12), constraints=(/12: (/NULL - /'someaccount') [/e'someaccount\x00' - ]; tight)] + │ │ │ │ └── customername:13 != 'somecustomer' [outer=(13), constraints=(/13: (/NULL - /'somecustomer') [/e'somecustomer\x00' - ]; tight)] + │ │ │ └── filters + │ │ │ └── transactiondate:3 = date:11 [outer=(3,11), constraints=(/3: (/NULL - ]; /11: (/NULL - ]), fd=(3)==(11), (11)==(3)] + │ │ ├── inner-join (hash) + │ │ │ ├── columns: id:16!null cardsinfo.dealerid:22!null cardsinfo.cardid:23!null + │ │ │ ├── stats: [rows=58333.3333, distinct(16)=37420.3552, null(16)=0, distinct(22)=1, null(22)=0, distinct(23)=37420.3552, null(23)=0] + │ │ │ ├── key: (23) + │ │ │ ├── fd: ()-->(22), (16)==(23), (23)==(16) 
+ │ │ │ ├── scan cardsinfo@cardsinfoversionindex + │ │ │ │ ├── columns: cardsinfo.dealerid:22!null cardsinfo.cardid:23!null + │ │ │ │ ├── constraint: /22/30: [/1 - /1] + │ │ │ │ ├── stats: [rows=58333.3333, distinct(22)=1, null(22)=0, distinct(23)=37420.3552, null(23)=0] + │ │ │ │ ├── key: (23) + │ │ │ │ └── fd: ()-->(22) + │ │ │ ├── scan cards@cardsnamesetnumber + │ │ │ │ ├── columns: id:16!null + │ │ │ │ ├── stats: [rows=57000, distinct(16)=57000, null(16)=0] + │ │ │ │ └── key: (16) + │ │ │ └── filters + │ │ │ └── id:16 = cardsinfo.cardid:23 [outer=(16,23), constraints=(/16: (/NULL - ]; /23: (/NULL - ]), fd=(16)==(23), (23)==(16)] + │ │ └── filters + │ │ └── id:16 = transactiondetails.cardid:4 [outer=(4,16), constraints=(/4: (/NULL - ]; /16: (/NULL - ]), fd=(4)==(16), (16)==(4)] + │ └── projections + │ ├── transactiondetails.sellprice:6 * quantity:5 [as=column31:31, outer=(5,6)] + │ ├── transactiondetails.buyprice:7 * quantity:5 [as=column33:33, outer=(5,7)] + │ ├── quantity:5 * (transactiondetails.sellprice:6 - transactiondetails.buyprice:7) [as=column35:35, outer=(5-7)] + │ └── extract('day', transactiondate:3) [as=column37:37, outer=(3)] + └── aggregations + ├── sum [as=sum:32, outer=(31)] + │ └── column31:31 + ├── sum [as=sum:34, outer=(33)] + │ └── column33:33 + └── sum [as=sum:36, outer=(35)] + └── column35:35 # Check if transaction was already inserted, for idempotency. 
# @@ -1152,37 +1155,39 @@ values │ │ ├── cardinality: [0 - 100] │ │ ├── key: (5,9-11) │ │ ├── fd: ()-->(4), (1)==(9), (9)==(1), (2)==(10), (10)==(2), (3)==(11), (11)==(3) - │ │ ├── sort + │ │ ├── inner-join (lookup transactions) │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null t.dealerid:9!null t.isbuy:10!null date:11!null + │ │ │ ├── key columns: [1 2 3] = [9 10 11] + │ │ │ ├── lookup columns are key │ │ │ ├── key: (5,9-11) │ │ │ ├── fd: ()-->(4), (1)==(9), (9)==(1), (2)==(10), (10)==(2), (3)==(11), (11)==(3) │ │ │ ├── ordering: -(3|11) opt(4) [actual: -3] │ │ │ ├── limit hint: 100.00 - │ │ │ └── inner-join (lookup transactions) - │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null t.dealerid:9!null t.isbuy:10!null date:11!null - │ │ │ ├── key columns: [1 2 3] = [9 10 11] - │ │ │ ├── lookup columns are key - │ │ │ ├── key: (5,9-11) - │ │ │ ├── fd: ()-->(4), (1)==(9), (9)==(1), (2)==(10), (10)==(2), (3)==(11), (11)==(3) - │ │ │ ├── scan d@detailscardidindex - │ │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null - │ │ │ │ ├── constraint: /1/2/4/3/5 - │ │ │ │ │ ├── [/1/false/19483/'2020-02-28 00:00:00+00:00' - /1/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/1/true/19483/'2020-02-28 00:00:00+00:00' - /1/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/2/false/19483/'2020-02-28 00:00:00+00:00' - /2/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/2/true/19483/'2020-02-28 00:00:00+00:00' - /2/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/3/false/19483/'2020-02-28 00:00:00+00:00' - /3/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/3/true/19483/'2020-02-28 00:00:00+00:00' - /3/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/4/false/19483/'2020-02-28 00:00:00+00:00' - /4/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── 
[/4/true/19483/'2020-02-28 00:00:00+00:00' - /4/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/5/false/19483/'2020-02-28 00:00:00+00:00' - /5/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ └── [/5/true/19483/'2020-02-28 00:00:00+00:00' - /5/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ ├── key: (1-3,5) - │ │ │ │ └── fd: ()-->(4) - │ │ │ └── filters - │ │ │ ├── (date:11 >= '2020-02-28 00:00:00+00:00') AND (date:11 <= '2020-03-01 00:00:00+00:00') [outer=(11), constraints=(/11: [/'2020-02-28 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] - │ │ │ ├── ((((t.dealerid:9 = 1) OR (t.dealerid:9 = 2)) OR (t.dealerid:9 = 3)) OR (t.dealerid:9 = 4)) OR (t.dealerid:9 = 5) [outer=(9), constraints=(/9: [/1 - /1] [/2 - /2] [/3 - /3] [/4 - /4] [/5 - /5]; tight)] - │ │ │ └── t.isbuy:10 IN (false, true) [outer=(10), constraints=(/10: [/false - /false] [/true - /true]; tight)] + │ │ │ ├── sort + │ │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null + │ │ │ │ ├── key: (1-3,5) + │ │ │ │ ├── fd: ()-->(4) + │ │ │ │ ├── ordering: -3 opt(4) [actual: -3] + │ │ │ │ ├── limit hint: 1100.00 + │ │ │ │ └── scan d@detailscardidindex + │ │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null + │ │ │ │ ├── constraint: /1/2/4/3/5 + │ │ │ │ │ ├── [/1/false/19483/'2020-02-28 00:00:00+00:00' - /1/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/1/true/19483/'2020-02-28 00:00:00+00:00' - /1/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/2/false/19483/'2020-02-28 00:00:00+00:00' - /2/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/2/true/19483/'2020-02-28 00:00:00+00:00' - /2/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/3/false/19483/'2020-02-28 00:00:00+00:00' - /3/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/3/true/19483/'2020-02-28 00:00:00+00:00' - /3/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── 
[/4/false/19483/'2020-02-28 00:00:00+00:00' - /4/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/4/true/19483/'2020-02-28 00:00:00+00:00' - /4/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/5/false/19483/'2020-02-28 00:00:00+00:00' - /5/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ └── [/5/true/19483/'2020-02-28 00:00:00+00:00' - /5/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ ├── key: (1-3,5) + │ │ │ │ └── fd: ()-->(4) + │ │ │ └── filters + │ │ │ ├── (date:11 >= '2020-02-28 00:00:00+00:00') AND (date:11 <= '2020-03-01 00:00:00+00:00') [outer=(11), constraints=(/11: [/'2020-02-28 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] + │ │ │ ├── ((((t.dealerid:9 = 1) OR (t.dealerid:9 = 2)) OR (t.dealerid:9 = 3)) OR (t.dealerid:9 = 4)) OR (t.dealerid:9 = 5) [outer=(9), constraints=(/9: [/1 - /1] [/2 - /2] [/3 - /3] [/4 - /4] [/5 - /5]; tight)] + │ │ │ └── t.isbuy:10 IN (false, true) [outer=(10), constraints=(/10: [/false - /false] [/true - /true]; tight)] │ │ └── 100 │ └── aggregations │ └── sum [as=sum:16, outer=(5)] diff --git a/pkg/sql/opt/xform/testdata/external/trading-mutation b/pkg/sql/opt/xform/testdata/external/trading-mutation index e4a843b73e98..b632c6d43bc3 100644 --- a/pkg/sql/opt/xform/testdata/external/trading-mutation +++ b/pkg/sql/opt/xform/testdata/external/trading-mutation @@ -557,26 +557,26 @@ FROM CardsView WHERE Version > 1584421773604892000.0000000000 ---- project ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null buyprice:9!null sellprice:10!null desiredinventory:12!null actualinventory:13!null version:15!null discount:11!null maxinventory:14!null - ├── stats: [rows=0.3325] + ├── stats: [rows=1] ├── key: (15) ├── fd: (1)-->(2-6,9-15), (2,4,5)~~>(1,3,6), (15)-->(1-6,9-14) └── inner-join (lookup cards) ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null dealerid:7!null cardid:8!null buyprice:9!null sellprice:10!null discount:11!null 
desiredinventory:12!null actualinventory:13!null maxinventory:14!null version:15!null ├── key columns: [8] = [1] ├── lookup columns are key - ├── stats: [rows=0.3325, distinct(1)=5.83333167e-06, null(1)=0, distinct(8)=5.83333167e-06, null(8)=0] + ├── stats: [rows=1, distinct(1)=6.35833333e-05, null(1)=0, distinct(8)=6.35833333e-05, null(8)=0] ├── key: (8) ├── fd: ()-->(7), (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(9-15), (15)-->(8-14), (1)==(8), (8)==(1) ├── index-join cardsinfo │ ├── columns: dealerid:7!null cardid:8!null buyprice:9!null sellprice:10!null discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null version:15!null - │ ├── stats: [rows=5.83333333e-06, distinct(7)=5.83333333e-06, null(7)=0, distinct(8)=5.83333167e-06, null(8)=0, distinct(9)=5.83333333e-06, null(9)=0, distinct(10)=5.83333333e-06, null(10)=0, distinct(11)=5.83333333e-06, null(11)=0, distinct(12)=5.83333333e-06, null(12)=0, distinct(13)=5.83333333e-06, null(13)=0, distinct(14)=5.83333333e-06, null(14)=0, distinct(15)=5.83333333e-06, null(15)=0] + │ ├── stats: [rows=6.35833333e-05, distinct(7)=6.35833333e-05, null(7)=0, distinct(8)=6.35833333e-05, null(8)=0, distinct(9)=6.35833333e-05, null(9)=0, distinct(10)=6.35833333e-05, null(10)=0, distinct(11)=6.35833333e-05, null(11)=0, distinct(12)=6.35833333e-05, null(12)=0, distinct(13)=6.35833333e-05, null(13)=0, distinct(14)=6.35833333e-05, null(14)=0, distinct(15)=6.35833333e-05, null(15)=0, distinct(7,15)=6.35833333e-05, null(7,15)=0] │ │ histogram(15)= │ ├── key: (8) │ ├── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14) │ └── scan cardsinfo@cardsinfoversionindex │ ├── columns: dealerid:7!null cardid:8!null version:15!null │ ├── constraint: /7/15: (/1/1584421773604892000.0000000000 - /1] - │ ├── stats: [rows=5.83333333e-06, distinct(7)=5.83333333e-06, null(7)=0, distinct(15)=5.83333333e-06, null(15)=0] + │ ├── stats: [rows=6.35833333e-05, distinct(7)=6.35833333e-05, null(7)=0, distinct(15)=6.35833333e-05, null(15)=0, 
distinct(7,15)=6.35833333e-05, null(7,15)=0] │ │ histogram(15)= │ ├── key: (8) │ └── fd: ()-->(7), (8)-->(15), (15)-->(8) @@ -703,21 +703,21 @@ project │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null sellprice:6!null buyprice:7!null transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null │ ├── key columns: [20 21 3] = [11 12 13] │ ├── lookup columns are key - │ ├── stats: [rows=157.894737, distinct(3)=157.894737, null(3)=0, distinct(13)=157.894737, null(13)=0] + │ ├── stats: [rows=478.646617, distinct(3)=478.646617, null(3)=0, distinct(13)=478.646617, null(13)=0] │ ├── key: (5,13) │ ├── fd: ()-->(1,2,4,11,12), (3,5)-->(6,7), (13)-->(14,15), (3)==(13), (13)==(3) │ ├── ordering: -(3|13) opt(1,2,4,11,12) [actual: -3] │ ├── limit hint: 20.00 │ ├── project │ │ ├── columns: "project_const_col_@11":20!null "project_const_col_@12":21!null transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null sellprice:6!null buyprice:7!null - │ │ ├── stats: [rows=157.894737] + │ │ ├── stats: [rows=478.646617] │ │ ├── key: (3,5) │ │ ├── fd: ()-->(1,2,4,20,21), (3,5)-->(6,7) │ │ ├── ordering: -3 opt(1,2,4) [actual: -3] │ │ ├── limit hint: 100.00 │ │ ├── index-join transactiondetails │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null sellprice:6!null buyprice:7!null - │ │ │ ├── stats: [rows=157.894737, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=157.894737, null(3)=0, distinct(4)=1, null(4)=0, distinct(5)=157.894114, null(5)=0, distinct(6)=157.894114, null(6)=0, distinct(7)=157.894114, null(7)=0] + │ │ │ ├── stats: [rows=478.646617, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=478.646617, null(3)=0, distinct(4)=1, null(4)=0, distinct(5)=478.640889, null(5)=0, 
distinct(6)=478.640889, null(6)=0, distinct(7)=478.640889, null(7)=0, distinct(1,2,4)=1, null(1,2,4)=0] │ │ │ ├── key: (3,5) │ │ │ ├── fd: ()-->(1,2,4), (3,5)-->(6,7) │ │ │ ├── ordering: -3 opt(1,2,4) [actual: -3] @@ -725,7 +725,7 @@ project │ │ │ └── scan transactiondetails@detailscardidindex,rev │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null │ │ │ ├── constraint: /1/2/4/3/5: [/1/false/21953 - /1/false/21953] - │ │ │ ├── stats: [rows=157.894737, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=1, null(4)=0] + │ │ │ ├── stats: [rows=478.646617, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=1, null(4)=0, distinct(1,2,4)=1, null(1,2,4)=0] │ │ │ ├── key: (3,5) │ │ │ ├── fd: ()-->(1,2,4) │ │ │ ├── ordering: -3 opt(1,2,4) [actual: -3] @@ -766,9 +766,10 @@ project # 1. The TransactionDate comparisons should be the last 2 days from the # current timestamp. However, the current timestamp is not treated as a # constant as it should be. -# 2. Missing rule to push "LIMIT 50" into GroupBy->LeftJoin complex. This +# 2. Missing rule to push "LIMIT 50" into GroupBy->RightJoin complex. This # would need to be an exploration rule since it involves an ordering. -# Or we could push down the "limit hint" into GroupBy->LeftJoin. +# Or we could push down the "limit hint" into GroupBy->RightJoin (and +# further into the InnerJoin). # 3. Wrong join-type (probably due to #3 above). Should be LookupJoin. 
# opt format=show-stats @@ -822,13 +823,13 @@ project │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(1-6,9-15,30), (15)-->(8-14), (1)==(8), (8)==(1) │ │ ├── right-join (hash) │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null cardsinfo.discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null transactiondetails.dealerid:20 isbuy:21 transactiondate:22 transactiondetails.cardid:23 quantity:24 - │ │ │ ├── stats: [rows=519622.136, distinct(8)=19000, null(8)=0, distinct(23)=19000, null(23)=0] + │ │ │ ├── stats: [rows=5523583.18, distinct(8)=19000, null(8)=0, distinct(23)=19000, null(23)=0] │ │ │ ├── key: (8,22-24) │ │ │ ├── fd: ()-->(7), (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(9-15), (15)-->(8-14), (1)==(8), (8)==(1), (8,22-24)-->(20,21) │ │ │ ├── scan transactiondetails │ │ │ │ ├── columns: transactiondetails.dealerid:20!null isbuy:21!null transactiondate:22!null transactiondetails.cardid:23!null quantity:24!null │ │ │ │ ├── constraint: /20/21/22/23/24: [/1/false/'2020-02-28 00:00:00+00:00' - /1/false/'2020-03-01 00:00:00+00:00'] - │ │ │ │ ├── stats: [rows=1000000, distinct(20)=1, null(20)=0, distinct(21)=1, null(21)=0, distinct(22)=1000000, null(22)=0, distinct(23)=56999.9987, null(23)=0] + │ │ │ │ ├── stats: [rows=10630000, distinct(20)=1, null(20)=0, distinct(21)=1, null(21)=0, distinct(22)=10630000, null(22)=0, distinct(23)=57000, null(23)=0, distinct(20,21)=1, null(20,21)=0, distinct(20-22)=10630000, null(20-22)=0] │ │ │ │ ├── key: (22-24) │ │ │ │ └── fd: ()-->(20,21) │ │ │ ├── inner-join (merge) @@ -906,6 +907,10 @@ project # 3. The Date comparisons should be the last 7 days from the current # timestamp. However, the current timestamp is not treated as a constant as # it should be. +# 4. 
The row count estimate for the constrained scan of transactions is too +# large, causing us to choose the incorrect join algorithm (we should +# choose a lookup join). Collecting small histograms on all columns would +# fix the issue. # opt format=show-stats SELECT @@ -925,86 +930,84 @@ WHERE GROUP BY extract(day from d.TransactionDate) ORDER BY extract(day from d.TransactionDate) ---- -group-by +sort ├── columns: extract:45 totalsell:40!null totalbuy:42!null totalprofit:44!null - ├── grouping columns: column45:45 - ├── stats: [rows=12345.679, distinct(45)=12345.679, null(45)=0] + ├── stats: [rows=1171234.57, distinct(45)=1171234.57, null(45)=0] ├── key: (45) ├── fd: (45)-->(40,42,44) ├── ordering: +45 - ├── sort - │ ├── columns: column39:39!null column41:41!null column43:43!null column45:45 - │ ├── stats: [rows=12634.4671, distinct(45)=12345.679, null(45)=0] - │ ├── ordering: +45 - │ └── project - │ ├── columns: column39:39!null column41:41!null column43:43!null column45:45 - │ ├── stats: [rows=12634.4671, distinct(45)=12345.679, null(45)=0] - │ ├── inner-join (hash) - │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null id:20!null cardsinfo.dealerid:26!null cardsinfo.cardid:27!null - │ │ ├── stats: [rows=12634.4671, distinct(3)=12345.679, null(3)=0, distinct(4)=12634.4671, null(4)=0, distinct(13)=12345.679, null(13)=0, distinct(20)=12634.4671, null(20)=0] - │ │ ├── key: (5,13,27) - │ │ ├── fd: ()-->(1,2,11,12,26), (3-5)-->(6,7), (13)-->(14,15), (20)==(4,27), (27)==(4,20), (3)==(13), (13)==(3), (4)==(20,27) - │ │ ├── scan cardsinfo@cardsinfoversionindex - │ │ │ ├── columns: cardsinfo.dealerid:26!null cardsinfo.cardid:27!null - │ │ │ ├── constraint: /26/34: [/1 - /1] - │ │ │ ├── stats: 
[rows=58333.3333, distinct(26)=1, null(26)=0, distinct(27)=37420.3552, null(27)=0] - │ │ │ ├── key: (27) - │ │ │ └── fd: ()-->(26) - │ │ ├── inner-join (hash) - │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null id:20!null - │ │ │ ├── stats: [rows=12345.679, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=12345.679, null(3)=0, distinct(4)=12345.679, null(4)=0, distinct(5)=12267.872, null(5)=0, distinct(6)=12267.872, null(6)=0, distinct(7)=12267.872, null(7)=0, distinct(11)=1, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=12345.679, null(13)=0, distinct(14)=7803.95639, null(14)=0, distinct(15)=7803.95639, null(15)=0, distinct(20)=12345.679, null(20)=0] - │ │ │ ├── key: (5,13,20) - │ │ │ ├── fd: ()-->(1,2,11,12), (13)-->(14,15), (3-5)-->(6,7), (3)==(13), (13)==(3), (4)==(20), (20)==(4) - │ │ │ ├── scan cards@cardsnamesetnumber - │ │ │ │ ├── columns: id:20!null - │ │ │ │ ├── stats: [rows=57000, distinct(20)=57000, null(20)=0] - │ │ │ │ └── key: (20) - │ │ │ ├── inner-join (lookup transactiondetails) - │ │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null - │ │ │ │ ├── key columns: [100 101 13] = [1 2 3] - │ │ │ │ ├── stats: [rows=12345.679, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=12345.679, null(3)=0, distinct(4)=11100.2211, null(4)=0, distinct(5)=12267.8812, null(5)=0, distinct(6)=12267.8812, null(6)=0, distinct(7)=12267.8812, null(7)=0, distinct(11)=1, 
null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=12345.679, null(13)=0, distinct(14)=7803.95979, null(14)=0, distinct(15)=7803.95979, null(15)=0] - │ │ │ │ ├── key: (4,5,13) - │ │ │ │ ├── fd: ()-->(1,2,11,12), (13)-->(14,15), (3-5)-->(6,7), (3)==(13), (13)==(3) - │ │ │ │ ├── project - │ │ │ │ │ ├── columns: "project_const_col_@1":100!null "project_const_col_@2":101!null transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null - │ │ │ │ │ ├── stats: [rows=12345.679, distinct(11)=1, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=12345.679, null(13)=0, distinct(14)=12345.679, null(14)=0, distinct(15)=12345.679, null(15)=0, distinct(100)=1, null(100)=0, distinct(101)=1, null(101)=0] - │ │ │ │ │ ├── key: (13) - │ │ │ │ │ ├── fd: ()-->(11,12,100,101), (13)-->(14,15) - │ │ │ │ │ ├── select - │ │ │ │ │ │ ├── columns: transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null - │ │ │ │ │ │ ├── stats: [rows=12345.679, distinct(11)=1, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=12345.679, null(13)=0, distinct(14)=12345.679, null(14)=0, distinct(15)=12345.679, null(15)=0] - │ │ │ │ │ │ ├── key: (13) - │ │ │ │ │ │ ├── fd: ()-->(11,12), (13)-->(14,15) - │ │ │ │ │ │ ├── scan transactions - │ │ │ │ │ │ │ ├── columns: transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null - │ │ │ │ │ │ │ ├── constraint: /11/12/13: [/1/false/'2020-02-23 00:00:00+00:00' - /1/false/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ │ │ ├── stats: [rows=111111.111, distinct(11)=1, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=111111.111, null(13)=0] - │ │ │ │ │ │ │ ├── key: (13) - │ │ │ │ │ │ │ └── fd: ()-->(11,12), (13)-->(14,15) - │ │ │ │ │ │ └── filters - │ │ │ │ │ │ ├── accountname:14 != 'someaccount' [outer=(14), constraints=(/14: (/NULL - /'someaccount') [/e'someaccount\x00' - ]; tight)] - │ │ │ │ │ │ └── customername:15 
!= 'somecustomer' [outer=(15), constraints=(/15: (/NULL - /'somecustomer') [/e'somecustomer\x00' - ]; tight)] - │ │ │ │ │ └── projections - │ │ │ │ │ ├── 1 [as="project_const_col_@1":100] - │ │ │ │ │ └── false [as="project_const_col_@2":101] - │ │ │ │ └── filters - │ │ │ │ └── (transactiondate:3 >= '2020-02-23 00:00:00+00:00') AND (transactiondate:3 <= '2020-03-01 00:00:00+00:00') [outer=(3), constraints=(/3: [/'2020-02-23 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] - │ │ │ └── filters - │ │ │ └── id:20 = transactiondetails.cardid:4 [outer=(4,20), constraints=(/4: (/NULL - ]; /20: (/NULL - ]), fd=(4)==(20), (20)==(4)] - │ │ └── filters - │ │ └── id:20 = cardsinfo.cardid:27 [outer=(20,27), constraints=(/20: (/NULL - ]; /27: (/NULL - ]), fd=(20)==(27), (27)==(20)] - │ └── projections - │ ├── transactiondetails.sellprice:6 * quantity:5 [as=column39:39, outer=(5,6)] - │ ├── transactiondetails.buyprice:7 * quantity:5 [as=column41:41, outer=(5,7)] - │ ├── quantity:5 * (transactiondetails.sellprice:6 - transactiondetails.buyprice:7) [as=column43:43, outer=(5-7)] - │ └── extract('day', transactiondate:3) [as=column45:45, outer=(3)] - └── aggregations - ├── sum [as=sum:40, outer=(39)] - │ └── column39:39 - ├── sum [as=sum:42, outer=(41)] - │ └── column41:41 - └── sum [as=sum:44, outer=(43)] - └── column43:43 + └── group-by + ├── columns: sum:40!null sum:42!null sum:44!null column45:45 + ├── grouping columns: column45:45 + ├── stats: [rows=1171234.57, distinct(45)=1171234.57, null(45)=0] + ├── key: (45) + ├── fd: (45)-->(40,42,44) + ├── project + │ ├── columns: column39:39!null column41:41!null column43:43!null column45:45 + │ ├── stats: [rows=1198631.87, distinct(45)=1171234.57, null(45)=0] + │ ├── inner-join (hash) + │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null 
transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null id:20!null cardsinfo.dealerid:26!null cardsinfo.cardid:27!null + │ │ ├── stats: [rows=1198631.87, distinct(3)=1171234.57, null(3)=0, distinct(4)=37420.3552, null(4)=0, distinct(13)=1171234.57, null(13)=0, distinct(20)=37420.3552, null(20)=0] + │ │ ├── key: (5,13,27) + │ │ ├── fd: ()-->(1,2,11,12,26), (3-5)-->(6,7), (13)-->(14,15), (20)==(4,27), (27)==(4,20), (3)==(13), (13)==(3), (4)==(20,27) + │ │ ├── inner-join (hash) + │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null + │ │ │ ├── stats: [rows=1171234.57, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=1171234.57, null(3)=0, distinct(4)=56999.9999, null(4)=0, distinct(5)=1091498.71, null(5)=0, distinct(6)=1091498.71, null(6)=0, distinct(7)=1091498.71, null(7)=0, distinct(11)=1, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=1171234.57, null(13)=0, distinct(14)=551608.449, null(14)=0, distinct(15)=551608.449, null(15)=0] + │ │ │ ├── key: (4,5,13) + │ │ │ ├── fd: ()-->(1,2,11,12), (13)-->(14,15), (3-5)-->(6,7), (3)==(13), (13)==(3) + │ │ │ ├── scan transactiondetails + │ │ │ │ ├── columns: transactiondetails.dealerid:1!null transactiondetails.isbuy:2!null transactiondate:3!null transactiondetails.cardid:4!null quantity:5!null transactiondetails.sellprice:6!null transactiondetails.buyprice:7!null + │ │ │ │ ├── constraint: /1/2/3/4/5: [/1/false/'2020-02-23 00:00:00+00:00' - /1/false/'2020-03-01 00:00:00+00:00'] + │ │ │ │ ├── stats: [rows=10630000, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=10630000, null(3)=0, distinct(4)=57000, null(4)=0, distinct(5)=8207077.23, null(5)=0, 
distinct(6)=8207077.23, null(6)=0, distinct(7)=8207077.23, null(7)=0, distinct(1,2)=1, null(1,2)=0, distinct(1-3)=10630000, null(1-3)=0] + │ │ │ │ ├── key: (3-5) + │ │ │ │ └── fd: ()-->(1,2), (3-5)-->(6,7) + │ │ │ ├── select + │ │ │ │ ├── columns: transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null + │ │ │ │ ├── stats: [rows=1171234.57, distinct(11)=1, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=1171234.57, null(13)=0, distinct(14)=666666.667, null(14)=0, distinct(15)=666666.667, null(15)=0, distinct(11,12)=1, null(11,12)=0, distinct(11-15)=1171234.57, null(11-15)=0] + │ │ │ │ ├── key: (13) + │ │ │ │ ├── fd: ()-->(11,12), (13)-->(14,15) + │ │ │ │ ├── scan transactions + │ │ │ │ │ ├── columns: transactions.dealerid:11!null transactions.isbuy:12!null date:13!null accountname:14!null customername:15!null + │ │ │ │ │ ├── constraint: /11/12/13: [/1/false/'2020-02-23 00:00:00+00:00' - /1/false/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── stats: [rows=1181111.11, distinct(11)=1, null(11)=0, distinct(12)=1, null(12)=0, distinct(13)=1181111.11, null(13)=0, distinct(11,12)=1, null(11,12)=0, distinct(11-13)=1181111.11, null(11-13)=0] + │ │ │ │ │ ├── key: (13) + │ │ │ │ │ └── fd: ()-->(11,12), (13)-->(14,15) + │ │ │ │ └── filters + │ │ │ │ ├── accountname:14 != 'someaccount' [outer=(14), constraints=(/14: (/NULL - /'someaccount') [/e'someaccount\x00' - ]; tight)] + │ │ │ │ └── customername:15 != 'somecustomer' [outer=(15), constraints=(/15: (/NULL - /'somecustomer') [/e'somecustomer\x00' - ]; tight)] + │ │ │ └── filters + │ │ │ └── transactiondate:3 = date:13 [outer=(3,13), constraints=(/3: (/NULL - ]; /13: (/NULL - ]), fd=(3)==(13), (13)==(3)] + │ │ ├── inner-join (hash) + │ │ │ ├── columns: id:20!null cardsinfo.dealerid:26!null cardsinfo.cardid:27!null + │ │ │ ├── stats: [rows=58333.3333, distinct(20)=37420.3552, null(20)=0, distinct(26)=1, null(26)=0, distinct(27)=37420.3552, null(27)=0] + │ │ │ ├── key: 
(27) + │ │ │ ├── fd: ()-->(26), (20)==(27), (27)==(20) + │ │ │ ├── scan cardsinfo@cardsinfoversionindex + │ │ │ │ ├── columns: cardsinfo.dealerid:26!null cardsinfo.cardid:27!null + │ │ │ │ ├── constraint: /26/34: [/1 - /1] + │ │ │ │ ├── stats: [rows=58333.3333, distinct(26)=1, null(26)=0, distinct(27)=37420.3552, null(27)=0] + │ │ │ │ ├── key: (27) + │ │ │ │ └── fd: ()-->(26) + │ │ │ ├── scan cards@cardsnamesetnumber + │ │ │ │ ├── columns: id:20!null + │ │ │ │ ├── stats: [rows=57000, distinct(20)=57000, null(20)=0] + │ │ │ │ └── key: (20) + │ │ │ └── filters + │ │ │ └── id:20 = cardsinfo.cardid:27 [outer=(20,27), constraints=(/20: (/NULL - ]; /27: (/NULL - ]), fd=(20)==(27), (27)==(20)] + │ │ └── filters + │ │ └── id:20 = transactiondetails.cardid:4 [outer=(4,20), constraints=(/4: (/NULL - ]; /20: (/NULL - ]), fd=(4)==(20), (20)==(4)] + │ └── projections + │ ├── transactiondetails.sellprice:6 * quantity:5 [as=column39:39, outer=(5,6)] + │ ├── transactiondetails.buyprice:7 * quantity:5 [as=column41:41, outer=(5,7)] + │ ├── quantity:5 * (transactiondetails.sellprice:6 - transactiondetails.buyprice:7) [as=column43:43, outer=(5-7)] + │ └── extract('day', transactiondate:3) [as=column45:45, outer=(3)] + └── aggregations + ├── sum [as=sum:40, outer=(39)] + │ └── column39:39 + ├── sum [as=sum:42, outer=(41)] + │ └── column41:41 + └── sum [as=sum:44, outer=(43)] + └── column43:43 # Check if transaction was already inserted, for idempotency. 
# @@ -1156,37 +1159,39 @@ values │ │ ├── cardinality: [0 - 100] │ │ ├── key: (5,11-13) │ │ ├── fd: ()-->(4), (1)==(11), (11)==(1), (2)==(12), (12)==(2), (3)==(13), (13)==(3) - │ │ ├── sort + │ │ ├── inner-join (lookup transactions) │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null t.dealerid:11!null t.isbuy:12!null date:13!null + │ │ │ ├── key columns: [1 2 3] = [11 12 13] + │ │ │ ├── lookup columns are key │ │ │ ├── key: (5,11-13) │ │ │ ├── fd: ()-->(4), (1)==(11), (11)==(1), (2)==(12), (12)==(2), (3)==(13), (13)==(3) │ │ │ ├── ordering: -(3|13) opt(4) [actual: -3] │ │ │ ├── limit hint: 100.00 - │ │ │ └── inner-join (lookup transactions) - │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null t.dealerid:11!null t.isbuy:12!null date:13!null - │ │ │ ├── key columns: [1 2 3] = [11 12 13] - │ │ │ ├── lookup columns are key - │ │ │ ├── key: (5,11-13) - │ │ │ ├── fd: ()-->(4), (1)==(11), (11)==(1), (2)==(12), (12)==(2), (3)==(13), (13)==(3) - │ │ │ ├── scan d@detailscardidindex - │ │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null - │ │ │ │ ├── constraint: /1/2/4/3/5 - │ │ │ │ │ ├── [/1/false/19483/'2020-02-28 00:00:00+00:00' - /1/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/1/true/19483/'2020-02-28 00:00:00+00:00' - /1/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/2/false/19483/'2020-02-28 00:00:00+00:00' - /2/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/2/true/19483/'2020-02-28 00:00:00+00:00' - /2/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/3/false/19483/'2020-02-28 00:00:00+00:00' - /3/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/3/true/19483/'2020-02-28 00:00:00+00:00' - /3/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/4/false/19483/'2020-02-28 00:00:00+00:00' - /4/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── 
[/4/true/19483/'2020-02-28 00:00:00+00:00' - /4/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ ├── [/5/false/19483/'2020-02-28 00:00:00+00:00' - /5/false/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ │ └── [/5/true/19483/'2020-02-28 00:00:00+00:00' - /5/true/19483/'2020-03-01 00:00:00+00:00'] - │ │ │ │ ├── key: (1-3,5) - │ │ │ │ └── fd: ()-->(4) - │ │ │ └── filters - │ │ │ ├── (date:13 >= '2020-02-28 00:00:00+00:00') AND (date:13 <= '2020-03-01 00:00:00+00:00') [outer=(13), constraints=(/13: [/'2020-02-28 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] - │ │ │ ├── ((((t.dealerid:11 = 1) OR (t.dealerid:11 = 2)) OR (t.dealerid:11 = 3)) OR (t.dealerid:11 = 4)) OR (t.dealerid:11 = 5) [outer=(11), constraints=(/11: [/1 - /1] [/2 - /2] [/3 - /3] [/4 - /4] [/5 - /5]; tight)] - │ │ │ └── t.isbuy:12 IN (false, true) [outer=(12), constraints=(/12: [/false - /false] [/true - /true]; tight)] + │ │ │ ├── sort + │ │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null + │ │ │ │ ├── key: (1-3,5) + │ │ │ │ ├── fd: ()-->(4) + │ │ │ │ ├── ordering: -3 opt(4) [actual: -3] + │ │ │ │ ├── limit hint: 1100.00 + │ │ │ │ └── scan d@detailscardidindex + │ │ │ │ ├── columns: d.dealerid:1!null d.isbuy:2!null transactiondate:3!null cardid:4!null quantity:5!null + │ │ │ │ ├── constraint: /1/2/4/3/5 + │ │ │ │ │ ├── [/1/false/19483/'2020-02-28 00:00:00+00:00' - /1/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/1/true/19483/'2020-02-28 00:00:00+00:00' - /1/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/2/false/19483/'2020-02-28 00:00:00+00:00' - /2/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/2/true/19483/'2020-02-28 00:00:00+00:00' - /2/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/3/false/19483/'2020-02-28 00:00:00+00:00' - /3/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/3/true/19483/'2020-02-28 00:00:00+00:00' - /3/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── 
[/4/false/19483/'2020-02-28 00:00:00+00:00' - /4/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/4/true/19483/'2020-02-28 00:00:00+00:00' - /4/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ ├── [/5/false/19483/'2020-02-28 00:00:00+00:00' - /5/false/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ │ └── [/5/true/19483/'2020-02-28 00:00:00+00:00' - /5/true/19483/'2020-03-01 00:00:00+00:00'] + │ │ │ │ ├── key: (1-3,5) + │ │ │ │ └── fd: ()-->(4) + │ │ │ └── filters + │ │ │ ├── (date:13 >= '2020-02-28 00:00:00+00:00') AND (date:13 <= '2020-03-01 00:00:00+00:00') [outer=(13), constraints=(/13: [/'2020-02-28 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] + │ │ │ ├── ((((t.dealerid:11 = 1) OR (t.dealerid:11 = 2)) OR (t.dealerid:11 = 3)) OR (t.dealerid:11 = 4)) OR (t.dealerid:11 = 5) [outer=(11), constraints=(/11: [/1 - /1] [/2 - /2] [/3 - /3] [/4 - /4] [/5 - /5]; tight)] + │ │ │ └── t.isbuy:12 IN (false, true) [outer=(12), constraints=(/12: [/false - /false] [/true - /true]; tight)] │ │ └── 100 │ └── aggregations │ └── sum [as=sum:20, outer=(5)] diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index f8e34a81e85b..49d2e1511d6a 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -1784,7 +1784,7 @@ memo (optimized, ~13KB, required=[presentation: q:2,r:3]) ├── G1: (select G2 G3) (zigzag-join G3 pqr@q pqr@r) (select G4 G5) (select G6 G7) (select G8 G7) │ └── [presentation: q:2,r:3] │ ├── best: (zigzag-join G3 pqr@q pqr@r) - │ └── cost: 0.24 + │ └── cost: 1.92 ├── G2: (scan pqr,cols=(2,3)) │ └── [] │ ├── best: (scan pqr,cols=(2,3)) @@ -1865,7 +1865,7 @@ memo (optimized, ~16KB, required=[presentation: q:2,r:3,s:4]) ├── G1: (select G2 G3) (lookup-join G4 G5 pqr,keyCols=[1],outCols=(2-4)) (select G6 G7) (select G8 G9) (select G10 G9) │ └── [presentation: q:2,r:3,s:4] │ ├── best: (lookup-join G4 G5 pqr,keyCols=[1],outCols=(2-4)) - │ └── cost: 0.86 + │ └── cost: 7.46 ├── G2: 
(scan pqr,cols=(2-4)) │ └── [] │ ├── best: (scan pqr,cols=(2-4)) @@ -1874,7 +1874,7 @@ memo (optimized, ~16KB, required=[presentation: q:2,r:3,s:4]) ├── G4: (zigzag-join G3 pqr@q pqr@r) │ └── [] │ ├── best: (zigzag-join G3 pqr@q pqr@r) - │ └── cost: 0.24 + │ └── cost: 1.93 ├── G5: (filters) ├── G6: (index-join G13 pqr,cols=(2-4)) │ └── [] @@ -1930,7 +1930,7 @@ memo (optimized, ~11KB, required=[presentation: q:2,s:4]) ├── G1: (select G2 G3) (zigzag-join G3 pqr@q pqr@s) (select G4 G5) (select G6 G7) │ └── [presentation: q:2,s:4] │ ├── best: (zigzag-join G3 pqr@q pqr@s) - │ └── cost: 0.24 + │ └── cost: 1.93 ├── G2: (scan pqr,cols=(2,4)) │ └── [] │ ├── best: (scan pqr,cols=(2,4)) @@ -1984,7 +1984,7 @@ memo (optimized, ~13KB, required=[presentation: r:3,t:5]) ├── G1: (select G2 G3) (zigzag-join G3 pqr@rs pqr@ts) (select G4 G5) (select G6 G5) (select G7 G8) │ └── [presentation: r:3,t:5] │ ├── best: (zigzag-join G3 pqr@rs pqr@ts) - │ └── cost: 0.24 + │ └── cost: 1.94 ├── G2: (scan pqr,cols=(3,5)) │ └── [] │ ├── best: (scan pqr,cols=(3,5)) @@ -2075,7 +2075,7 @@ memo (optimized, ~32KB, required=[presentation: p:1,q:2,r:3,s:4]) ├── G1: (select G2 G3) (lookup-join G4 G5 pqr,keyCols=[1],outCols=(1-4)) (zigzag-join G3 pqr@q pqr@s) (zigzag-join G3 pqr@q pqr@rs) (lookup-join G6 G7 pqr,keyCols=[1],outCols=(1-4)) (select G8 G9) (select G10 G11) (select G12 G7) (select G13 G7) │ └── [presentation: p:1,q:2,r:3,s:4] │ ├── best: (zigzag-join G3 pqr@q pqr@s) - │ └── cost: 0.04 + │ └── cost: 1.94 ├── G2: (scan pqr,cols=(1-4)) │ └── [] │ ├── best: (scan pqr,cols=(1-4)) @@ -2084,12 +2084,12 @@ memo (optimized, ~32KB, required=[presentation: p:1,q:2,r:3,s:4]) ├── G4: (zigzag-join G17 pqr@q pqr@r) │ └── [] │ ├── best: (zigzag-join G17 pqr@q pqr@r) - │ └── cost: 0.24 + │ └── cost: 1.93 ├── G5: (filters G16) ├── G6: (zigzag-join G9 pqr@r pqr@s) │ └── [] │ ├── best: (zigzag-join G9 pqr@r pqr@s) - │ └── cost: 0.24 + │ └── cost: 1.94 ├── G7: (filters G14) ├── G8: (index-join G18 pqr,cols=(1-4)) │ 
└── [] @@ -2108,7 +2108,7 @@ memo (optimized, ~32KB, required=[presentation: p:1,q:2,r:3,s:4]) ├── G13: (index-join G21 pqr,cols=(1-4)) │ └── [] │ ├── best: (index-join G21 pqr,cols=(1-4)) - │ └── cost: 0.54 + │ └── cost: 4.72 ├── G14: (eq G22 G23) ├── G15: (eq G24 G23) ├── G16: (eq G25 G26) @@ -2128,7 +2128,7 @@ memo (optimized, ~32KB, required=[presentation: p:1,q:2,r:3,s:4]) ├── G21: (scan pqr@rs,cols=(1,3,4),constrained) │ └── [] │ ├── best: (scan pqr@rs,cols=(1,3,4),constrained) - │ └── cost: 0.12 + │ └── cost: 0.98 ├── G22: (variable q) ├── G23: (const 1) ├── G24: (variable r) diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index c0aa6297848a..54a8d1c96ea7 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -266,7 +266,7 @@ memo (optimized, ~6KB, required=[presentation: k:1]) ├── G1: (project G2 G3 k) │ └── [presentation: k:1] │ ├── best: (project G2 G3 k) - │ └── cost: 1.10 + │ └── cost: 1.11 ├── G2: (select G4 G5) (select G6 G7) (select G8 G9) │ └── [] │ ├── best: (select G8 G9)