Skip to content

Commit

Permalink
DRILL-7245: Cap NDV at row count after applying filters
Browse files (browse the repository at this point in the history)
  • Loading branch information
Gautam Parai authored and lushuifeng committed Jun 21, 2019
1 parent 50a6635 commit 13d79ce
Showing 1 changed file with 6 additions and 5 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -136,7 +136,7 @@ public Double getDistinctRowCount(RelNode rel, RelMetadataQuery mq, ImmutableBit
*/
private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq, DrillTable table,
ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
- double selectivity, rowCount;
+ double selectivity, gbyColPredSel, rowCount;
/* If predicate is present, determine its selectivity to estimate filtered rows.
* Thereafter, compute the number of distinct rows.
*/
Expand Down Expand Up @@ -172,16 +172,17 @@ private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq,
break;
}
estRowCnt *= ndv;
- selectivity = getPredSelectivityContainingInputRef(predicate, i, mq, scan);
+ gbyColPredSel = getPredSelectivityContainingInputRef(predicate, i, mq, scan);
/* If predicate is on group-by column, scale down the NDV by selectivity. Consider the query
* select a, b from t where a = 10 group by a, b. Here, NDV(a) will be scaled down by SEL(a)
* whereas NDV(b) will not.
*/
- if (selectivity > 0) {
- estRowCnt *= selectivity;
+ if (gbyColPredSel > 0) {
+ estRowCnt *= gbyColPredSel;
}
}
- estRowCnt = Math.min(estRowCnt, rowCount);
+ // Estimated NDV should not exceed number of rows after applying the filters
+ estRowCnt = Math.min(estRowCnt, selectivity*rowCount);
if (!allColsHaveNDV) {
if (logger.isDebugEnabled()) {
logger.debug(String.format("NDV not available for %s(%s). Using default rowcount for group-by %s",
Expand Down

0 comments on commit 13d79ce

Please sign in to comment.