Skip to content

Commit

Permalink
planner, stats: overflow estimation may lead to wrong join reorder (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
winoros authored Oct 22, 2024
1 parent d011ecd commit eebdcfe
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 5 deletions.
21 changes: 17 additions & 4 deletions pkg/planner/core/rule_join_reorder_greedy.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/util/intest"
)

type joinReorderGreedySolver struct {
Expand Down Expand Up @@ -97,9 +98,9 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree(tracer *joinReorder
s.curJoinGroup = s.curJoinGroup[1:]
for {
bestCost := math.MaxFloat64
bestIdx := -1
var finalRemainOthers []expression.Expression
var bestJoin base.LogicalPlan
bestIdx, whateverValidOneIdx := -1, -1
var finalRemainOthers, remainOthersOfWhateverValidOne []expression.Expression
var bestJoin, whateverValidOne base.LogicalPlan
for i, node := range s.curJoinGroup {
newJoin, remainOthers := s.checkConnectionAndMakeJoin(curJoinTree.p, node.p)
if newJoin == nil {
Expand All @@ -109,6 +110,9 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree(tracer *joinReorder
if err != nil {
return nil, err
}
whateverValidOne = newJoin
whateverValidOneIdx = i
remainOthersOfWhateverValidOne = remainOthers
curCost := s.calcJoinCumCost(newJoin, curJoinTree, node)
tracer.appendLogicalJoinCost(newJoin, curCost)
if bestCost > curCost {
Expand All @@ -120,7 +124,16 @@ func (s *joinReorderGreedySolver) constructConnectedJoinTree(tracer *joinReorder
}
// If we could find more join node, meaning that the sub connected graph have been totally explored.
if bestJoin == nil {
break
if whateverValidOne == nil {
break
}
// This branch is for the unexpected case.
// We throw assertion in test env. And create a valid join to avoid wrong result in the production env.
intest.Assert(false, "Join reorder should find one valid join but failed.")
bestJoin = whateverValidOne
bestCost = math.MaxFloat64
bestIdx = whateverValidOneIdx
finalRemainOthers = remainOthersOfWhateverValidOne
}
curJoinTree = &jrNode{
p: bestJoin,
Expand Down
6 changes: 5 additions & 1 deletion pkg/statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ type Histogram struct {
// For some types like `Int`, we do not build it because we can get them directly from `Bounds`.
Scalars []scalar
ID int64 // Column ID.
NDV int64 // Number of distinct values.
NDV int64 // Number of distinct values. Note that It contains the NDV of the TopN which is excluded from histogram.
NullCount int64 // Number of null values.
// LastUpdateVersion is the version that this histogram updated last time.
LastUpdateVersion uint64
Expand Down Expand Up @@ -980,6 +980,10 @@ func (hg *Histogram) OutOfRangeRowCount(
if histWidth <= 0 {
return 0
}
if math.IsInf(histWidth, 1) {
// The histogram is too wide. As a quick fix, we return 0 to indicate that the overlap percentage is near 0.
return 0
}
boundL := histL - histWidth
boundR := histR + histWidth

Expand Down
61 changes: 61 additions & 0 deletions tests/integrationtest/r/statistics/overflow_calc.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
CREATE TABLE `lrr_test` ( `COL102` double DEFAULT NULL, `COL1` double GENERATED ALWAYS AS (`COL102` + 10) STORED NOT NULL, PRIMARY KEY (`COL1`) /*T![clustered_index] CLUSTERED */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
insert into lrr_test (col102) values(-1.704648925036604e308), (-1.6888619680353582e308), (-1.6685908644498436e308), (-1.6311134967437805e308), (-1.6128280680807152e308), (-1.5899713947158026e308), (-1.5709457594070477e308), (-1.4925714566991343e308), (-1.4705985087370154e308), (-1.4451316666300040e308), (-1.3946576985986583e308), (-1.3695679630646804e308), (-1.3208992137984086e308), (-1.2887981369134862e308), (-1.2119996449796167e308), (-1.195172956104992e308), (-1.1929781068369925e308), (-1.1746351299417647e308), (-1.1237012620945195e308), (-1.1223448185004882e308), (-1.0974439629672084e308), (-1.0657654808610821e308), (-1.0582598945271716e308), (-1.0565276887850733e308), (-1.0416104832981696e308), (-1.0368741532690337e308), (-1.033521479407133e308), (-1.0232269544119505e308), (-9.31943312515408e307), (-9.05107332838438e307), (-8.276443475796885e307), (-7.845086666145396e307), (-7.664543340054255e307), (-7.235369799352141e307), (-7.047280050755922e307), (-6.62205033356235e307), (-6.35964999739255e307), (-5.989391229038818e307), (-5.974526205854541e307), (-5.798684586589338e307), (-4.98047732376121e307), (-4.4623979626128605e307), (-4.3248436443381234e307), (-3.3391152928792773e307), (-3.2694282487729395e307), (-3.2461091065368577e307), (-2.8613054009714654e307), (-2.7176814604572905e307), (-2.1301127705458223e307), (-1.7280065154718344e307), (-1.6743061442642827e307), (-4.862812928655648e306), (-3.3262533560429795e305), (4.124952267435051e305), (5.4576487694211726e306), (1.1237742400537221e307), (1.569984332645614e307), (1.7966188405412235e307), (1.8619233341238355e307), (2.1152066540419881e307), (2.1764927570795164e307), (2.99416682762135e307), (3.0545414962788647e307), (3.262967770716021e307), (3.288944887183685e307), (4.9025219351381e307), (5.250864486081297e307), (5.52054372134351e307), (6.311436996747818e307), (6.870852232080436e307), (7.501871137935436e307), (7.925709054822421e307), (8.438195254661318e307), (8.446731596918706e307), (9.43580947190119e307), (9.66735866233596e307), (1.0022043827847664e308), (1.020869767928594e308), (1.0327408606815872e308), (1.0402383684235906e308), (1.0690255622829305e308), (1.1623306052784659e308), (1.1906116361044565e308), (1.2221839628780758e308), (1.3112927565356536e308), (1.3307364382402157e308), (1.3646958839720612e308), (1.425066345632827e308), (1.4433864261103511e308), (1.5038532858735658e308), (1.5079450808097928e308), (1.553628680980576e308), (1.6241456663280369e308), (1.6295729949930798e308), (1.6328703529666413e308), (1.6832354056195887e308), (1.7017315016390902e308), (1.7134206410400048e308), (1.7240829054261275e308), (1.7257738639648862e308), (1.7262297095455299e308), (1.7905151735809062e308);
analyze table lrr_test;
select t1.col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0 order by t1.col1;
col1 col1
4.124952267435051e305 4.124952267435051e305
5.4576487694211726e306 5.4576487694211726e306
1.1237742400537221e307 1.1237742400537221e307
1.569984332645614e307 1.569984332645614e307
1.7966188405412235e307 1.7966188405412235e307
1.8619233341238355e307 1.8619233341238355e307
2.1152066540419881e307 2.1152066540419881e307
2.1764927570795164e307 2.1764927570795164e307
2.99416682762135e307 2.99416682762135e307
3.0545414962788647e307 3.0545414962788647e307
3.262967770716021e307 3.262967770716021e307
3.288944887183685e307 3.288944887183685e307
4.9025219351381e307 4.9025219351381e307
5.250864486081297e307 5.250864486081297e307
5.52054372134351e307 5.52054372134351e307
6.311436996747818e307 6.311436996747818e307
6.870852232080436e307 6.870852232080436e307
7.501871137935436e307 7.501871137935436e307
7.925709054822421e307 7.925709054822421e307
8.438195254661318e307 8.438195254661318e307
8.446731596918706e307 8.446731596918706e307
9.43580947190119e307 9.43580947190119e307
9.66735866233596e307 9.66735866233596e307
1.0022043827847664e308 1.0022043827847664e308
1.020869767928594e308 1.020869767928594e308
1.0327408606815872e308 1.0327408606815872e308
1.0402383684235906e308 1.0402383684235906e308
1.0690255622829305e308 1.0690255622829305e308
1.1623306052784659e308 1.1623306052784659e308
1.1906116361044565e308 1.1906116361044565e308
1.2221839628780758e308 1.2221839628780758e308
1.3112927565356536e308 1.3112927565356536e308
1.3307364382402157e308 1.3307364382402157e308
1.3646958839720612e308 1.3646958839720612e308
1.425066345632827e308 1.425066345632827e308
1.4433864261103511e308 1.4433864261103511e308
1.5038532858735658e308 1.5038532858735658e308
1.5079450808097928e308 1.5079450808097928e308
1.553628680980576e308 1.553628680980576e308
1.6241456663280369e308 1.6241456663280369e308
1.6295729949930798e308 1.6295729949930798e308
1.6328703529666413e308 1.6328703529666413e308
1.6832354056195887e308 1.6832354056195887e308
1.7017315016390902e308 1.7017315016390902e308
1.7134206410400048e308 1.7134206410400048e308
1.7240829054261275e308 1.7240829054261275e308
1.7257738639648862e308 1.7257738639648862e308
1.7262297095455299e308 1.7262297095455299e308
1.7905151735809062e308 1.7905151735809062e308
explain format='brief' select t1. col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0;
id estRows task access object operator info
MergeJoin 49.00 root inner join, left key:statistics__overflow_calc.lrr_test.col1, right key:statistics__overflow_calc.lrr_test.col1
├─TableReader(Build) 49.00 root data:TableRangeScan
│ └─TableRangeScan 49.00 cop[tikv] table:t2 range:[0,+inf], keep order:true
└─TableReader(Probe) 49.00 root data:TableRangeScan
└─TableRangeScan 49.00 cop[tikv] table:t1 range:[0,+inf], keep order:true
8 changes: 8 additions & 0 deletions tests/integrationtest/t/statistics/overflow_calc.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# https://github.com/pingcap/tidb/issues/56704
CREATE TABLE `lrr_test` ( `COL102` double DEFAULT NULL, `COL1` double GENERATED ALWAYS AS (`COL102` + 10) STORED NOT NULL, PRIMARY KEY (`COL1`) /*T![clustered_index] CLUSTERED */ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
insert into lrr_test (col102) values(-1.704648925036604e308), (-1.6888619680353582e308), (-1.6685908644498436e308), (-1.6311134967437805e308), (-1.6128280680807152e308), (-1.5899713947158026e308), (-1.5709457594070477e308), (-1.4925714566991343e308), (-1.4705985087370154e308), (-1.4451316666300040e308), (-1.3946576985986583e308), (-1.3695679630646804e308), (-1.3208992137984086e308), (-1.2887981369134862e308), (-1.2119996449796167e308), (-1.195172956104992e308), (-1.1929781068369925e308), (-1.1746351299417647e308), (-1.1237012620945195e308), (-1.1223448185004882e308), (-1.0974439629672084e308), (-1.0657654808610821e308), (-1.0582598945271716e308), (-1.0565276887850733e308), (-1.0416104832981696e308), (-1.0368741532690337e308), (-1.033521479407133e308), (-1.0232269544119505e308), (-9.31943312515408e307), (-9.05107332838438e307), (-8.276443475796885e307), (-7.845086666145396e307), (-7.664543340054255e307), (-7.235369799352141e307), (-7.047280050755922e307), (-6.62205033356235e307), (-6.35964999739255e307), (-5.989391229038818e307), (-5.974526205854541e307), (-5.798684586589338e307), (-4.98047732376121e307), (-4.4623979626128605e307), (-4.3248436443381234e307), (-3.3391152928792773e307), (-3.2694282487729395e307), (-3.2461091065368577e307), (-2.8613054009714654e307), (-2.7176814604572905e307), (-2.1301127705458223e307), (-1.7280065154718344e307), (-1.6743061442642827e307), (-4.862812928655648e306), (-3.3262533560429795e305), (4.124952267435051e305), (5.4576487694211726e306), (1.1237742400537221e307), (1.569984332645614e307), (1.7966188405412235e307), (1.8619233341238355e307), (2.1152066540419881e307), (2.1764927570795164e307), (2.99416682762135e307), (3.0545414962788647e307), (3.262967770716021e307), (3.288944887183685e307), (4.9025219351381e307), (5.250864486081297e307), (5.52054372134351e307), (6.311436996747818e307), (6.870852232080436e307), (7.501871137935436e307), (7.925709054822421e307), (8.438195254661318e307), (8.446731596918706e307), (9.43580947190119e307), (9.66735866233596e307), (1.0022043827847664e308), (1.020869767928594e308), (1.0327408606815872e308), (1.0402383684235906e308), (1.0690255622829305e308), (1.1623306052784659e308), (1.1906116361044565e308), (1.2221839628780758e308), (1.3112927565356536e308), (1.3307364382402157e308), (1.3646958839720612e308), (1.425066345632827e308), (1.4433864261103511e308), (1.5038532858735658e308), (1.5079450808097928e308), (1.553628680980576e308), (1.6241456663280369e308), (1.6295729949930798e308), (1.6328703529666413e308), (1.6832354056195887e308), (1.7017315016390902e308), (1.7134206410400048e308), (1.7240829054261275e308), (1.7257738639648862e308), (1.7262297095455299e308), (1.7905151735809062e308);

analyze table lrr_test;

select t1.col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0 order by t1.col1;
explain format='brief' select t1. col1, t2.col1 from lrr_test as t1 right join lrr_test as t2 on t1.col1 = t2.col1 where t1.col1 >=0;

0 comments on commit eebdcfe

Please sign in to comment.