Skip to content

Commit

Permalink
Merge branch 'master' into exchange-partition-GA
Browse files Browse the repository at this point in the history
  • Loading branch information
ymkzpx authored Jul 20, 2022
2 parents 2271f67 + f0717df commit 12fce72
Show file tree
Hide file tree
Showing 9 changed files with 1,329 additions and 73 deletions.
15 changes: 15 additions & 0 deletions expression/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,21 @@ func pushNotAcrossExpr(ctx sessionctx.Context, expr Expression, not bool) (_ Exp
return expr, not
}

// GetExprInsideIsTruth peels off every enclosing `istrue_with_null` / `istrue` wrapper and returns the
// first expression that is not such a wrapper. Expressions that are not wrapped are returned unchanged.
// This is useful when handling expressions from "not" or "!", because `istrue_with_null` or `istrue` might
// have been wrapped around them. See pushNotAcrossExpr() and wrapWithIsTrue() for details.
func GetExprInsideIsTruth(expr Expression) Expression {
	cur := expr
	for {
		sf, isScalar := cur.(*ScalarFunction)
		if !isScalar {
			// Not a scalar function, so it cannot be an is-truth wrapper.
			return cur
		}
		if name := sf.FuncName.L; name != ast.IsTruthWithNull && name != ast.IsTruthWithoutNull {
			return cur
		}
		// Descend into the wrapped operand and check it again.
		cur = sf.GetArgs()[0]
	}
}

// PushDownNot pushes the `not` function down to the expression's arguments.
func PushDownNot(ctx sessionctx.Context, expr Expression) Expression {
newExpr, _ := pushNotAcrossExpr(ctx, expr, false)
Expand Down
32 changes: 32 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,11 @@ type SessionVars struct {
// When it is false, ANALYZE reads the latest data.
// When it is true, ANALYZE reads data on the snapshot at the beginning of ANALYZE.
EnableAnalyzeSnapshot bool

// DefaultStrMatchSelectivity adjusts the estimation strategy for string matching expressions that can't be estimated by building them into ranges.
// When > 0: it is used as the selectivity of the expression.
// When = 0: try to use TopN to evaluate the like expression to estimate the selectivity.
DefaultStrMatchSelectivity float64
}

// InitStatementContext initializes a StatementContext, the object is reused to reduce allocation.
Expand Down Expand Up @@ -2724,3 +2729,30 @@ func (s *SessionVars) GetSeekFactor(tbl *model.TableInfo) float64 {
}
return s.seekFactor
}

// EnableEvalTopNEstimationForStrMatch reports whether expressions should be evaluated with TopN to improve
// estimation. It is true exactly when DefaultStrMatchSelectivity is 0.
// Currently, it's only for string matching functions (like and regexp).
func (s *SessionVars) EnableEvalTopNEstimationForStrMatch() bool {
	topNAssisted := s.DefaultStrMatchSelectivity == 0
	return topNAssisted
}

// GetStrMatchDefaultSelectivity returns the default selectivity for like and regexp.
// A non-zero DefaultStrMatchSelectivity is returned as is.
// Note: 0 is a special value, which means the default selectivity is 0.1 and TopN assisted estimation is enabled.
func (s *SessionVars) GetStrMatchDefaultSelectivity() float64 {
	if sel := s.DefaultStrMatchSelectivity; sel != 0 {
		return sel
	}
	return 0.1
}

// GetNegateStrMatchDefaultSelectivity returns the default selectivity for not like and not regexp.
// Unless the variable equals DefTiDBDefaultStrMatchSelectivity, the result is 1 minus the value returned by
// GetStrMatchDefaultSelectivity().
// Note:
// 0 is a special value, which means the default selectivity is 0.9 and TopN assisted estimation is enabled.
// 0.8 (the default value) is also a special value. For backward compatibility, when the variable is set to 0.8, we
// keep the default selectivity of like/regexp and not like/regexp all 0.8.
func (s *SessionVars) GetNegateStrMatchDefaultSelectivity() float64 {
	if s.DefaultStrMatchSelectivity != DefTiDBDefaultStrMatchSelectivity {
		return 1 - s.GetStrMatchDefaultSelectivity()
	}
	// Backward-compatible case: the negated form keeps the same default as the positive form.
	return DefTiDBDefaultStrMatchSelectivity
}
5 changes: 5 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -1714,6 +1714,11 @@ var defaultSysVars = []*SysVar{
GenerateBinaryPlan.Store(TiDBOptOn(val))
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBDefaultStrMatchSelectivity, Value: strconv.FormatFloat(DefTiDBDefaultStrMatchSelectivity, 'f', -1, 64), Type: TypeFloat, MinValue: 0, MaxValue: 1,
SetSession: func(s *SessionVars, val string) error {
s.DefaultStrMatchSelectivity = tidbOptFloat64(val, DefTiDBDefaultStrMatchSelectivity)
return nil
}},
}

// FeedbackProbability points to the FeedbackProbability in statistics package.
Expand Down
8 changes: 8 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,13 @@ const (
// When set to false, ANALYZE reads the latest data.
// When set to true, ANALYZE reads data on the snapshot at the beginning of ANALYZE.
TiDBEnableAnalyzeSnapshot = "tidb_enable_analyze_snapshot"

// TiDBDefaultStrMatchSelectivity controls a special cardinality estimation strategy for string match functions (like and regexp).
// When set to 0, Selectivity() will try to evaluate those functions with TopN and NULL in the stats to estimate,
// and the default selectivity and the selectivity for the histogram part will be 0.1.
// When set to a value in (0, 1], Selectivity() will use the value of this variable as the default selectivity of those
// functions instead of the selectionFactor (0.8).
TiDBDefaultStrMatchSelectivity = "tidb_default_string_match_selectivity"
)

// TiDB vars that have only global scope
Expand Down Expand Up @@ -988,6 +995,7 @@ const (
DefTiDBEnableAnalyzeSnapshot = false
DefTiDBGenerateBinaryPlan = true
DefEnableTiDBGCAwareMemoryTrack = true
DefTiDBDefaultStrMatchSelectivity = 0.8
)

// Process global variables.
Expand Down
212 changes: 139 additions & 73 deletions statistics/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,109 +317,175 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
}
}

// Try to cover Constants
notCoveredConstants := make(map[int]*expression.Constant)
notCoveredDNF := make(map[int]*expression.ScalarFunction)
notCoveredStrMatch := make(map[int]*expression.ScalarFunction)
notCoveredNegateStrMatch := make(map[int]*expression.ScalarFunction)
notCoveredOtherExpr := make(map[int]expression.Expression)
if mask > 0 {
for i, expr := range remainedExprs {
if mask&(1<<uint64(i)) == 0 {
continue
}
if c, ok := expr.(*expression.Constant); ok {
if expression.MaybeOverOptimized4PlanCache(ctx, []expression.Expression{c}) {
switch x := expr.(type) {
case *expression.Constant:
notCoveredConstants[i] = x
continue
case *expression.ScalarFunction:
switch x.FuncName.L {
case ast.LogicOr:
notCoveredDNF[i] = x
continue
}
if c.Value.IsNull() {
// c is null
ret *= 0
mask &^= 1 << uint64(i)
} else if isTrue, err := c.Value.ToBool(sc); err == nil {
if isTrue == 0 {
// c is false
ret *= 0
case ast.Like, ast.Regexp:
notCoveredStrMatch[i] = x
continue
case ast.UnaryNot:
inner := expression.GetExprInsideIsTruth(x.GetArgs()[0])
innerSF, ok := inner.(*expression.ScalarFunction)
if ok {
switch innerSF.FuncName.L {
case ast.Like, ast.Regexp:
notCoveredNegateStrMatch[i] = x
continue
}
}
// c is true, no need to change ret
mask &^= 1 << uint64(i)
}
// Not expected to come here:
// err != nil, no need to do anything.
}
notCoveredOtherExpr[i] = expr
}
}

// Now we try to cover those still not covered DNF conditions using independence assumption,
// Try to cover remaining Constants
for i, c := range notCoveredConstants {
if expression.MaybeOverOptimized4PlanCache(ctx, []expression.Expression{c}) {
continue
}
if c.Value.IsNull() {
// c is null
ret *= 0
mask &^= 1 << uint64(i)
delete(notCoveredConstants, i)
} else if isTrue, err := c.Value.ToBool(sc); err == nil {
if isTrue == 0 {
// c is false
ret *= 0
}
// c is true, no need to change ret
mask &^= 1 << uint64(i)
delete(notCoveredConstants, i)
}
// Not expected to come here:
// err != nil, no need to do anything.
}

// Try to cover remaining DNF conditions using independence assumption,
// i.e., sel(condA or condB) = sel(condA) + sel(condB) - sel(condA) * sel(condB)
if mask > 0 {
OUTER:
for i, expr := range remainedExprs {
if mask&(1<<uint64(i)) == 0 {
continue
OUTER:
for i, scalarCond := range notCoveredDNF {
// If there are columns not in stats, we won't handle them. This case might happen after DDL operations.
cols := expression.ExtractColumns(scalarCond)
for i := range cols {
if _, ok := coll.Columns[cols[i].UniqueID]; !ok {
continue OUTER
}
scalarCond, ok := expr.(*expression.ScalarFunction)
// Make sure we only handle DNF condition.
if !ok || scalarCond.FuncName.L != ast.LogicOr {
}

dnfItems := expression.FlattenDNFConditions(scalarCond)
dnfItems = ranger.MergeDNFItems4Col(ctx, dnfItems)
// If the conditions only contain a single column, we won't handle them.
if len(dnfItems) <= 1 {
continue
}

selectivity := 0.0
for _, cond := range dnfItems {
// In selectivity calculation, we don't handle CorrelatedColumn, so we directly skip over it.
// Other kinds of `Expression`, i.e., Constant, Column and ScalarFunction, can all possibly be built into
// ranges and used to calculate selectivity, so we accept them all.
_, ok := cond.(*expression.CorrelatedColumn)
if ok {
continue
}
// If there're columns not in stats, we won't handle them. This case might happen after DDL operations.
cols := expression.ExtractColumns(scalarCond)
for i := range cols {
if _, ok := coll.Columns[cols[i].UniqueID]; !ok {
continue OUTER
}
}

dnfItems := expression.FlattenDNFConditions(scalarCond)
dnfItems = ranger.MergeDNFItems4Col(ctx, dnfItems)
// If the conditions only contain a single column, we won't handle them.
if len(dnfItems) <= 1 {
continue
var cnfItems []expression.Expression
if scalar, ok := cond.(*expression.ScalarFunction); ok && scalar.FuncName.L == ast.LogicAnd {
cnfItems = expression.FlattenCNFConditions(scalar)
} else {
cnfItems = append(cnfItems, cond)
}

selectivity := 0.0
for _, cond := range dnfItems {
// In selectivity calculation, we don't handle CorrelatedColumn, so we directly skip over it.
// Other kinds of `Expression`, i.e., Constant, Column and ScalarFunction, can all possibly be built into
// ranges and used to calculate selectivity, so we accept them all.
_, ok := cond.(*expression.CorrelatedColumn)
if ok {
continue
}
curSelectivity, _, err := coll.Selectivity(ctx, cnfItems, nil)
if err != nil {
logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err))
curSelectivity = selectionFactor
}

var cnfItems []expression.Expression
if scalar, ok := cond.(*expression.ScalarFunction); ok && scalar.FuncName.L == ast.LogicAnd {
cnfItems = expression.FlattenCNFConditions(scalar)
} else {
cnfItems = append(cnfItems, cond)
}
selectivity = selectivity + curSelectivity - selectivity*curSelectivity
if sc.EnableOptimizerCETrace {
// Tracing for the expression estimation results of this DNF.
CETraceExpr(ctx, tableID, "Table Stats-Expression-DNF", scalarCond, selectivity*float64(coll.Count))
}
}

curSelectivity, _, err := coll.Selectivity(ctx, cnfItems, nil)
if err != nil {
logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err))
curSelectivity = selectionFactor
}
if selectivity != 0 {
ret *= selectivity
mask &^= 1 << uint64(i)
delete(notCoveredDNF, i)
}
if sc.EnableOptimizerCETrace {
// Tracing for the expression estimation results after applying the DNF estimation result.
curExpr = append(curExpr, remainedExprs[i])
expr := expression.ComposeCNFCondition(ctx, curExpr...)
CETraceExpr(ctx, tableID, "Table Stats-Expression-CNF", expr, ret*float64(coll.Count))
}
}

selectivity = selectivity + curSelectivity - selectivity*curSelectivity
if sc.EnableOptimizerCETrace {
// Tracing for the expression estimation results of this DNF.
CETraceExpr(ctx, tableID, "Table Stats-Expression-DNF", scalarCond, selectivity*float64(coll.Count))
}
// Try to cover remaining string matching functions by evaluating the expressions with TopN to estimate.
if ctx.GetSessionVars().EnableEvalTopNEstimationForStrMatch() {
for i, scalarCond := range notCoveredStrMatch {
ok, sel, err := coll.GetSelectivityByFilter(ctx, ctx.GetSessionVars().GetStrMatchDefaultSelectivity(), []expression.Expression{scalarCond})
if err != nil {
sc.AppendWarning(errors.New("Error when using TopN-assisted estimation: " + err.Error()))
}

if selectivity != 0 {
ret *= selectivity
mask &^= 1 << uint64(i)
if !ok {
continue
}
if sc.EnableOptimizerCETrace {
// Tracing for the expression estimation results after applying the DNF estimation result.
curExpr = append(curExpr, remainedExprs[i])
expr := expression.ComposeCNFCondition(ctx, curExpr...)
CETraceExpr(ctx, tableID, "Table Stats-Expression-CNF", expr, ret*float64(coll.Count))
ret *= sel
mask &^= 1 << uint64(i)
delete(notCoveredStrMatch, i)
}
for i, scalarCond := range notCoveredNegateStrMatch {
ok, sel, err := coll.GetSelectivityByFilter(ctx, ctx.GetSessionVars().GetNegateStrMatchDefaultSelectivity(), []expression.Expression{scalarCond})
if err != nil {
sc.AppendWarning(errors.New("Error when using TopN-assisted estimation: " + err.Error()))
}
if !ok {
continue
}
ret *= sel
mask &^= 1 << uint64(i)
delete(notCoveredNegateStrMatch, i)
}
}

// If there's still conditions which cannot be calculated, we will multiply a selectionFactor.
// At last, if there are still conditions which cannot be estimated, we multiply the selectivity with
// the minimal default selectivity of the remaining conditions.
// Currently, only string matching functions (like and regexp) may have a different default selectivity,
// other expressions' default selectivity is selectionFactor.
if mask > 0 {
ret *= selectionFactor
minSelectivity := 1.0
if len(notCoveredConstants) > 0 || len(notCoveredDNF) > 0 || len(notCoveredOtherExpr) > 0 {
minSelectivity = math.Min(minSelectivity, selectionFactor)
}
if len(notCoveredStrMatch) > 0 {
minSelectivity = math.Min(minSelectivity, ctx.GetSessionVars().GetStrMatchDefaultSelectivity())
}
if len(notCoveredNegateStrMatch) > 0 {
minSelectivity = math.Min(minSelectivity, ctx.GetSessionVars().GetNegateStrMatchDefaultSelectivity())
}
ret *= minSelectivity
}

if sc.EnableOptimizerCETrace {
// Tracing for the expression estimation results after applying the default selectivity.
totalExpr := expression.ComposeCNFCondition(ctx, remainedExprs...)
Expand Down
Loading

0 comments on commit 12fce72

Please sign in to comment.