Skip to content

Commit

Permalink
planner: open the new only_full_group_by check (#7)
Browse files Browse the repository at this point in the history
Co-authored-by: ailinkid <314806019@qq.com>
Co-authored-by: Arenatlx <ailinsilence4@gmail.com>
  • Loading branch information
3 people authored Mar 14, 2022
1 parent d297b73 commit 2a8e86f
Show file tree
Hide file tree
Showing 13 changed files with 852 additions and 92 deletions.
18 changes: 12 additions & 6 deletions executor/aggregate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -742,16 +742,22 @@ func TestOnlyFullGroupBy(t *testing.T) {
require.Truef(t, terror.ErrorEqual(err, plannercore.ErrMixOfGroupFuncAndFields), "err %v", err)
err = tk.ExecToErr("select count(b), c from t")
require.Truef(t, terror.ErrorEqual(err, plannercore.ErrMixOfGroupFuncAndFields), "err %v", err)
tk.MustQuery("select count(b), any_value(c) from t")
tk.MustQuery("select count(b), any_value(c) + 2 from t")
err = tk.ExecToErr("select distinct a, b, count(a) from t")
require.Truef(t, terror.ErrorEqual(err, plannercore.ErrMixOfGroupFuncAndFields), "err %v", err)
// test compatible with sql_mode = ONLY_FULL_GROUP_BY
tk.MustQuery("select a from t group by a,b,c")
tk.MustQuery("select b from t group by b")
err = tk.ExecToErr("select b*rand() from t group by b")
require.Truef(t, terror.ErrorEqual(err, plannercore.ErrFieldNotInGroupBy), "err %v", err)
tk.MustQuery("select b as e from t group by b")
tk.MustQuery("select b+c from t group by b+c")
tk.MustQuery("select b+c, min(a) from t group by b+c, b-c")
tk.MustQuery("select b+c, min(a) from t group by b, c")
tk.MustQuery("select b+c from t group by b,c")
err = tk.ExecToErr("select b+c from (select b, b+rand() as c from t) t group by b")
require.Truef(t, terror.ErrorEqual(err, plannercore.ErrFieldNotInGroupBy), "err %v", err)
tk.MustQuery("select b between c and d from t group by b,c,d")
tk.MustQuery("select case b when 1 then c when 2 then d else d end from t group by b,c,d")
tk.MustQuery("select c > (select b from t) from t group by c")
Expand Down Expand Up @@ -779,8 +785,7 @@ func TestOnlyFullGroupBy(t *testing.T) {
tk.MustQuery("select t.*, x.* from t, x where t.b = x.b and t.d = x.d group by t.b, t.d")
tk.MustQuery("select t.*, x.* from t, x where t.b = x.a group by t.b, t.d")
tk.MustQuery("select t.b, x.* from t, x where t.b = x.a group by t.b")
err = tk.ExecToErr("select t.*, x.* from t, x where t.c = x.a group by t.b, t.c")
require.Truef(t, terror.ErrorEqual(err, plannercore.ErrFieldNotInGroupBy), "err %v", err)
tk.MustQuery("select t.*, x.* from t, x where t.c = x.a group by t.b, t.c")
// test functional dependency derived from keys in join
tk.MustQuery("select t.*, x.* from t inner join x on t.a = x.a group by t.a")
tk.MustQuery("select t.*, x.* from t inner join x on (t.b = x.b and t.d = x.d) group by t.b, x.d")
Expand All @@ -795,10 +800,11 @@ func TestOnlyFullGroupBy(t *testing.T) {
require.Truef(t, terror.ErrorEqual(err, plannercore.ErrFieldNotInGroupBy), "err %v", err)

// FixMe: test functional dependency of derived table
// tk.MustQuery("select * from (select * from t) as e group by a")
// tk.MustQuery("select * from (select * from t) as e group by b,d")
// err = tk.ExecToErr("select * from (select * from t) as e group by b,c")
// c.Assert(terror.ErrorEqual(err, plannercore.ErrFieldNotInGroupBy), IsTrue)
tk.MustQuery("select * from (select * from t) as e group by a")
tk.MustQuery("select * from (select * from t) as e group by b,d")
// failed
err = tk.ExecToErr("select * from (select * from t) as e group by b,c")
require.Truef(t, terror.ErrorEqual(err, plannercore.ErrFieldNotInGroupBy), "err %v", err)

// test order by
tk.MustQuery("select c from t group by c,d order by d")
Expand Down
2 changes: 2 additions & 0 deletions expression/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ type Column struct {
InOperand bool

collationInfo

CorrelatedColUniqueID int64
}

// Equal implements Expression interface.
Expand Down
1 change: 1 addition & 0 deletions expression/function_traits.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ var unFoldableFunctions = map[string]struct{}{
ast.NextVal: {},
ast.LastVal: {},
ast.SetVal: {},
ast.AnyValue: {},
}

// DisableFoldFunctions stores functions which prevent child scope functions from being constant folded.
Expand Down
34 changes: 34 additions & 0 deletions expression/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,40 @@ func IsRuntimeConstExpr(expr Expression) bool {
return false
}

func CheckNonDeterministic(e Expression) bool {
switch x := e.(type) {
case *Constant, *Column, *CorrelatedColumn:
return false
case *ScalarFunction:
if _, ok := unFoldableFunctions[x.FuncName.L]; ok {
return true
}
for _, arg := range x.GetArgs() {
if CheckNonDeterministic(arg) {
return true
}
}
}
return false
}

func CheckFuncInExpr(e Expression, funcName string) bool {
switch x := e.(type) {
case *Constant, *Column, *CorrelatedColumn:
return false
case *ScalarFunction:
if x.FuncName.L == funcName {
return true
}
for _, arg := range x.GetArgs() {
if CheckFuncInExpr(arg, funcName) {
return true
}
}
}
return false
}

// IsMutableEffectsExpr checks if expr contains function which is mutable or has side effects.
func IsMutableEffectsExpr(expr Expression) bool {
switch x := expr.(type) {
Expand Down
162 changes: 150 additions & 12 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ import (
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/dbterror"
"github.com/pingcap/tidb/util/hack"
"github.com/pingcap/tidb/util/plancodec"
"github.com/pingcap/tidb/util/set"
)
Expand Down Expand Up @@ -1141,10 +1142,16 @@ func (b *PlanBuilder) buildProjectionField(ctx context.Context, p LogicalPlan, f
if expr == nil {
return nil, name, nil
}
// invalid unique id
correlatedColUniqueID := int64(0)
if cc, ok := expr.(*expression.CorrelatedColumn); ok {
correlatedColUniqueID = cc.UniqueID
}
// for expr projection, we should record the map relationship <hashcode, uniqueID> down.
newCol := &expression.Column{
UniqueID: b.ctx.GetSessionVars().AllocPlanColumnID(),
RetType: expr.GetType(),
UniqueID: b.ctx.GetSessionVars().AllocPlanColumnID(),
RetType: expr.GetType(),
CorrelatedColUniqueID: correlatedColUniqueID,
}
if b.ctx.GetSessionVars().MapHashCode2UniqueID4ExtendedCol == nil {
b.ctx.GetSessionVars().MapHashCode2UniqueID4ExtendedCol = make(map[string]int, 1)
Expand Down Expand Up @@ -1303,6 +1310,89 @@ func (b *PlanBuilder) buildProjection(ctx context.Context, p LogicalPlan, fields
}
}
proj.SetChildren(p)
// delay the only-full-group-by-check in create view statement to later query.
if !b.isCreateView {
fds := proj.ExtractFD()
// Projection -> Children -> ...
// Let the projection itself to evaluate the whole FD, which will build the connection
// 1: from select-expr to registered-expr
// 2: from base-column to select-expr
// After that
if p.SCtx().GetSessionVars().SQLMode.HasOnlyFullGroupBy() && fds.HasAggBuilt {
for offset, expr := range proj.Exprs[:len(fields)] {
item := fd.NewFastIntSet()
switch x := expr.(type) {
case *expression.Column:
item.Insert(int(x.UniqueID))
case *expression.ScalarFunction:
if expression.CheckFuncInExpr(x, ast.AnyValue) {
continue
}
scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
if !ok {
panic("selected expr must have been registered, shouldn't be here")
}
item.Insert(scalarUniqueID)
default:
}
// Rule #1, if select fields are constant, it's ok.
if item.SubsetOf(fds.ConstantCols()) {
continue
}
// Rule #2, if select fields are subset of group by items, it's ok.
if item.SubsetOf(fds.GroupByCols) {
continue
}

// Rule #3, if select fields are dependencies of Strict FD with determinants in group-by items, it's ok.
// lax FD couldn't be done here, eg: for unique key (b), index key NULL & NULL are different rows with
// uncertain other column values.
strictClosure := fds.ClosureOfStrict(fds.GroupByCols)
if item.SubsetOf(strictClosure) {
continue
}
// locate the base col that are not in (constant list / group by list / strict fd closure) for error show.
baseCols := expression.ExtractColumns(expr)
errShowCol := baseCols[0]
for _, col := range baseCols {
colSet := fd.NewFastIntSet(int(col.UniqueID))
if !colSet.SubsetOf(strictClosure) {
errShowCol = col
break
}
}
name := errShowCol.OrigName
if name == "" {
name = proj.names[offset].String()
}
// Only1Zero is to judge whether it's no-group-by-items case.
if !fds.GroupByCols.Only1Zero() {
return nil, nil, 0, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInSelect, name)
} else {
return nil, nil, 0, ErrMixOfGroupFuncAndFields.GenWithStackByArgs(offset+1, name)
}
}
if fds.GroupByCols.Only1Zero() {
// maxOneRow is delayed from agg's ExtractFD logic since some details listed in it.
projectionUniqueIDs := fd.NewFastIntSet()
for _, expr := range proj.Exprs {
switch x := expr.(type) {
case *expression.Column:
projectionUniqueIDs.Insert(int(x.UniqueID))
case *expression.ScalarFunction:
scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx))))
if !ok {
panic("selected expr must have been registered, shouldn't be here")
}
projectionUniqueIDs.Insert(scalarUniqueID)
}
}
fds.MaxOneRow(projectionUniqueIDs)
}
// for select * from view (include agg), outer projection don't have to check select list with the inner group-by flag.
fds.HasAggBuilt = false
}
}
return proj, proj.Exprs, oldLen, nil
}

Expand Down Expand Up @@ -3577,13 +3667,6 @@ func (b *PlanBuilder) buildSelect(ctx context.Context, sel *ast.SelectStmt) (p L
}
}

if b.ctx.GetSessionVars().SQLMode.HasOnlyFullGroupBy() && sel.From != nil {
err = b.checkOnlyFullGroupBy(p, sel)
if err != nil {
return nil, err
}
}

hasWindowFuncField := b.detectSelectWindow(sel)
// Some SQL statements define WINDOW but do not use them. But we also need to check the window specification list.
// For example: select id from t group by id WINDOW w AS (ORDER BY uids DESC) ORDER BY id;
Expand Down Expand Up @@ -4286,10 +4369,17 @@ func (ds *DataSource) ExtractFD() *fd.FDSet {
fds.AddStrictFunctionalDependency(keyCols, allCols)
fds.MakeNotNull(keyCols)
} else {
fds.AddLaxFunctionalDependency(keyCols, allCols)
// unique index:
// 1: normal value should be unique
// 2: null value can be multiple
// for this kind of lax to be strict, we need to make the determinant not-null.
fds.AddLaxFunctionalDependency(keyCols, allCols, true)
}
} else {
fds.AddLaxFunctionalDependency(keyCols, allCols)
// 1: normal value can be multiple
// 2: null value can be multiple
// for this kind of lax to be strict, we need to make both the determinant and dependency not-null.
fds.AddLaxFunctionalDependency(keyCols, allCols, false)
}
}
// handle the datasource conditions (maybe pushed down from upper layer OP)
Expand All @@ -4310,6 +4400,28 @@ func (ds *DataSource) ExtractFD() *fd.FDSet {
fds.AddEquivalence(equiv[0], equiv[1])
}
}
// build the dependency for generated columns.
// the generated column is sequentially dependent on the forward column.
// a int, b int as (a+1), c int as (b+1), here we can build the strict FD down:
// {a} -> {b}, {b} -> {c}, put the maintenance of the dependencies between generated columns to the FD graph.
notNullCols := fd.NewFastIntSet()
for _, col := range ds.TblCols {
if col.VirtualExpr != nil {
dependencies := fd.NewFastIntSet()
dependencies.Insert(int(col.UniqueID))
// dig out just for 1 level.
directBaseCol := expression.ExtractColumns(col.VirtualExpr)
determinant := fd.NewFastIntSet()
for _, col := range directBaseCol {
determinant.Insert(int(col.UniqueID))
}
fds.AddStrictFunctionalDependency(determinant, dependencies)
}
if mysql.HasNotNullFlag(col.RetType.Flag) {
notNullCols.Insert(int(col.UniqueID))
}
}
fds.MakeNotNull(notNullCols)
ds.fdSet = fds
}
return ds.fdSet
Expand Down Expand Up @@ -4474,7 +4586,9 @@ func (b *PlanBuilder) BuildDataSourceFromView(ctx context.Context, dbName model.
if err != nil {
if terror.ErrorNotEqual(err, ErrViewRecursive) &&
terror.ErrorNotEqual(err, ErrNoSuchTable) &&
terror.ErrorNotEqual(err, ErrInternal) {
terror.ErrorNotEqual(err, ErrInternal) &&
terror.ErrorNotEqual(err, ErrFieldNotInGroupBy) &&
terror.ErrorNotEqual(err, ErrMixOfGroupFuncAndFields) {
err = ErrViewInvalid.GenWithStackByArgs(dbName.O, tableInfo.Name.O)
}
return nil, err
Expand Down Expand Up @@ -4570,6 +4684,30 @@ func (b *PlanBuilder) buildApplyWithJoinType(outerPlan, innerPlan LogicalPlan, t
for i := outerPlan.Schema().Len(); i < ap.Schema().Len(); i++ {
ap.names[i] = types.EmptyName
}
// build the join correlated equal condition for apply join, this equal condition is used for deriving the transitive FD between outer and inner side.
correlatedCols := ExtractCorrelatedCols4LogicalPlan(innerPlan)
deduplicateCorrelatedCols := make(map[int64]*expression.CorrelatedColumn)
for _, cc := range correlatedCols {
if _, ok := deduplicateCorrelatedCols[cc.UniqueID]; !ok {
deduplicateCorrelatedCols[cc.UniqueID] = cc
}
}
// for case like select (select t1.a from t2) from t1. <t1.a> will be assigned with new UniqueID after sub query projection is built.
// we should distinguish them out, building the equivalence relationship from inner <t1.a> == outer <t1.a> in the apply-join for FD derivation.
for _, cc := range deduplicateCorrelatedCols {
// for every correlated column, find the connection with the inner newly built column.
for _, col := range innerPlan.Schema().Columns {
if cc.UniqueID == col.CorrelatedColUniqueID {
ccc := &cc.Column
cond, err := expression.NewFunction(b.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), ccc, col)
if err != nil {
// give up connection building for not interfering old logic.
return ap
}
ap.EqualConditions = append(ap.EqualConditions, cond.(*expression.ScalarFunction))
}
}
}
return ap
}

Expand Down
Loading

0 comments on commit 2a8e86f

Please sign in to comment.