From 0b4bb874b01d5d15bc36549cd8f54a4b3b5387bd Mon Sep 17 00:00:00 2001 From: IggieWang <1002203581@qq.com> Date: Fri, 9 Aug 2019 15:52:42 +0800 Subject: [PATCH 1/3] finish multiple tables advisor --- executor/compiler.go | 10 ++-- idxadvisor/idxadvisor.go | 55 ++++++++++++++-------- idxadvisor/index.go | 16 +++++-- planner/core/index_advisor.go | 86 ++++++++++++++++++++++++++--------- 4 files changed, 118 insertions(+), 49 deletions(-) diff --git a/executor/compiler.go b/executor/compiler.go index b3b3e6ba38889..2ca69bec7cb6b 100644 --- a/executor/compiler.go +++ b/executor/compiler.go @@ -34,8 +34,6 @@ import ( "go.uber.org/zap" ) -const tblname string = "idxadv" - var ( stmtNodeCounterUse = metrics.StmtNodeCounter.WithLabelValues("Use") stmtNodeCounterShow = metrics.StmtNodeCounter.WithLabelValues("Show") @@ -94,8 +92,10 @@ func (c *Compiler) compile(ctx context.Context, stmtNode ast.StmtNode, skipBind queryInfo := plannercore.NewQueryExprInfo(p) m := plannercore.NewTableInfoSets(queryInfo) - for _, v := range m { - fmt.Println(v.TblInfo.Name.L) + tblNames := []string{} + for k, v := range m { + tblNames = append(tblNames, k) + fmt.Println(k) fmt.Println(v.Eq) fmt.Println(v.O) fmt.Println(v.Rg) @@ -110,7 +110,7 @@ func (c *Compiler) compile(ctx context.Context, stmtNode ast.StmtNode, skipBind // Construct virtual infoschema dbname := c.Ctx.GetSessionVars().CurrentDB - virtualIS := idxadvisor.GetVirtualInfoschema(infoSchema, dbname, tblname) + virtualIS := idxadvisor.GetVirtualInfoschema(infoSchema, dbname, tblNames) // Get virtual final plan. vFinalPlan, err := planner.Optimize(ctx, c.Ctx, stmtNode, virtualIS) diff --git a/idxadvisor/idxadvisor.go b/idxadvisor/idxadvisor.go index 1553c8bfa6a96..5ce209a23d692 100644 --- a/idxadvisor/idxadvisor.go +++ b/idxadvisor/idxadvisor.go @@ -86,53 +86,72 @@ func (ia *IdxAdvisor) StartTask(query string) { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select * from IDXADV where a = 1 and c = 3"); err != nil { + if _, err := ia.dbClient.Exec("select * from idxadv where a = 1 and c = 3"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select c from IDXADV where a in (1,3)"); err != nil { + if _, err := ia.dbClient.Exec("select c from idxadv where a in (1,3)"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select c from IDXADV where a+c=2"); err != nil { + if _, err := ia.dbClient.Exec("select * from idxadv where a = 1 and c = 3 or b = 1"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select * from IDXADV where c in (select a from IDXADV where a>0)"); err != nil { + if _, err := ia.dbClient.Exec("select c from idxadv where a + c = 2"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select * from IDXADV where c in (select a from t1 where a>0)"); err != nil { + if _, err := ia.dbClient.Exec("select a, c, count(*) from idxadv group by a, c"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select * from IDXADV, t1 where IDXADV.c = t1.c"); err != nil { + if _, err := ia.dbClient.Exec("select * from idxadv where c in (select a from t1 where a>0)"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select c,sum(a) as v from idxadv where b=1 group by c having sum(a) >= (select sum(a)*0.1 from t1 where b = 1) order by v"); err != nil { + if _, err := ia.dbClient.Exec("select * from idxadv, t1 where IDXADV.c = t1.c"); err != nil { + fmt.Printf("**********query execution error: %v\n", err) + panic(err) + } + if _, err := ia.dbClient.Exec("select c,sum(id*(a+1)) as v from idxadv where b=1 group by c having sum(id*(a+1)) >= (select sum(a)*0.1 from t1 where b = 1) order by v"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } } } - -func GetVirtualInfoschema(is infoschema.InfoSchema, dbName, tblName string) infoschema.InfoSchema { +/* +// StartTask start handling queries in idxadv mode after session variable tidb_enable_index_advisor has been set +func (ia *IdxAdvisor) StartTask(query string) { + if ia.IsReady() { + // var err error + sqlFile := "/tmp/queries" + queries := readQuery(&sqlFile) + for i, query := range queries { + fmt.Printf("$$$$$$$$$$$$$$$$$$$$$$[%v]$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n", i+1) + ia.dbClient.Exec(query) + } + } +} +*/ +func GetVirtualInfoschema(is infoschema.InfoSchema, dbName string, tblNames []string) infoschema.InfoSchema { // Get a copy of InfoSchema dbInfos := is.Clone() ISCopy := infoschema.MockInfoSchemaWithDBInfos(dbInfos, is.SchemaMetaVersion()) dbname := model.NewCIStr(dbName) - tblname := model.NewCIStr(tblName) - tblCopy, err := ISCopy.TableByName(dbname, tblname) - if err != nil { - panic(err) - } - tblInfoCopy := tblCopy.Meta() + for _, tblname := range tblNames { + tblname := model.NewCIStr(tblname) + tblCopy, err := ISCopy.TableByName(dbname, tblname) + if err != nil { + panic(err) + } + tblInfoCopy := tblCopy.Meta() - // add virtual indexes to InfoSchemaCopy.TblInfo - virtualIndexes := BuildVirtualIndexes(tblInfoCopy, dbname, tblname) - tblInfoCopy.Indices = append(tblInfoCopy.Indices, virtualIndexes...) + // add virtual indexes to InfoSchemaCopy.TblInfo + virtualIndexes := BuildVirtualIndexes(tblInfoCopy, dbname, tblname) + tblInfoCopy.Indices = append(tblInfoCopy.Indices, virtualIndexes...) + } return ISCopy } diff --git a/idxadvisor/index.go b/idxadvisor/index.go index c2a482c54c50f..b00d1d17bab31 100644 --- a/idxadvisor/index.go +++ b/idxadvisor/index.go @@ -13,8 +13,8 @@ type IndicesWithCost struct { Cost float64 } -// MIN_PRECISION is Precision for comparing cost or benefit. -const MIN_PRECISION = 0.0001 +// Deviation is a deviation standard for comparing benefit. +const Deviation = 0.01 // FindVirtualIndices finds the final physical plan's indices. func FindVirtualIndices(plan plannercore.PhysicalPlan) []*model.IndexInfo { @@ -33,6 +33,7 @@ func travelPhysicalPlan(plan plannercore.PhysicalPlan, indices *[]*model.IndexIn for _, idxPlan := range t.IndexPlans { switch x := idxPlan.(type) { case *plannercore.PhysicalIndexScan: + x.Index.Table = x.Table.Name *indices = append(*indices, x.Index) } } @@ -40,6 +41,7 @@ func travelPhysicalPlan(plan plannercore.PhysicalPlan, indices *[]*model.IndexIn for _, idxPlan := range t.IndexPlans { switch x := idxPlan.(type) { case *plannercore.PhysicalIndexScan: + x.Index.Table = x.Table.Name *indices = append(*indices, x.Index) } } @@ -52,8 +54,10 @@ func travelPhysicalPlan(plan plannercore.PhysicalPlan, indices *[]*model.IndexIn // WriteResult save virtual indices and cost and print them. func WriteResult(iwc IndicesWithCost, connectionID uint64, origCost float64) { + fmt.Printf("***Connection id %d, virtual physical plan's cost: %f, original cost: %f, \n***Virtual index:", connectionID, iwc.Cost, origCost) benefit := origCost - iwc.Cost - if benefit < MIN_PRECISION { + if benefit / origCost < Deviation { + fmt.Println("needn't create index") return } @@ -63,12 +67,13 @@ func WriteResult(iwc IndicesWithCost, connectionID uint64, origCost float64) { indices := iwc.Indices ia := registeredIdxAdv[connectionID] - fmt.Printf("***Connection id %d, virtual physical plan's cost: %f, original cost: %f, \n***Virtual index:", connectionID, iwc.Cost, origCost) if len(indices) != 0 { for _, idx := range indices { fmt.Printf("(") + tblName := idx.Table.L for _, col := range idx.Columns { - fmt.Printf("%s ", col.Name.L) + idxCol := tblName + "." + col.Name.L + fmt.Printf("%s ", idxCol) } fmt.Printf("\b) ") @@ -82,6 +87,7 @@ func WriteResult(iwc IndicesWithCost, connectionID uint64, origCost float64) { fmt.Println("\n----------------Result----------------") for _, v := range registeredIdxAdv { for _, i := range v.Candidate_idx { + fmt.Printf("%s: ", i.Index.Table.L) fmt.Printf("(") for _, col := range i.Index.Columns { fmt.Printf("%s ", col.Name.L) diff --git a/planner/core/index_advisor.go b/planner/core/index_advisor.go index cb1a0ffc6171a..08dde9caaa103 100644 --- a/planner/core/index_advisor.go +++ b/planner/core/index_advisor.go @@ -37,6 +37,7 @@ type dataSource struct { ColCnt int } +// Join multiple QueryExprInfos. func multiJoinQueryExprInfo(queryInfos []*QueryExprInfo) *QueryExprInfo { if len(queryInfos) == 1 { return queryInfos[0] @@ -49,6 +50,7 @@ func multiJoinQueryExprInfo(queryInfos []*QueryExprInfo) *QueryExprInfo { return q } +// Join two QueryExprInfos. func doubleJoinQueryExprInfo(first *QueryExprInfo, next *QueryExprInfo) *QueryExprInfo { first.ScalarFuncExpr = append(first.ScalarFuncExpr, next.ScalarFuncExpr...) first.ColumnExpr = append(first.ColumnExpr, next.ColumnExpr...) @@ -57,7 +59,7 @@ func doubleJoinQueryExprInfo(first *QueryExprInfo, next *QueryExprInfo) *QueryEx return first } -// NewQueryExprInfo constructs the expression information of the query. +// NewQueryExprInfo constructs the expression information of the query and returns a QueryExprInfo. func NewQueryExprInfo(p PhysicalPlan) *QueryExprInfo { queryInfos, _ := recursiveGenQueryInfo(p, []*QueryExprInfo{}, []int{}) return multiJoinQueryExprInfo(queryInfos) @@ -168,6 +170,11 @@ func recursiveGenQueryInfo(in PhysicalPlan, queryInfos []*QueryExprInfo, idxs [] switch expr := e.(type) { case *expression.Column: queryInfo.ColumnExpr = append(queryInfo.ColumnExpr, []*expression.Column{expr}) + case *expression.ScalarFunction: + columns := DeriveColumn(expr) + for _, col := range columns { + queryInfo.ColumnExpr = append(queryInfo.ColumnExpr, []*expression.Column{col}) + } } } groupByCols := []*expression.Column{} @@ -184,6 +191,11 @@ func recursiveGenQueryInfo(in PhysicalPlan, queryInfos []*QueryExprInfo, idxs [] switch expr := e.(type) { case *expression.Column: queryInfo.ColumnExpr = append(queryInfo.ColumnExpr, []*expression.Column{expr}) + case *expression.ScalarFunction: + columns := DeriveColumn(expr) + for _, col := range columns { + queryInfo.ColumnExpr = append(queryInfo.ColumnExpr, []*expression.Column{col}) + } } } groupByCols := []*expression.Column{} @@ -230,43 +242,66 @@ func recursiveGenQueryInfo(in PhysicalPlan, queryInfos []*QueryExprInfo, idxs [] return queryInfos, idxs } -func getAllScalarFunc(functions []*expression.ScalarFunction) []*expression.ScalarFunction { +// DeriveScalarFunc will return a set of ScalarFunc from CNF and DNF. +func DeriveScalarFunc(functions []*expression.ScalarFunction) []*expression.ScalarFunction { allScalarFunc := []*expression.ScalarFunction{} for _, f := range functions { scalarFunc := []*expression.ScalarFunction{} - recursiveGetScalarFunc(f, &scalarFunc) + recursiveDeriveScalarFunc(f, &scalarFunc) allScalarFunc = append(allScalarFunc, scalarFunc...) } return allScalarFunc } -func recursiveGetScalarFunc(f *expression.ScalarFunction, functions *[]*expression.ScalarFunction) { +func recursiveDeriveScalarFunc(f *expression.ScalarFunction, functions *[]*expression.ScalarFunction) { switch f.FuncName.L { case "or", "and": args := f.GetArgs() - switch e := args[0].(type) { - case *expression.ScalarFunction: - recursiveGetScalarFunc(e, functions) - } - switch e := args[1].(type) { - case *expression.ScalarFunction: - recursiveGetScalarFunc(e, functions) + for _, arg := range args { + switch e := arg.(type) { + case *expression.ScalarFunction: + recursiveDeriveScalarFunc(e, functions) + } } default: *functions = append(*functions, f) } } +// DeriveColumn will return a set of Column from arithmetic expression. +func DeriveColumn(function *expression.ScalarFunction) []*expression.Column { + allColumns := []*expression.Column{} + cols := []*expression.Column{} + recursiveDeriveColumn(function, &cols) + allColumns = append(allColumns, cols...) + return allColumns +} + +func recursiveDeriveColumn(f *expression.ScalarFunction, columns *[]*expression.Column) { + switch f.FuncName.L { + case "plus", "minus", "mul", "div": + args := f.GetArgs() + for _, arg := range args { + switch e := arg.(type) { + case *expression.ScalarFunction: + recursiveDeriveColumn(e, columns) + case *expression.Column: + *columns = append(*columns, e) + } + } + } +} + // NewTableInfoSets constructs the table and its sets for forming virtual indices with queryInfo. -func NewTableInfoSets(queryInfo *QueryExprInfo) map[int64]*TableInfoSets { - tblInfoMap := make(map[int64]*TableInfoSets) +func NewTableInfoSets(queryInfo *QueryExprInfo) map[string]*TableInfoSets { + tblInfoMap := make(map[string]*TableInfoSets) for _, ds := range queryInfo.Ds { meta := ds.Table - tblInfoMap[meta.ID] = &TableInfoSets{TblInfo: meta} + tblInfoMap[meta.Name.L] = &TableInfoSets{TblInfo: meta} } // form eq or rg - queryInfo.ScalarFuncExpr = getAllScalarFunc(queryInfo.ScalarFuncExpr) + queryInfo.ScalarFuncExpr = DeriveScalarFunc(queryInfo.ScalarFuncExpr) for _, expr := range queryInfo.ScalarFuncExpr { var flag string switch expr.FuncName.L { @@ -293,10 +328,6 @@ func NewTableInfoSets(queryInfo *QueryExprInfo) map[int64]*TableInfoSets { } } - for _, tblInfoSets := range tblInfoMap { - tblInfoSets.O = removeRepeatedColumnSet(tblInfoSets.O) - } - // form ref for _, expr := range queryInfo.ProjExpr { switch e := expr.(type) { @@ -306,10 +337,19 @@ func NewTableInfoSets(queryInfo *QueryExprInfo) map[int64]*TableInfoSets { } } + // remove duplication + for _, tblInfoSets := range tblInfoMap { + tblInfoSets.Eq = removeRepeatedColumn(tblInfoSets.Eq) + tblInfoSets.O = removeRepeatedColumnSet(tblInfoSets.O) + tblInfoSets.Rg = removeRepeatedColumn(tblInfoSets.Rg) + tblInfoSets.Ref = removeRepeatedColumn(tblInfoSets.Ref) + } + return tblInfoMap } -func addToSet(e *expression.Column, tblInfoMap *map[int64]*TableInfoSets, flag string) { +// add column to Eq, Rg and Ref Set. +func addToSet(e *expression.Column, tblInfoMap *map[string]*TableInfoSets, flag string) { if e.OrigColName.O == "" { return } @@ -330,7 +370,8 @@ func addToSet(e *expression.Column, tblInfoMap *map[int64]*TableInfoSets, flag s } } -func addToOSet(name string, set []model.CIStr, tblInfoMap *map[int64]*TableInfoSets) { +// add column to O Set. +func addToOSet(name string, set []model.CIStr, tblInfoMap *map[string]*TableInfoSets) { for _, tblInfoSets := range *tblInfoMap { if tblInfoSets.TblInfo.Name.L == name { tblInfoSets.O = append(tblInfoSets.O, set) @@ -338,6 +379,7 @@ func addToOSet(name string, set []model.CIStr, tblInfoMap *map[int64]*TableInfoS } } +// categorize columns by their table. func splitColumns(columnExpr []*expression.Column) map[string]*[]model.CIStr { tblNameSet := make(map[string]*[]model.CIStr) for _, expr := range columnExpr { @@ -354,6 +396,7 @@ func splitColumns(columnExpr []*expression.Column) map[string]*[]model.CIStr { return tblNameSet } +// remove duplicates from columns. func removeRepeatedColumn(columns []model.CIStr) (ret []model.CIStr) { ret = make([]model.CIStr, 0) for _, s := range columns { @@ -373,6 +416,7 @@ func removeRepeatedColumn(columns []model.CIStr) (ret []model.CIStr) { return } +//remove duplicates from column set. func removeRepeatedColumnSet(columnSet [][]model.CIStr) (ret [][]model.CIStr) { ret = make([][]model.CIStr, 0) for _, s := range columnSet { From a69939453c18aeafbf646161b0621fdf399ca389 Mon Sep 17 00:00:00 2001 From: IggieWang <1002203581@qq.com> Date: Fri, 9 Aug 2019 20:45:56 +0800 Subject: [PATCH 2/3] finish removing unnecessary virtual indexes according to existing indexes --- executor/compiler.go | 4 +- idxadvisor/idxadvisor.go | 64 ++++++++-------- idxadvisor/index.go | 155 ++++++++++++++++++++++----------------- session/session.go | 2 +- 4 files changed, 122 insertions(+), 103 deletions(-) diff --git a/executor/compiler.go b/executor/compiler.go index 2ca69bec7cb6b..dde12713e029c 100644 --- a/executor/compiler.go +++ b/executor/compiler.go @@ -110,6 +110,7 @@ func (c *Compiler) compile(ctx context.Context, stmtNode ast.StmtNode, skipBind // Construct virtual infoschema dbname := c.Ctx.GetSessionVars().CurrentDB + conn := c.Ctx.GetSessionVars().ConnectionID virtualIS := idxadvisor.GetVirtualInfoschema(infoSchema, dbname, tblNames) // Get virtual final plan. @@ -134,7 +135,8 @@ func (c *Compiler) compile(ctx context.Context, stmtNode ast.StmtNode, skipBind // Get virtual indices with cost. selectedIndices := idxadvisor.FindVirtualIndices(vPhysicalPlan) iwc := idxadvisor.IndicesWithCost{Indices: selectedIndices, Cost: vcost} - idxadvisor.WriteResult(iwc, c.Ctx.GetSessionVars().ConnectionID, cost) + idxadvisor.SaveVirtualIndices(infoSchema, dbname, iwc, conn, cost) + idxadvisor.WriteResult() finalPlan = nil } diff --git a/idxadvisor/idxadvisor.go b/idxadvisor/idxadvisor.go index 5ce209a23d692..30873ae8e2ec2 100644 --- a/idxadvisor/idxadvisor.go +++ b/idxadvisor/idxadvisor.go @@ -44,7 +44,7 @@ type IdxAdvisor struct { // CandidateIdx includes in index and its benefit. type CandidateIdx struct { - Index *model.IndexInfo + Index *IdxAndTblInfo Benefit float64 } @@ -86,10 +86,6 @@ func (ia *IdxAdvisor) StartTask(query string) { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select * from idxadv where a = 1 and c = 3"); err != nil { - fmt.Printf("**********query execution error: %v\n", err) - panic(err) - } if _, err := ia.dbClient.Exec("select c from idxadv where a in (1,3)"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) @@ -147,10 +143,15 @@ func GetVirtualInfoschema(is infoschema.InfoSchema, dbName string, tblNames []st panic(err) } tblInfoCopy := tblCopy.Meta() + idxInfo := tblCopy.Meta().Indices // add virtual indexes to InfoSchemaCopy.TblInfo virtualIndexes := BuildVirtualIndexes(tblInfoCopy, dbname, tblname) - tblInfoCopy.Indices = append(tblInfoCopy.Indices, virtualIndexes...) + for _, virtualIndex := range virtualIndexes { + if !isExistedInTable(virtualIndex, idxInfo) { + tblInfoCopy.Indices = append(tblInfoCopy.Indices, virtualIndex) + } + } } return ISCopy } @@ -168,7 +169,6 @@ func BuildVirtualIndexes(tblInfo *model.TableInfo, dbname, tblname model.CIStr) result = append(result, indexinfo) } return result - } func GenVirtualIndexCols(tblInfo *model.TableInfo, dbname, tblname model.CIStr) [][]*ast.IndexColName { @@ -177,21 +177,19 @@ func GenVirtualIndexCols(tblInfo *model.TableInfo, dbname, tblname model.CIStr) for _, columnInfo := range columnInfos { idxCols := make([]*ast.IndexColName, 1, 1) idxCols[0] = BuildIdxColNameFromColInfo(columnInfo, dbname, tblname) - if !IndexesHasAlreadyExist(idxCols, tblInfo.Indices) { - result = append(result, idxCols) - } + result = append(result, idxCols) } nCols := len(columnInfos) for i := 0; i < nCols; i++ { - for j := 0; j < nCols; j++ { - if i != j { - idxTwoCols := make([]*ast.IndexColName, 2, 2) - idxTwoCols[0] = BuildIdxColNameFromColInfo(columnInfos[i], dbname, tblname) - idxTwoCols[1] = BuildIdxColNameFromColInfo(columnInfos[j], dbname, tblname) - result = append(result, idxTwoCols) - } + for j := 0; j < nCols; j++ { + if i != j { + idxTwoCols := make([]*ast.IndexColName, 2, 2) + idxTwoCols[0] = BuildIdxColNameFromColInfo(columnInfos[i], dbname, tblname) + idxTwoCols[1] = BuildIdxColNameFromColInfo(columnInfos[j], dbname, tblname) + result = append(result, idxTwoCols) } + } } return result @@ -204,31 +202,31 @@ func BuildIdxColNameFromColInfo(colInfo *model.ColumnInfo, dbname, tblname model return idxColName } -// TODO: This is only single col index recomendation -func IndexesHasAlreadyExist(idxCols []*ast.IndexColName, indices []*model.IndexInfo) bool { - primaryKey := findPrimaryKey(indices) - if primaryKey == nil { - return false +func GenIndexCols(index *model.IndexInfo) []model.CIStr { + cols := []model.CIStr{} + for _, idxColumn := range index.Columns { + cols = append(cols, idxColumn.Name) } - return primaryKey.Columns[0].Name.String() == idxCols[0].Column.Name.String() + return cols } -func findPrimaryKey(indices []*model.IndexInfo) *model.IndexInfo { - if len(indices) == 0 { - return nil - } - for _, indexInfo := range indices { - if indexInfo.Primary { - return indexInfo +func isExistedInTable(virtualIndex *model.IndexInfo, indices []*model.IndexInfo) bool { + is := false + virtualIndexCols := GenIndexCols(virtualIndex) + for _, idx := range indices { + indexCols := GenIndexCols(idx) + if reflect.DeepEqual(virtualIndexCols, indexCols) { + is = true + break } } - return nil + return is } func (ia *IdxAdvisor) addCandidate(virtualIdx *CandidateIdx) { in := false for _, candidateIdx := range ia.Candidate_idx { - if reflect.DeepEqual(candidateIdx.Index.Columns, virtualIdx.Index.Columns) { + if reflect.DeepEqual(candidateIdx.Index.Index.Columns, virtualIdx.Index.Index.Columns) && reflect.DeepEqual(candidateIdx.Index.Table.Name, virtualIdx.Index.Table.Name) { candidateIdx.Benefit += virtualIdx.Benefit in = true break @@ -238,4 +236,4 @@ func (ia *IdxAdvisor) addCandidate(virtualIdx *CandidateIdx) { if !in { ia.Candidate_idx = append(ia.Candidate_idx, virtualIdx) } -} \ No newline at end of file +} diff --git a/idxadvisor/index.go b/idxadvisor/index.go index b00d1d17bab31..6602623e1b998 100644 --- a/idxadvisor/index.go +++ b/idxadvisor/index.go @@ -1,99 +1,118 @@ package idxadvisor import ( - "fmt" + "fmt" "github.com/pingcap/parser/model" + "github.com/pingcap/tidb/infoschema" plannercore "github.com/pingcap/tidb/planner/core" ) // IndicesWithCost includes in indices and their physical plan cost. type IndicesWithCost struct { - Indices []*model.IndexInfo - Cost float64 + Indices []*IdxAndTblInfo + Cost float64 +} + +// IdxAndTblInfo provides a IndexInfo and its TableInfo. +type IdxAndTblInfo struct { + Index *model.IndexInfo + Table *model.TableInfo } // Deviation is a deviation standard for comparing benefit. -const Deviation = 0.01 +const Deviation = 0.01 // FindVirtualIndices finds the final physical plan's indices. -func FindVirtualIndices(plan plannercore.PhysicalPlan) []*model.IndexInfo { - indices := []*model.IndexInfo{} - travelPhysicalPlan(plan, &indices) - return indices +func FindVirtualIndices(plan plannercore.PhysicalPlan) []*IdxAndTblInfo { + indices := []*IdxAndTblInfo{} + travelPhysicalPlan(plan, &indices) + return indices } -func travelPhysicalPlan(plan plannercore.PhysicalPlan, indices *[]*model.IndexInfo) { - if plan == nil { - return - } +func travelPhysicalPlan(plan plannercore.PhysicalPlan, indices *[]*IdxAndTblInfo) { + if plan == nil { + return + } - switch t := plan.(type) { - case *plannercore.PhysicalIndexReader: - for _, idxPlan := range t.IndexPlans { - switch x := idxPlan.(type) { - case *plannercore.PhysicalIndexScan: - x.Index.Table = x.Table.Name - *indices = append(*indices, x.Index) - } - } - case *plannercore.PhysicalIndexLookUpReader: - for _, idxPlan := range t.IndexPlans { - switch x := idxPlan.(type) { - case *plannercore.PhysicalIndexScan: - x.Index.Table = x.Table.Name - *indices = append(*indices, x.Index) - } + switch t := plan.(type) { + case *plannercore.PhysicalIndexReader: + for _, idxPlan := range t.IndexPlans { + switch x := idxPlan.(type) { + case *plannercore.PhysicalIndexScan: + x.Index.Table = x.Table.Name + index := &IdxAndTblInfo{Index: x.Index, Table: x.Table} + *indices = append(*indices, index) + } + } + case *plannercore.PhysicalIndexLookUpReader: + for _, idxPlan := range t.IndexPlans { + switch x := idxPlan.(type) { + case *plannercore.PhysicalIndexScan: + x.Index.Table = x.Table.Name + index := &IdxAndTblInfo{Index: x.Index, Table: x.Table} + *indices = append(*indices, index) + } } } - for _, p := range plan.Children() { - travelPhysicalPlan(p, indices) - } + for _, p := range plan.Children() { + travelPhysicalPlan(p, indices) + } } -// WriteResult save virtual indices and cost and print them. -func WriteResult(iwc IndicesWithCost, connectionID uint64, origCost float64) { +// SaveVirtualIndices saves virtual indices and their benefit. +func SaveVirtualIndices(is infoschema.InfoSchema, dbname string, iwc IndicesWithCost, connectionID uint64, origCost float64) { fmt.Printf("***Connection id %d, virtual physical plan's cost: %f, original cost: %f, \n***Virtual index:", connectionID, iwc.Cost, origCost) - benefit := origCost - iwc.Cost - if benefit / origCost < Deviation { - fmt.Println("needn't create index") - return - } + benefit := (origCost - iwc.Cost) / origCost + if benefit < Deviation { + fmt.Println("needn't create index") + return + } - if _, ok := registeredIdxAdv[connectionID]; !ok { - registeredIdxAdv[connectionID] = new(IdxAdvisor) - } + if _, ok := registeredIdxAdv[connectionID]; !ok { + registeredIdxAdv[connectionID] = new(IdxAdvisor) + } - indices := iwc.Indices - ia := registeredIdxAdv[connectionID] - if len(indices) != 0 { - for _, idx := range indices { - fmt.Printf("(") - tblName := idx.Table.L - for _, col := range idx.Columns { - idxCol := tblName + "." + col.Name.L - fmt.Printf("%s ", idxCol) - } - fmt.Printf("\b) ") - - candidateIdx := &CandidateIdx{Index: idx, - Benefit: benefit, - } - ia.addCandidate(candidateIdx) + indices := iwc.Indices + ia := registeredIdxAdv[connectionID] + for _, idx := range indices { + table, err := is.TableByName(model.NewCIStr(dbname), idx.Table.Name) + if err != nil { + panic(err) } - } - fmt.Println("\n----------------Result----------------") - for _, v := range registeredIdxAdv { - for _, i := range v.Candidate_idx { - fmt.Printf("%s: ", i.Index.Table.L) - fmt.Printf("(") - for _, col := range i.Index.Columns { - fmt.Printf("%s ", col.Name.L) - } - fmt.Printf("\b) %f \n", i.Benefit) + if isExistedInTable(idx.Index, table.Meta().Indices) { + continue + } + + candidateIdx := &CandidateIdx{Index: idx, + Benefit: benefit, + } + ia.addCandidate(candidateIdx) + + fmt.Printf(" (") + tblName := idx.Index.Table.L + for _, col := range idx.Index.Columns { + idxCol := tblName + "." + col.Name.L + fmt.Printf("%s ", idxCol) } - fmt.Println("--------------------------------------") + fmt.Printf("\b)\n") } +} + +// WriteResult prints virtual indices and their benefit. +func WriteResult() { + fmt.Println("----------------------Result----------------------") + for _, v := range registeredIdxAdv { + for _, i := range v.Candidate_idx { + fmt.Printf("%s: ", i.Index.Index.Table.L) + fmt.Printf("(") + for _, col := range i.Index.Index.Columns { + fmt.Printf("%s ", col.Name.L) + } + fmt.Printf("\b) %f \n", i.Benefit) + } + fmt.Println("-----------------------------------------------") + } } \ No newline at end of file diff --git a/session/session.go b/session/session.go index 9c2467b9f69e8..692f64a928162 100644 --- a/session/session.go +++ b/session/session.go @@ -1044,7 +1044,7 @@ func (s *session) execute(ctx context.Context, sql string) (recordSets []sqlexec return nil, err } - if strings.Contains(sql, "IDXADV") || strings.Contains(sql, "t1") { + if strings.Contains(sql, "idxadv") || strings.Contains(sql, "t1") { fmt.Printf("###############%v##############\n", sql) } charsetInfo, collation := s.sessionVars.GetCharsetInfo() From 5538b4db152193984a96cf3ce3679b28aa19cf52 Mon Sep 17 00:00:00 2001 From: IggieWang <1002203581@qq.com> Date: Sat, 10 Aug 2019 17:34:31 +0800 Subject: [PATCH 3/3] complete multi columns index advisor --- executor/compiler.go | 12 +---- idxadvisor/idxadvisor.go | 94 +++++++++++++++++++++++++++++++---- idxadvisor/index.go | 33 +++++++++--- planner/core/index_advisor.go | 18 +++---- 4 files changed, 121 insertions(+), 36 deletions(-) diff --git a/executor/compiler.go b/executor/compiler.go index dde12713e029c..1e319f9f4d4ca 100644 --- a/executor/compiler.go +++ b/executor/compiler.go @@ -92,15 +92,6 @@ func (c *Compiler) compile(ctx context.Context, stmtNode ast.StmtNode, skipBind queryInfo := plannercore.NewQueryExprInfo(p) m := plannercore.NewTableInfoSets(queryInfo) - tblNames := []string{} - for k, v := range m { - tblNames = append(tblNames, k) - fmt.Println(k) - fmt.Println(v.Eq) - fmt.Println(v.O) - fmt.Println(v.Rg) - fmt.Println(v.Ref) - } // Get final plan cost. cost, err := plannercore.GetTaskCost(finalPlan) @@ -111,7 +102,7 @@ func (c *Compiler) compile(ctx context.Context, stmtNode ast.StmtNode, skipBind // Construct virtual infoschema dbname := c.Ctx.GetSessionVars().CurrentDB conn := c.Ctx.GetSessionVars().ConnectionID - virtualIS := idxadvisor.GetVirtualInfoschema(infoSchema, dbname, tblNames) + virtualIS := idxadvisor.GetVirtualInfoschema(infoSchema, dbname, m) // Get virtual final plan. vFinalPlan, err := planner.Optimize(ctx, c.Ctx, stmtNode, virtualIS) @@ -136,7 +127,6 @@ func (c *Compiler) compile(ctx context.Context, stmtNode ast.StmtNode, skipBind selectedIndices := idxadvisor.FindVirtualIndices(vPhysicalPlan) iwc := idxadvisor.IndicesWithCost{Indices: selectedIndices, Cost: vcost} idxadvisor.SaveVirtualIndices(infoSchema, dbname, iwc, conn, cost) - idxadvisor.WriteResult() finalPlan = nil } diff --git a/idxadvisor/idxadvisor.go b/idxadvisor/idxadvisor.go index 30873ae8e2ec2..875d9b9b46a64 100644 --- a/idxadvisor/idxadvisor.go +++ b/idxadvisor/idxadvisor.go @@ -11,6 +11,7 @@ import ( "github.com/pingcap/parser/model" "github.com/pingcap/tidb/ddl" "github.com/pingcap/tidb/infoschema" + plannercore "github.com/pingcap/tidb/planner/core" ) type idxAdvPool []*IdxAdvisor @@ -86,11 +87,11 @@ func (ia *IdxAdvisor) StartTask(query string) { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select c from idxadv where a in (1,3)"); err != nil { + if _, err := ia.dbClient.Exec("select a from idxadv where c in (1,3)"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } - if _, err := ia.dbClient.Exec("select * from idxadv where a = 1 and c = 3 or b = 1"); err != nil { + if _, err := ia.dbClient.Exec("select * from idxadv where c = 3 and a = 1 or b = 1"); err != nil { fmt.Printf("**********query execution error: %v\n", err) panic(err) } @@ -115,6 +116,7 @@ func (ia *IdxAdvisor) StartTask(query string) { panic(err) } } + WritFinaleResult() } /* // StartTask start handling queries in idxadv mode after session variable tidb_enable_index_advisor has been set @@ -130,13 +132,13 @@ func (ia *IdxAdvisor) StartTask(query string) { } } */ -func GetVirtualInfoschema(is infoschema.InfoSchema, dbName string, tblNames []string) infoschema.InfoSchema { +func GetVirtualInfoschema(is infoschema.InfoSchema, dbName string, tableInfoSets map[string]*plannercore.TableInfoSets) infoschema.InfoSchema { // Get a copy of InfoSchema dbInfos := is.Clone() ISCopy := infoschema.MockInfoSchemaWithDBInfos(dbInfos, is.SchemaMetaVersion()) dbname := model.NewCIStr(dbName) - for _, tblname := range tblNames { + for tblname, tblInfoSets := range tableInfoSets { tblname := model.NewCIStr(tblname) tblCopy, err := ISCopy.TableByName(dbname, tblname) if err != nil { @@ -146,7 +148,7 @@ func GetVirtualInfoschema(is infoschema.InfoSchema, dbName string, tblNames []st idxInfo := tblCopy.Meta().Indices // add virtual indexes to InfoSchemaCopy.TblInfo - virtualIndexes := BuildVirtualIndexes(tblInfoCopy, dbname, tblname) + virtualIndexes := BuildVirtualIndexes(tblInfoCopy, dbname, tblname, tblInfoSets) for _, virtualIndex := range virtualIndexes { if !isExistedInTable(virtualIndex, idxInfo) { tblInfoCopy.Indices = append(tblInfoCopy.Indices, virtualIndex) @@ -156,14 +158,14 @@ func GetVirtualInfoschema(is infoschema.InfoSchema, dbName string, tblNames []st return ISCopy } -func BuildVirtualIndexes(tblInfo *model.TableInfo, dbname, tblname model.CIStr) []*model.IndexInfo { - indexes := GenVirtualIndexCols(tblInfo, dbname, tblname) +func BuildVirtualIndexes(tblInfo *model.TableInfo, dbname, tblname model.CIStr, tblInfoSets *plannercore.TableInfoSets) []*model.IndexInfo { + indexes := GenVirtualIndexCols(tblInfo, dbname, tblname, tblInfoSets) result := make([]*model.IndexInfo, 0) for i, idxColNames := range indexes { indexName := model.NewCIStr("vIndex" + string(i)) indexinfo, err := ddl.BuildIndexInfo(tblInfo, indexName, idxColNames, model.StatePublic) if err != nil { - fmt.Printf("***************BuildVirtualIndexes error: %v!\n", err) + fmt.Printf("BuildVirtualIndexes error: %v!\n", err) panic(err) } result = append(result, indexinfo) @@ -171,15 +173,18 @@ func BuildVirtualIndexes(tblInfo *model.TableInfo, dbname, tblname model.CIStr) return result } -func GenVirtualIndexCols(tblInfo *model.TableInfo, dbname, tblname model.CIStr) [][]*ast.IndexColName { +func GenVirtualIndexCols(tblInfo *model.TableInfo, dbname, tblname model.CIStr, tblInfoSets *plannercore.TableInfoSets) [][]*ast.IndexColName { columnInfos := tblInfo.Columns var result [][]*ast.IndexColName + + // one column for _, columnInfo := range columnInfos { idxCols := make([]*ast.IndexColName, 1, 1) idxCols[0] = BuildIdxColNameFromColInfo(columnInfo, dbname, tblname) result = append(result, idxCols) } + // two columns nCols := len(columnInfos) for i := 0; i < nCols; i++ { for j := 0; j < nCols; j++ { @@ -192,9 +197,80 @@ func GenVirtualIndexCols(tblInfo *model.TableInfo, dbname, tblname model.CIStr) } } + // multi columns + candidateCols := [][]model.CIStr{} + eq := tblInfoSets.Eq + o := tblInfoSets.O + rg := tblInfoSets.Rg + ref := tblInfoSets.Ref + + // EQ + O + RANGE + REF + cols := [][]model.CIStr{} + for i, oCols := range o { + cols = append(cols, []model.CIStr{}) + addToCandidateCols(eq, &cols[i], &candidateCols) + addToCandidateCols(oCols, &cols[i], &candidateCols) + addToCandidateCols(rg, &cols[i], &candidateCols) + addToCandidateCols(ref, &cols[i], &candidateCols) + } + if len(cols) == 0 { + cols = append(cols, []model.CIStr{}) + addToCandidateCols(eq, &cols[0], &candidateCols) + addToCandidateCols(rg, &cols[0], &candidateCols) + addToCandidateCols(ref, &cols[0], &candidateCols) + } + + // O + EQ + RANGE + REF + cols = cols[:0] + for i, oCols := range o { + cols = append(cols, []model.CIStr{}) + addToCandidateCols(oCols, &cols[i], &candidateCols) + addToCandidateCols(eq, &cols[i], &candidateCols) + addToCandidateCols(rg, &cols[i], &candidateCols) + addToCandidateCols(ref, &cols[i], &candidateCols) + } + if len(cols) == 0 { + cols = append(cols, []model.CIStr{}) + addToCandidateCols(eq, &cols[0], &candidateCols) + addToCandidateCols(rg, &cols[0], &candidateCols) + addToCandidateCols(ref, &cols[0], &candidateCols) + } + + candidateCols = plannercore.RemoveRepeatedColumnSet(candidateCols) + if len(candidateCols) > 0 { + fmt.Printf("table %s multi candidate index: ", tblname) + fmt.Println(candidateCols) + } + for _, candidateColumns := range candidateCols { + idxCols := make([]*ast.IndexColName, len(candidateColumns), len(candidateColumns)) + for i, column := range candidateColumns { + columnInfo := new(model.ColumnInfo) + for _, tmpColumn := range columnInfos { + if tmpColumn.Name.L == column.L { + columnInfo = tmpColumn + break + } + } + idxCols[i] = BuildIdxColNameFromColInfo(columnInfo, dbname, tblname) + } + result = append(result, idxCols) + } + return result } +func addToCandidateCols(readyCols []model.CIStr, cols *[]model.CIStr, candidateCols *[][]model.CIStr) { + if len(readyCols) == 0 { + return + } + + *cols = append(*cols, readyCols...) + *cols = plannercore.RemoveRepeatedColumn(*cols) + if len(*cols) > 2 { + *candidateCols = append(*candidateCols, *cols) + } +} + func BuildIdxColNameFromColInfo(colInfo *model.ColumnInfo, dbname, tblname model.CIStr) *ast.IndexColName { idxColName := &ast.IndexColName{} idxColName.Column = &ast.ColumnName{Schema: dbname, Table: tblname, Name: colInfo.Name} diff --git a/idxadvisor/index.go b/idxadvisor/index.go index 6602623e1b998..28c2b5dc0e74b 100644 --- a/idxadvisor/index.go +++ b/idxadvisor/index.go @@ -20,8 +20,10 @@ type IdxAndTblInfo struct { Table *model.TableInfo } -// Deviation is a deviation standard for comparing benefit. -const Deviation = 0.01 +const( + // Deviation is a deviation standard for comparing benefit. + Deviation = 0.01 +) // FindVirtualIndices finds the final physical plan's indices. func FindVirtualIndices(plan plannercore.PhysicalPlan) []*IdxAndTblInfo { @@ -63,9 +65,9 @@ func travelPhysicalPlan(plan plannercore.PhysicalPlan, indices *[]*IdxAndTblInfo // SaveVirtualIndices saves virtual indices and their benefit. func SaveVirtualIndices(is infoschema.InfoSchema, dbname string, iwc IndicesWithCost, connectionID uint64, origCost float64) { - fmt.Printf("***Connection id %d, virtual physical plan's cost: %f, original cost: %f, \n***Virtual index:", connectionID, iwc.Cost, origCost) - benefit := (origCost - iwc.Cost) / origCost - if benefit < Deviation { + fmt.Printf("***Connection id %d, virtual physical plan's cost: %f, original cost: %f \n", connectionID, iwc.Cost, origCost) + benefit := origCost - iwc.Cost + if benefit / origCost < Deviation { fmt.Println("needn't create index") return } @@ -76,6 +78,7 @@ func SaveVirtualIndices(is infoschema.InfoSchema, dbname string, iwc IndicesWith indices := iwc.Indices ia := registeredIdxAdv[connectionID] + fmt.Printf("***Index:") for _, idx := range indices { table, err := is.TableByName(model.NewCIStr(dbname), idx.Table.Name) if err != nil { @@ -83,7 +86,7 @@ func SaveVirtualIndices(is infoschema.InfoSchema, dbname string, iwc IndicesWith } if isExistedInTable(idx.Index, table.Meta().Indices) { - continue + continue } candidateIdx := &CandidateIdx{Index: idx, @@ -111,7 +114,23 @@ func WriteResult() { for _, col := range i.Index.Index.Columns { fmt.Printf("%s ", col.Name.L) } - fmt.Printf("\b) %f \n", i.Benefit) + fmt.Printf("\b) %f\n", i.Benefit) + } + fmt.Println("-----------------------------------------------") + } +} + +// WritFinaleResult saves virtual indices and their benefit. +func WritFinaleResult() { + fmt.Println("----------------------Result----------------------") + for _, v := range registeredIdxAdv { + for _, i := range v.Candidate_idx { + fmt.Printf("%s: ", i.Index.Index.Table.L) + fmt.Printf("(") + for _, col := range i.Index.Index.Columns { + fmt.Printf("%s ", col.Name.L) + } + fmt.Printf("\b) %f\n", i.Benefit) } fmt.Println("-----------------------------------------------") } diff --git a/planner/core/index_advisor.go b/planner/core/index_advisor.go index 08dde9caaa103..191f6c19f6952 100644 --- a/planner/core/index_advisor.go +++ b/planner/core/index_advisor.go @@ -339,10 +339,10 @@ func NewTableInfoSets(queryInfo *QueryExprInfo) map[string]*TableInfoSets { // remove duplication for _, tblInfoSets := range tblInfoMap { - tblInfoSets.Eq = removeRepeatedColumn(tblInfoSets.Eq) - tblInfoSets.O = removeRepeatedColumnSet(tblInfoSets.O) - tblInfoSets.Rg = removeRepeatedColumn(tblInfoSets.Rg) - tblInfoSets.Ref = removeRepeatedColumn(tblInfoSets.Ref) + tblInfoSets.Eq = RemoveRepeatedColumn(tblInfoSets.Eq) + tblInfoSets.O = RemoveRepeatedColumnSet(tblInfoSets.O) + tblInfoSets.Rg = RemoveRepeatedColumn(tblInfoSets.Rg) + tblInfoSets.Ref = RemoveRepeatedColumn(tblInfoSets.Ref) } return tblInfoMap @@ -390,14 +390,14 @@ func splitColumns(columnExpr []*expression.Column) map[string]*[]model.CIStr { } for _, columns := range tblNameSet { - *columns = removeRepeatedColumn(*columns) + *columns = RemoveRepeatedColumn(*columns) } return tblNameSet } -// remove duplicates from columns. -func removeRepeatedColumn(columns []model.CIStr) (ret []model.CIStr) { +// RemoveRepeatedColumn removes duplicates from columns. +func RemoveRepeatedColumn(columns []model.CIStr) (ret []model.CIStr) { ret = make([]model.CIStr, 0) for _, s := range columns { if len(ret) == 0 { @@ -416,8 +416,8 @@ func removeRepeatedColumn(columns []model.CIStr) (ret []model.CIStr) { return } -//remove duplicates from column set. -func removeRepeatedColumnSet(columnSet [][]model.CIStr) (ret [][]model.CIStr) { +// RemoveRepeatedColumnSet removes duplicates from column set. +func RemoveRepeatedColumnSet(columnSet [][]model.CIStr) (ret [][]model.CIStr) { ret = make([][]model.CIStr, 0) for _, s := range columnSet { if len(ret) == 0 {