Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: support automatically create sql baselines #12434

Merged
merged 11 commits into from
Oct 17, 2019
27 changes: 25 additions & 2 deletions bindinfo/bind_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ func (s *testSuite) TearDownTest(c *C) {
}

// cleanBindingEnv resets the binding state shared by the tests in this suite:
// it runs statements against mysql.bind_info through tk and clears the
// domain's bind handle.
// NOTE(review): this listing looks like a diff rendered without +/- markers, so
// the drop/create pair and the truncate/Clear pair are presumably the old and
// the new version of the body rather than four steps that run together —
// confirm against the actual file.
func (s *testSuite) cleanBindingEnv(tk *testkit.TestKit) {
tk.MustExec("drop table if exists mysql.bind_info")
tk.MustExec(session.CreateBindInfoTable)
tk.MustExec("truncate table mysql.bind_info")
s.domain.BindHandle().Clear()
}

func (s *testSuite) TestBindParse(c *C) {
Expand Down Expand Up @@ -486,3 +486,26 @@ func (s *testSuite) TestPreparedStmt(c *C) {
tk.MustExec("execute stmt1")
c.Assert(len(tk.Se.GetSessionVars().StmtCtx.IndexNames), Equals, 0)
}

// TestCapturePlanBaseline checks that a select statement executed more than
// once is turned into a global binding by CaptureBaselines when both the
// statement summary and baseline capture are switched on.
func (s *testSuite) TestCapturePlanBaseline(c *C) {
	tk := testkit.NewTestKit(c, s.store)
	s.cleanBindingEnv(tk)

	// Enable both switches for the duration of the test and turn them off
	// again on exit.
	tk.MustExec("set @@tidb_enable_stmt_summary = on")
	tk.MustExec(" set @@tidb_capture_plan_baselines = on")
	defer func() {
		tk.MustExec("set @@tidb_enable_stmt_summary = off")
		tk.MustExec(" set @@tidb_capture_plan_baselines = off")
	}()

	for _, setup := range []string{
		"use test",
		"drop table if exists t",
		"create table t(a int)",
	} {
		tk.MustExec(setup)
	}

	// capture triggers one round of baseline capture against the current
	// information schema.
	capture := func() {
		s.domain.BindHandle().CaptureBaselines(s.domain.InfoSchema())
	}

	// Before the query below has been executed, no binding is expected.
	capture()
	tk.MustQuery("show global bindings").Check(testkit.Rows())

	// Execute the same query twice so that it qualifies for capture.
	for i := 0; i < 2; i++ {
		tk.MustExec("select * from t")
	}
	capture()

	bindings := tk.MustQuery("show global bindings").Rows()
	c.Assert(len(bindings), Equals, 1)
	captured := bindings[0]
	c.Assert(captured[0], Equals, "select * from t")
	c.Assert(captured[1], Equals, "select /*+ USE_INDEX(@`sel_1` `t` )*/ * from t")
}
57 changes: 56 additions & 1 deletion bindinfo/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,24 @@ package bindinfo
import (
"context"
"fmt"
"go.uber.org/zap"
"strings"
"sync"
"sync/atomic"
"time"

"github.com/pingcap/parser"
"github.com/pingcap/parser/ast"
"github.com/pingcap/parser/mysql"
"github.com/pingcap/parser/terror"
"github.com/pingcap/tidb/infoschema"
"github.com/pingcap/tidb/metrics"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/store/tikv/oracle"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/sqlexec"
"github.com/pingcap/tidb/util/stmtsummary"
"go.uber.org/zap"
)

// BindHandle is used to handle all global sql bind operations.
Expand Down Expand Up @@ -70,6 +74,8 @@ type BindHandle struct {
}

lastUpdateTime types.Time

parser4Baseline *parser.Parser
}

// Lease influences the duration of loading bind info and handling invalid bind.
Expand All @@ -86,6 +92,7 @@ func NewBindHandle(ctx sessionctx.Context) *BindHandle {
handle.sctx.Context = ctx
handle.bindInfo.Value.Store(make(cache, 32))
handle.bindInfo.parser = parser.New()
handle.parser4Baseline = parser.New()
handle.invalidBindRecordMap.Value.Store(make(map[string]*invalidBindRecordMap))
return handle
}
Expand Down Expand Up @@ -444,3 +451,51 @@ func (h *BindHandle) logicalDeleteBindInfoSQL(normdOrigSQL, db string, updateTs
normdOrigSQL,
db)
}

// GenHintsFromSQL generates optimizer hints for the given statement node and
// returns them as a string.
// It is declared as a package-level function variable so that this package
// does not need to import the planner package, which would create a circular
// dependency. CaptureBaselines calls it, so it must be assigned before then.
var GenHintsFromSQL func(ctx context.Context, sctx sessionctx.Context, node ast.Node, is infoschema.InfoSchema) (string, error)

// CaptureBaselines is used to automatically capture plan baselines.
func (h *BindHandle) CaptureBaselines(is infoschema.InfoSchema) {
schemas, sqls := stmtsummary.StmtSummaryByDigestMap.GetMoreThanOnceSelect()
for i := range sqls {
stmt, err := h.parser4Baseline.ParseOneStmt(sqls[i], "", "")
if err != nil {
logutil.BgLogger().Debug("parse SQL failed", zap.String("SQL", sqls[i]), zap.Error(err))
continue
}
normalizedSQL, digiest := parser.NormalizeDigest(sqls[i])
if r := h.GetBindRecord(digiest, normalizedSQL, schemas[i]); r != nil && r.Status == Using {
continue
}
h.sctx.Lock()
err = h.sctx.RefreshTxnCtx(context.TODO())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this function call?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It creates a new txn, which may be used by the optimizer when generating hints, since the optimizer may execute some subqueries; without it, the test will panic.

var hints string
if err == nil {
h.sctx.GetSessionVars().CurrentDB = schemas[i]
hints, err = GenHintsFromSQL(context.TODO(), h.sctx.Context, stmt, is)
}
h.sctx.Unlock()
if err != nil {
logutil.BgLogger().Info("generate hints failed", zap.String("SQL", sqls[i]), zap.Error(err))
continue
}
// We can skip simple query like point get.
if hints == "" {
continue
}
bindsql := strings.Replace(normalizedSQL, "select", fmt.Sprintf("select /*+ %s*/", hints), 1)
err = h.AddBindRecord(&BindRecord{OriginalSQL: sqls[i], BindSQL: bindsql, Db: schemas[i], Status: Using})
if err != nil {
logutil.BgLogger().Info("capture baseline failed", zap.String("SQL", sqls[i]), zap.Error(err))
}
}
}

// Clear replaces the cached bind records and the invalid-bind-record map with
// fresh empty ones and resets the last update time to the zero timestamp.
// It is intended for tests only.
func (h *BindHandle) Clear() {
	emptyCache := make(cache)
	emptyInvalid := make(map[string]*invalidBindRecordMap)

	h.bindInfo.Store(emptyCache)
	h.invalidBindRecordMap.Store(emptyInvalid)
	h.lastUpdateTime = types.ZeroTimestamp
}
25 changes: 17 additions & 8 deletions domain/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -827,25 +827,34 @@ func (do *Domain) LoadBindInfoLoop(ctx sessionctx.Context) error {
return err
}

do.loadBindInfoLoop()
do.globalBindHandleWorkerLoop()
do.handleInvalidBindTaskLoop()
return nil
}

func (do *Domain) loadBindInfoLoop() {
func (do *Domain) globalBindHandleWorkerLoop() {
do.wg.Add(1)
go func() {
defer do.wg.Done()
defer recoverInDomain("loadBindInfoLoop", false)
defer recoverInDomain("globalBindHandleWorkerLoop", false)
loadTicker := time.NewTicker(bindinfo.Lease)
defer loadTicker.Stop()
captureBaselineTicker := time.NewTicker(bindinfo.Lease)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just use a single ticker for 2 actions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, we can.

defer captureBaselineTicker.Stop()
lzmhhh123 marked this conversation as resolved.
Show resolved Hide resolved
for {
select {
case <-do.exit:
return
case <-time.After(bindinfo.Lease):
}
err := do.bindHandle.Update(false)
if err != nil {
logutil.BgLogger().Error("update bindinfo failed", zap.Error(err))
case <-loadTicker.C:
err := do.bindHandle.Update(false)
if err != nil {
logutil.BgLogger().Error("update bindinfo failed", zap.Error(err))
}
case <-captureBaselineTicker.C:
if !variable.TiDBOptOn(variable.CapturePlanBaseline.GetVal()) {
continue
}
do.bindHandle.CaptureBaselines(do.InfoSchema())
}
}
}()
Expand Down
28 changes: 13 additions & 15 deletions domain/global_vars_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func (gvc *GlobalVariableCache) Update(rows []chunk.Row, fields []*ast.ResultFie
gvc.fields = fields
gvc.Unlock()

checkEnableStmtSummary(rows, fields)
checkEnableServerGlobalVar(rows)
}

// Get gets the global variables from cache.
Expand All @@ -67,24 +67,22 @@ func (gvc *GlobalVariableCache) Disable() {
return
}

// checkEnableStmtSummary looks for TiDBEnableStmtSummary and notifies StmtSummary
func checkEnableStmtSummary(rows []chunk.Row, fields []*ast.ResultField) {
// checkEnableServerGlobalVar processes variables that act at both the server and global levels.
func checkEnableServerGlobalVar(rows []chunk.Row) {
for _, row := range rows {
varName := row.GetString(0)
if varName == variable.TiDBEnableStmtSummary {
varVal := row.GetDatum(1, &fields[1].Column.FieldType)

switch row.GetString(0) {
case variable.TiDBEnableStmtSummary:
sVal := ""
if !varVal.IsNull() {
var err error
sVal, err = varVal.ToString()
if err != nil {
return
}
if !row.IsNull(1) {
sVal = row.GetString(1)
}

stmtsummary.StmtSummaryByDigestMap.SetEnabled(sVal, false)
break
case variable.TiDBCapturePlanBaseline:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we add a dependency check between these two variables? If not, could there be panics when accessing the statement structures?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it won't panic, because StmtSummaryByDigestMap is initialized as a global variable, so it is always valid.

sVal := ""
if !row.IsNull(1) {
sVal = row.GetString(1)
}
variable.CapturePlanBaseline.Set(sVal, false)
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions executor/set.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ func (e *SetExecutor) setSysVariable(name string, v *expression.VarAssignment) e

if name == variable.TiDBEnableStmtSummary {
stmtsummary.StmtSummaryByDigestMap.SetEnabled(valStr, !v.IsGlobal)
} else if name == variable.TiDBCapturePlanBaseline {
variable.CapturePlanBaseline.Set(valStr, !v.IsGlobal)
}

return nil
Expand Down
14 changes: 14 additions & 0 deletions planner/optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,20 @@ func OptimizeExecStmt(ctx context.Context, sctx sessionctx.Context,
return nil, err
}

// GenHintsFromSQL preprocesses and optimizes the given statement node and
// returns the hints derived from the resulting plan.
// If preprocessing or optimization fails, it returns an empty string together
// with that error.
func GenHintsFromSQL(ctx context.Context, sctx sessionctx.Context, node ast.Node, is infoschema.InfoSchema) (string, error) {
	if err := plannercore.Preprocess(sctx, node, is); err != nil {
		return "", err
	}
	plan, err := Optimize(ctx, sctx, node, is)
	if err != nil {
		return "", err
	}
	hints := plannercore.GenHintsFromPhysicalPlan(plan)
	return hints, nil
}

// init registers this package's Optimize and GenHintsFromSQL functions in the
// package-level hooks exposed by plannercore and bindinfo, presumably so that
// those packages can call into the planner without importing it.
func init() {
plannercore.OptimizeAstNode = Optimize
bindinfo.GenHintsFromSQL = GenHintsFromSQL
}
1 change: 1 addition & 0 deletions session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1835,6 +1835,7 @@ var builtinGlobalVariable = []string{
variable.TiDBTxnMode,
variable.TiDBEnableStmtSummary,
variable.TiDBMaxDeltaSchemaCount,
variable.TiDBCapturePlanBaseline,
}

var (
Expand Down
1 change: 1 addition & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,7 @@ var defaultSysVars = []*SysVar{
{ScopeSession, TiDBReplicaRead, "leader"},
{ScopeSession, TiDBAllowRemoveAutoInc, BoolToIntStr(DefTiDBAllowRemoveAutoInc)},
{ScopeGlobal | ScopeSession, TiDBEnableStmtSummary, "0"},
{ScopeGlobal | ScopeSession, TiDBCapturePlanBaseline, "0"},
}

// SynonymsSysVariables is synonyms of system variables.
Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,9 @@ const (

// TiDBEnableStmtSummary indicates whether the statement summary is enabled.
TiDBEnableStmtSummary = "tidb_enable_stmt_summary"

// TiDBCapturePlanBaseline indicates whether the capture of plan baselines is enabled.
TiDBCapturePlanBaseline = "tidb_capture_plan_baselines"
)

// Default TiDB system variable values.
Expand Down Expand Up @@ -416,4 +419,5 @@ var (
MaxOfMaxAllowedPacket uint64 = 1073741824
ExpensiveQueryTimeThreshold uint64 = DefTiDBExpensiveQueryTimeThreshold
MinExpensiveQueryTimeThreshold uint64 = 10 //10s
CapturePlanBaseline = serverGlobalVariable{globalVal: "0"}
)
31 changes: 30 additions & 1 deletion sessionctx/variable/varsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"math"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"

Expand Down Expand Up @@ -611,7 +612,7 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string,
return "off", nil
}
return value, ErrWrongValueForVar.GenWithStackByArgs(name, value)
case TiDBEnableStmtSummary:
case TiDBEnableStmtSummary, TiDBCapturePlanBaseline:
switch {
case strings.EqualFold(value, "ON") || value == "1":
return "1", nil
Expand Down Expand Up @@ -737,3 +738,31 @@ func setAnalyzeTime(s *SessionVars, val string) (string, error) {
}
return t.Format(AnalyzeFullTimeFormat), nil
}

// serverGlobalVariable holds the value of a system variable that takes effect
// at both the server scope and the global scope. The embedded mutex is held
// while either value is written.
type serverGlobalVariable struct {
	sync.Mutex
	serverVal string
	globalVal string
}

// Set stores val as the server-scope value when isServer is true, and as the
// global-scope value otherwise.
func (v *serverGlobalVariable) Set(val string, isServer bool) {
	v.Lock()
	defer v.Unlock()

	// Pick the slot first, then write once.
	target := &v.globalVal
	if isServer {
		target = &v.serverVal
	}
	*target = val
}

// GetVal returns the server-scope value if it is non-empty; otherwise it returns the global-scope value.
func (v *serverGlobalVariable) GetVal() string {
v.Lock()
Copy link
Contributor

@lzmhhh123 lzmhhh123 Oct 16, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just take a read lock here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is not necessary: we do not expect the server global variables to be set or read very frequently, so a mutex is enough for now.

defer v.Unlock()
if v.serverVal != "" {
return v.serverVal
}
return v.globalVal
}
20 changes: 20 additions & 0 deletions util/stmtsummary/statement_summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,26 @@ func (ssMap *stmtSummaryByDigestMap) ToDatum() [][]types.Datum {
return rows
}

// GetMoreThanOnceSelect gets select SQLs that occurred more than once.
func (ssMap *stmtSummaryByDigestMap) GetMoreThanOnceSelect() ([]string, []string) {
ssMap.Lock()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a mutex, not rwmutex.

values := ssMap.summaryMap.Values()
ssMap.Unlock()

schemas := make([]string, 0, len(values))
sqls := make([]string, 0, len(values))
for _, value := range values {
summary := value.(*stmtSummaryByDigest)
summary.Lock()
if strings.HasPrefix(summary.normalizedSQL, "select") && summary.execCount > 1 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, can we make the required number of SQL occurrences configurable instead of fixing it at 1?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we can, but we should do it only when it is necessary, because it would require another session variable; for now, this is enough.

schemas = append(schemas, summary.schemaName)
sqls = append(sqls, summary.sampleSQL)
}
summary.Unlock()
}
return schemas, sqls
}

// SetEnabled enables or disables statement summary in global(cluster) or session(server) scope.
func (ssMap *stmtSummaryByDigestMap) SetEnabled(value string, inSession bool) {
value = ssMap.normalizeEnableValue(value)
Expand Down