From 1fc91d46f8e00aff91cb11fc66bd79be7b47a401 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 29 Feb 2024 20:08:09 +0800 Subject: [PATCH] planner: introduce a new fuzzy binding cache (#51401) ref pingcap/tidb#51347 --- pkg/bindinfo/binding_cache.go | 142 ++++++++++++++++++++++++++++- pkg/bindinfo/binding_cache_test.go | 57 ++++++++++++ pkg/bindinfo/global_handle.go | 81 +++------------- 3 files changed, 206 insertions(+), 74 deletions(-) diff --git a/pkg/bindinfo/binding_cache.go b/pkg/bindinfo/binding_cache.go index 87ea0b77c7f98..341d1c0e22aee 100644 --- a/pkg/bindinfo/binding_cache.go +++ b/pkg/bindinfo/binding_cache.go @@ -18,6 +18,10 @@ import ( "errors" "sync" + "github.com/pingcap/tidb/pkg/bindinfo/norm" + "github.com/pingcap/tidb/pkg/parser" + "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/util/hack" "github.com/pingcap/tidb/pkg/util/kvcache" @@ -25,6 +29,136 @@ import ( "github.com/pingcap/tidb/pkg/util/memory" ) +// FuzzyBindingCache is based on BindingCache, and provides some more advanced features, like +// fuzzy matching and automatically loading bindings on a cache miss (TODO). +type FuzzyBindingCache interface { + // FuzzyMatchingBinding supports fuzzy matching on bindings. + FuzzyMatchingBinding(sctx sessionctx.Context, fuzzyDigest string, tableNames []*ast.TableName) (bindings Binding, isMatched bool) + + // Copy copies this cache. + Copy() (c FuzzyBindingCache, err error) + + BindingCache +} + +type fuzzyBindingCache struct { + BindingCache + + mu sync.RWMutex + + // fuzzy2SQLDigests is used to support fuzzy matching. + // fuzzyDigest is the digest calculated after eliminating all DB names, e.g. `select * from test.t` -> `select * from t` -> fuzzyDigest. + // sqlDigest is the digest where all DB names are kept, e.g. `select * from test.t` -> sqlDigest.
+ fuzzy2SQLDigests map[string][]string // fuzzyDigest --> sqlDigests + + sql2FuzzyDigest map[string]string // sqlDigest --> fuzzyDigest +} + +func newFuzzyBindingCache() FuzzyBindingCache { + return &fuzzyBindingCache{ + BindingCache: newBindCache(), + fuzzy2SQLDigests: make(map[string][]string), + sql2FuzzyDigest: make(map[string]string), + } +} + +func (fbc *fuzzyBindingCache) FuzzyMatchingBinding(sctx sessionctx.Context, fuzzyDigest string, tableNames []*ast.TableName) (matchedBinding Binding, isMatched bool) { + fbc.mu.RLock() + defer fbc.mu.RUnlock() + bindingCache := fbc.BindingCache + if bindingCache.Size() == 0 { + return + } + + leastWildcards := len(tableNames) + 1 + enableFuzzyBinding := sctx.GetSessionVars().EnableFuzzyBinding + for _, sqlDigest := range fbc.fuzzy2SQLDigests[fuzzyDigest] { + if bindings := bindingCache.GetBinding(sqlDigest); bindings != nil { + for _, binding := range bindings { + numWildcards, matched := fuzzyMatchBindingTableName(sctx.GetSessionVars().CurrentDB, tableNames, binding.TableNames) + if matched && numWildcards > 0 && sctx != nil && !enableFuzzyBinding { + continue // fuzzy binding is disabled, skip this binding + } + if matched && numWildcards < leastWildcards { + matchedBinding = binding + isMatched = true + leastWildcards = numWildcards + break + } + } + } + } + return +} + +func (fbc *fuzzyBindingCache) SetBinding(sqlDigest string, bindings Bindings) (err error) { + fbc.mu.Lock() + defer fbc.mu.Unlock() + + // prepare fuzzy digests for all bindings + fuzzyDigests := make([]string, 0, len(bindings)) + p := parser.New() + for _, binding := range bindings { + stmt, err := p.ParseOneStmt(binding.BindSQL, binding.Charset, binding.Collation) + if err != nil { + return err + } + _, fuzzyDigest := norm.NormalizeStmtForBinding(stmt, norm.WithFuzz(true)) + fuzzyDigests = append(fuzzyDigests, fuzzyDigest) + } + + for i, binding := range bindings { + fbc.fuzzy2SQLDigests[fuzzyDigests[i]] = 
append(fbc.fuzzy2SQLDigests[fuzzyDigests[i]], binding.SQLDigest) + fbc.sql2FuzzyDigest[binding.SQLDigest] = fuzzyDigests[i] + } + // NOTE: due to LRU eviction, the underlying BindingCache state might be inconsistent with fuzzy2SQLDigests and + // sql2FuzzyDigest, but it's acceptable, just return cache-miss in that case. + return fbc.BindingCache.SetBinding(sqlDigest, bindings) +} + +func (fbc *fuzzyBindingCache) RemoveBinding(sqlDigest string) { + fbc.mu.Lock() + defer fbc.mu.Unlock() + fuzzyDigest, ok := fbc.sql2FuzzyDigest[sqlDigest] + if !ok { + return + } + digestList := fbc.fuzzy2SQLDigests[fuzzyDigest] + for i := range digestList { // remove sqlDigest from this list + if digestList[i] == sqlDigest { + digestList = append(digestList[:i], digestList[i+1:]...) + break + } + } + fbc.fuzzy2SQLDigests[fuzzyDigest] = digestList + delete(fbc.sql2FuzzyDigest, sqlDigest) + fbc.BindingCache.RemoveBinding(sqlDigest) +} + +func (fbc *fuzzyBindingCache) Copy() (c FuzzyBindingCache, err error) { + fbc.mu.RLock() + defer fbc.mu.RUnlock() + bc, err := fbc.BindingCache.CopyBindingCache() + if err != nil { + return nil, err + } + sql2FuzzyDigest := make(map[string]string, len(fbc.sql2FuzzyDigest)) + for k, v := range fbc.sql2FuzzyDigest { + sql2FuzzyDigest[k] = v + } + fuzzy2SQLDigests := make(map[string][]string, len(fbc.fuzzy2SQLDigests)) + for k, list := range fbc.fuzzy2SQLDigests { + newList := make([]string, len(list)) + copy(newList, list) + fuzzy2SQLDigests[k] = newList + } + return &fuzzyBindingCache{ + BindingCache: bc, + fuzzy2SQLDigests: fuzzy2SQLDigests, + sql2FuzzyDigest: sql2FuzzyDigest, + }, nil +} + // BindingCache is the interface for the cache of the SQL plan bindings. type BindingCache interface { // GetBinding gets the binding for the specified sqlDigest. @@ -41,8 +175,8 @@ type BindingCache interface { GetMemUsage() int64 // GetMemCapacity gets the memory capacity of the cache. GetMemCapacity() int64 - // Copy copies the cache. 
- Copy() (newCache BindingCache, err error) + // CopyBindingCache copies the cache. + CopyBindingCache() (newCache BindingCache, err error) // Size returns the number of items in the cache. Size() int } @@ -201,9 +335,9 @@ func (c *bindingCache) GetMemCapacity() int64 { return c.memCapacity } -// Copy copies a new bindingCache from the origin cache. +// CopyBindingCache copies a new bindingCache from the origin cache. // The function is thread-safe. -func (c *bindingCache) Copy() (BindingCache, error) { +func (c *bindingCache) CopyBindingCache() (BindingCache, error) { c.lock.Lock() defer c.lock.Unlock() var err error diff --git a/pkg/bindinfo/binding_cache_test.go b/pkg/bindinfo/binding_cache_test.go index e398685a97d5c..28b733bba5dce 100644 --- a/pkg/bindinfo/binding_cache_test.go +++ b/pkg/bindinfo/binding_cache_test.go @@ -19,11 +19,68 @@ import ( "strings" "testing" + "github.com/pingcap/tidb/pkg/bindinfo/norm" + "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/sessionctx/variable" "github.com/pingcap/tidb/pkg/util/hack" "github.com/stretchr/testify/require" ) +func bindingFuzzyDigest(t *testing.T, b Binding) string { + p := parser.New() + stmt, err := p.ParseOneStmt(b.BindSQL, b.Charset, b.Collation) + require.NoError(t, err) + _, fuzzyDigest := norm.NormalizeStmtForBinding(stmt, norm.WithFuzz(true)) + return fuzzyDigest +} + +func TestFuzzyBindingCache(t *testing.T) { + fbc := newFuzzyBindingCache().(*fuzzyBindingCache) + b1 := Binding{BindSQL: "SELECT * FROM db1.t1", SQLDigest: "b1"} + fDigest1 := bindingFuzzyDigest(t, b1) + b2 := Binding{BindSQL: "SELECT * FROM db2.t1", SQLDigest: "b2"} + b3 := Binding{BindSQL: "SELECT * FROM db2.t3", SQLDigest: "b3"} + fDigest3 := bindingFuzzyDigest(t, b3) + + // add 3 bindings and b1 and b2 have the same fuzzy digest + require.NoError(t, fbc.SetBinding(b1.SQLDigest, []Binding{b1})) + require.NoError(t, fbc.SetBinding(b2.SQLDigest, []Binding{b2})) + require.NoError(t, fbc.SetBinding(b3.SQLDigest, 
[]Binding{b3})) + require.Equal(t, len(fbc.fuzzy2SQLDigests), 2) // b1 and b2 have the same fuzzy digest + require.Equal(t, len(fbc.fuzzy2SQLDigests[fDigest1]), 2) + require.Equal(t, len(fbc.fuzzy2SQLDigests[fDigest3]), 1) + require.Equal(t, len(fbc.sql2FuzzyDigest), 3) + _, ok := fbc.sql2FuzzyDigest[b1.SQLDigest] + require.True(t, ok) + _, ok = fbc.sql2FuzzyDigest[b2.SQLDigest] + require.True(t, ok) + _, ok = fbc.sql2FuzzyDigest[b3.SQLDigest] + require.True(t, ok) + + // remove b2 + fbc.RemoveBinding(b2.SQLDigest) + require.Equal(t, len(fbc.fuzzy2SQLDigests), 2) + require.Equal(t, len(fbc.fuzzy2SQLDigests[fDigest1]), 1) + require.Equal(t, len(fbc.fuzzy2SQLDigests[fDigest3]), 1) + require.Equal(t, len(fbc.sql2FuzzyDigest), 2) + _, ok = fbc.sql2FuzzyDigest[b1.SQLDigest] + require.True(t, ok) + _, ok = fbc.sql2FuzzyDigest[b2.SQLDigest] + require.False(t, ok) // can't find b2 now + _, ok = fbc.sql2FuzzyDigest[b3.SQLDigest] + require.True(t, ok) + + // test deep copy + newCache, err := fbc.Copy() + require.NoError(t, err) + newFBC := newCache.(*fuzzyBindingCache) + newFBC.fuzzy2SQLDigests[fDigest1] = nil + delete(newFBC.sql2FuzzyDigest, b1.SQLDigest) + require.Equal(t, len(fbc.fuzzy2SQLDigests[fDigest1]), 1) // no impact to the original cache + _, ok = fbc.sql2FuzzyDigest[b1.SQLDigest] + require.True(t, ok) +} + func TestBindCache(t *testing.T) { variable.MemQuotaBindingCache.Store(250) bindCache := newBindCache().(*bindingCache) diff --git a/pkg/bindinfo/global_handle.go b/pkg/bindinfo/global_handle.go index 0f63bbb7b9e17..bd00ea5d2498a 100644 --- a/pkg/bindinfo/global_handle.go +++ b/pkg/bindinfo/global_handle.go @@ -23,7 +23,6 @@ import ( "time" "github.com/pingcap/errors" - "github.com/pingcap/tidb/pkg/bindinfo/norm" "github.com/pingcap/tidb/pkg/metrics" "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/parser/ast" @@ -110,12 +109,7 @@ type GlobalBindingHandle interface { type globalBindingHandle struct { sPool SessionPool - bindingCache 
atomic.Value - - // fuzzyDigestMap is used to support fuzzy matching. - // fuzzyDigest is the digest calculated after eliminating all DB names, e.g. `select * from test.t` -> `select * from t` -> fuzzyDigest. - // exactDigest is the digest where all DB names are kept, e.g. `select * from test.t` -> exactDigest. - fuzzyDigestMap atomic.Value // map[string][]string fuzzyDigest --> exactDigests + fuzzyBindingCache atomic.Value // lastTaskTime records the last update time for the global sql bind cache. // This value is used to avoid reload duplicated bindings from storage. @@ -156,44 +150,20 @@ func NewGlobalBindingHandle(sPool SessionPool) GlobalBindingHandle { return handle } -func (h *globalBindingHandle) getCache() BindingCache { - return h.bindingCache.Load().(BindingCache) +func (h *globalBindingHandle) getCache() FuzzyBindingCache { + return h.fuzzyBindingCache.Load().(FuzzyBindingCache) } -func (h *globalBindingHandle) setCache(c BindingCache) { +func (h *globalBindingHandle) setCache(c FuzzyBindingCache) { // TODO: update the global cache in-place instead of replacing it and remove this function. 
- h.bindingCache.Store(c) -} - -func (h *globalBindingHandle) getFuzzyDigestMap() map[string][]string { - return h.fuzzyDigestMap.Load().(map[string][]string) -} - -func (h *globalBindingHandle) setFuzzyDigestMap(m map[string][]string) { - h.fuzzyDigestMap.Store(m) -} - -func buildFuzzyDigestMap(bindings Bindings) map[string][]string { - m := make(map[string][]string) - p := parser.New() - for _, binding := range bindings { - stmt, err := p.ParseOneStmt(binding.BindSQL, binding.Charset, binding.Collation) - if err != nil { - logutil.BgLogger().Warn("parse bindSQL failed", zap.String("bindSQL", binding.BindSQL), zap.Error(err)) - p = parser.New() - continue - } - _, fuzzyDigest := norm.NormalizeStmtForBinding(stmt, norm.WithFuzz(true)) - m[fuzzyDigest] = append(m[fuzzyDigest], binding.SQLDigest) - } - return m + h.fuzzyBindingCache.Store(c) } // Reset is to reset the BindHandle and clean old info. func (h *globalBindingHandle) Reset() { h.lastUpdateTime.Store(types.ZeroTimestamp) h.invalidBindings = newInvalidBindingCache() - h.setCache(newBindCache()) + h.setCache(newFuzzyBindingCache()) variable.RegisterStatistics(h) } @@ -209,11 +179,11 @@ func (h *globalBindingHandle) setLastUpdateTime(t types.Time) { func (h *globalBindingHandle) LoadFromStorageToCache(fullLoad bool) (err error) { var lastUpdateTime types.Time var timeCondition string - var newCache BindingCache + var newCache FuzzyBindingCache if fullLoad { lastUpdateTime = types.ZeroTimestamp timeCondition = "" - newCache = newBindCache() + newCache = newFuzzyBindingCache() } else { lastUpdateTime = h.getLastUpdateTime() timeCondition = fmt.Sprintf("WHERE update_time>'%s'", lastUpdateTime.String()) @@ -235,8 +205,7 @@ func (h *globalBindingHandle) LoadFromStorageToCache(fullLoad bool) (err error) defer func() { h.setLastUpdateTime(lastUpdateTime) - h.setCache(newCache) // TODO: update it in place - h.setFuzzyDigestMap(buildFuzzyDigestMap(newCache.GetAllBindings())) + h.setCache(newCache) }() for _, row := 
range rows { @@ -493,35 +462,7 @@ func (h *globalBindingHandle) Size() int { // MatchGlobalBinding returns the matched binding for this statement. func (h *globalBindingHandle) MatchGlobalBinding(sctx sessionctx.Context, fuzzyDigest string, tableNames []*ast.TableName) (matchedBinding Binding, isMatched bool) { - bindingCache := h.getCache() - if bindingCache.Size() == 0 { - return - } - fuzzyDigestMap := h.getFuzzyDigestMap() - if len(fuzzyDigestMap) == 0 { - return - } - - leastWildcards := len(tableNames) + 1 - enableFuzzyBinding := sctx.GetSessionVars().EnableFuzzyBinding - for _, exactDigest := range fuzzyDigestMap[fuzzyDigest] { - sqlDigest := exactDigest - if bindings := bindingCache.GetBinding(sqlDigest); bindings != nil { - for _, binding := range bindings { - numWildcards, matched := fuzzyMatchBindingTableName(sctx.GetSessionVars().CurrentDB, tableNames, binding.TableNames) - if matched && numWildcards > 0 && sctx != nil && !enableFuzzyBinding { - continue // fuzzy binding is disabled, skip this binding - } - if matched && numWildcards < leastWildcards { - matchedBinding = binding - isMatched = true - leastWildcards = numWildcards - break - } - } - } - } - return + return h.getCache().FuzzyMatchingBinding(sctx, fuzzyDigest, tableNames) } // GetAllGlobalBindings returns all bind records in cache. @@ -696,7 +637,7 @@ func (*paramMarkerChecker) Leave(in ast.Node) (ast.Node, bool) { // Clear resets the bind handle. It is only used for test. func (h *globalBindingHandle) Clear() { - h.setCache(newBindCache()) + h.setCache(newFuzzyBindingCache()) h.setLastUpdateTime(types.ZeroTimestamp) h.invalidBindings.reset() }