Skip to content

Commit

Permalink
planner: introduce a new fuzzy binding cache (#51401)
Browse files Browse the repository at this point in the history
ref #51347
  • Loading branch information
qw4990 authored Feb 29, 2024
1 parent 38ab23b commit 1fc91d4
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 74 deletions.
142 changes: 138 additions & 4 deletions pkg/bindinfo/binding_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,147 @@ import (
"errors"
"sync"

"github.com/pingcap/tidb/pkg/bindinfo/norm"
"github.com/pingcap/tidb/pkg/parser"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/util/hack"
"github.com/pingcap/tidb/pkg/util/kvcache"
"github.com/pingcap/tidb/pkg/util/mathutil"
"github.com/pingcap/tidb/pkg/util/memory"
)

// FuzzyBindingCache is based on BindingCache and provides some more advanced features, like
// fuzzy matching and automatically loading bindings on a cache miss (TODO, not implemented yet).
type FuzzyBindingCache interface {
// FuzzyMatchingBinding supports fuzzy matching on bindings.
// It takes the fuzzy digest of the statement and the table names it references, and returns
// the matched binding together with a flag reporting whether any binding was matched.
FuzzyMatchingBinding(sctx sessionctx.Context, fuzzyDigest string, tableNames []*ast.TableName) (bindings Binding, isMatched bool)

// Copy copies this cache.
Copy() (c FuzzyBindingCache, err error)

// BindingCache is embedded, so all of its methods are part of this interface as well.
BindingCache
}

// fuzzyBindingCache is the FuzzyBindingCache implementation returned by newFuzzyBindingCache.
// It wraps a BindingCache and keeps two digest indexes next to it.
type fuzzyBindingCache struct {
// BindingCache is the underlying cache that stores the bindings, keyed by sqlDigest.
BindingCache

// mu is held by the methods of fuzzyBindingCache while they read or modify the two maps below.
mu sync.RWMutex

// fuzzy2SQLDigests is used to support fuzzy matching.
// fuzzyDigest is the digest calculated after eliminating all DB names, e.g. `select * from test.t` -> `select * from t` -> fuzzyDigest.
// sqlDigest is the digest where all DB names are kept, e.g. `select * from test.t` -> exactDigest.
fuzzy2SQLDigests map[string][]string // fuzzyDigest --> sqlDigests

// sql2FuzzyDigest is the reverse index of fuzzy2SQLDigests; RemoveBinding uses it to find
// the fuzzy digest list that a sqlDigest has to be removed from.
sql2FuzzyDigest map[string]string // sqlDigest --> fuzzyDigest
}

// newFuzzyBindingCache builds an empty fuzzy binding cache: a fresh underlying
// BindingCache plus two empty, ready-to-write digest index maps.
func newFuzzyBindingCache() FuzzyBindingCache {
	cache := new(fuzzyBindingCache)
	cache.BindingCache = newBindCache()
	cache.fuzzy2SQLDigests = map[string][]string{}
	cache.sql2FuzzyDigest = map[string]string{}
	return cache
}

// FuzzyMatchingBinding returns the best binding whose fuzzy digest equals fuzzyDigest
// and whose table names match tableNames.
//
// Every sqlDigest indexed under fuzzyDigest is looked up in the underlying cache.
// A candidate is accepted when fuzzyMatchBindingTableName reports a match and its
// wildcard count is strictly lower than the best one seen so far, so the binding
// with the fewest wildcards wins. Candidates with a non-zero wildcard count are
// skipped when the session variable EnableFuzzyBinding is off.
//
// sctx must not be nil: its session variables are read unconditionally.
// If nothing matches, the zero Binding and false are returned.
func (fbc *fuzzyBindingCache) FuzzyMatchingBinding(sctx sessionctx.Context, fuzzyDigest string, tableNames []*ast.TableName) (matchedBinding Binding, isMatched bool) {
	fbc.mu.RLock()
	defer fbc.mu.RUnlock()
	bindingCache := fbc.BindingCache
	if bindingCache.Size() == 0 {
		return
	}

	// Session variables do not change during the lookup, so read them once.
	sessVars := sctx.GetSessionVars()
	currentDB := sessVars.CurrentDB
	enableFuzzyBinding := sessVars.EnableFuzzyBinding

	// Start above any possible wildcard count so the first match is always accepted.
	leastWildcards := len(tableNames) + 1
	for _, sqlDigest := range fbc.fuzzy2SQLDigests[fuzzyDigest] {
		// Due to LRU eviction the index may reference a digest that is no longer cached;
		// GetBinding then yields nothing and the loop body is simply skipped.
		for _, binding := range bindingCache.GetBinding(sqlDigest) {
			numWildcards, matched := fuzzyMatchBindingTableName(currentDB, tableNames, binding.TableNames)
			if !matched {
				continue
			}
			if numWildcards > 0 && !enableFuzzyBinding {
				continue // fuzzy binding is disabled, skip this binding
			}
			if numWildcards < leastWildcards {
				matchedBinding = binding
				isMatched = true
				leastWildcards = numWildcards
				// Stop scanning the remaining bindings of this sqlDigest; other digests are still checked.
				break
			}
		}
	}
	return
}

// SetBinding stores bindings under sqlDigest in the underlying cache and records
// each binding in the fuzzy digest indexes.
//
// The fuzzy digest of every binding is computed by parsing its BindSQL. If any
// statement fails to parse, the parse error is returned and neither the indexes
// nor the underlying cache are modified.
//
// A binding whose SQLDigest is already indexed under the same fuzzy digest is not
// indexed again, so repeated calls for the same digest do not grow
// fuzzy2SQLDigests with duplicate entries.
func (fbc *fuzzyBindingCache) SetBinding(sqlDigest string, bindings Bindings) (err error) {
	fbc.mu.Lock()
	defer fbc.mu.Unlock()

	// prepare fuzzy digests for all bindings before touching any state
	fuzzyDigests := make([]string, 0, len(bindings))
	p := parser.New()
	for _, binding := range bindings {
		stmt, parseErr := p.ParseOneStmt(binding.BindSQL, binding.Charset, binding.Collation)
		if parseErr != nil {
			return parseErr
		}
		_, fuzzyDigest := norm.NormalizeStmtForBinding(stmt, norm.WithFuzz(true))
		fuzzyDigests = append(fuzzyDigests, fuzzyDigest)
	}

	for i, binding := range bindings {
		fuzzyDigest := fuzzyDigests[i]
		// Both maps are always updated together, so an entry in sql2FuzzyDigest means the
		// sqlDigest is already present in the matching fuzzy2SQLDigests list.
		if indexed, ok := fbc.sql2FuzzyDigest[binding.SQLDigest]; ok && indexed == fuzzyDigest {
			continue
		}
		fbc.fuzzy2SQLDigests[fuzzyDigest] = append(fbc.fuzzy2SQLDigests[fuzzyDigest], binding.SQLDigest)
		fbc.sql2FuzzyDigest[binding.SQLDigest] = fuzzyDigest
	}
	// NOTE: due to LRU eviction, the underlying BindingCache state might be inconsistent with fuzzy2SQLDigests and
	// sql2FuzzyDigest, but it's acceptable, just return cache-miss in that case.
	return fbc.BindingCache.SetBinding(sqlDigest, bindings)
}

// RemoveBinding drops sqlDigest from both fuzzy digest indexes and from the
// underlying cache.
//
// If sqlDigest is not present in sql2FuzzyDigest the call is a no-op.
// Every occurrence of sqlDigest is removed from its fuzzy digest list, and the
// list's map entry is deleted once it becomes empty so that fuzzy2SQLDigests
// does not accumulate keys pointing at empty slices.
func (fbc *fuzzyBindingCache) RemoveBinding(sqlDigest string) {
	fbc.mu.Lock()
	defer fbc.mu.Unlock()
	fuzzyDigest, ok := fbc.sql2FuzzyDigest[sqlDigest]
	if !ok {
		return
	}

	// Filter in place: the list is owned by this cache, so reusing its backing array is safe.
	digestList := fbc.fuzzy2SQLDigests[fuzzyDigest]
	kept := digestList[:0]
	for _, digest := range digestList {
		if digest != sqlDigest {
			kept = append(kept, digest)
		}
	}
	if len(kept) == 0 {
		delete(fbc.fuzzy2SQLDigests, fuzzyDigest)
	} else {
		fbc.fuzzy2SQLDigests[fuzzyDigest] = kept
	}

	delete(fbc.sql2FuzzyDigest, sqlDigest)
	fbc.BindingCache.RemoveBinding(sqlDigest)
}

// Copy returns a deep copy of this cache: the underlying BindingCache is duplicated
// through CopyBindingCache, and both digest index maps (including every digest list)
// are cloned so that the copy and the original can be modified independently.
// If copying the underlying cache fails, that error is returned with a nil cache.
func (fbc *fuzzyBindingCache) Copy() (c FuzzyBindingCache, err error) {
	fbc.mu.RLock()
	defer fbc.mu.RUnlock()

	inner, err := fbc.BindingCache.CopyBindingCache()
	if err != nil {
		return nil, err
	}

	cloned := &fuzzyBindingCache{
		BindingCache:     inner,
		fuzzy2SQLDigests: make(map[string][]string, len(fbc.fuzzy2SQLDigests)),
		sql2FuzzyDigest:  make(map[string]string, len(fbc.sql2FuzzyDigest)),
	}
	for sqlDigest, fuzzyDigest := range fbc.sql2FuzzyDigest {
		cloned.sql2FuzzyDigest[sqlDigest] = fuzzyDigest
	}
	for fuzzyDigest, sqlDigests := range fbc.fuzzy2SQLDigests {
		// Give the copy its own backing array so later edits cannot alias the original list.
		cloned.fuzzy2SQLDigests[fuzzyDigest] = append(make([]string, 0, len(sqlDigests)), sqlDigests...)
	}
	return cloned, nil
}

// BindingCache is the interface for the cache of the SQL plan bindings.
type BindingCache interface {
// GetBinding gets the binding for the specified sqlDigest.
Expand All @@ -41,8 +175,8 @@ type BindingCache interface {
GetMemUsage() int64
// GetMemCapacity gets the memory capacity of the cache.
GetMemCapacity() int64
// Copy copies the cache.
Copy() (newCache BindingCache, err error)
// CopyBindingCache copies the cache.
CopyBindingCache() (newCache BindingCache, err error)
// Size returns the number of items in the cache.
Size() int
}
Expand Down Expand Up @@ -201,9 +335,9 @@ func (c *bindingCache) GetMemCapacity() int64 {
return c.memCapacity
}

// Copy copies a new bindingCache from the origin cache.
// CopyBindingCache copies a new bindingCache from the origin cache.
// The function is thread-safe.
func (c *bindingCache) Copy() (BindingCache, error) {
func (c *bindingCache) CopyBindingCache() (BindingCache, error) {
c.lock.Lock()
defer c.lock.Unlock()
var err error
Expand Down
57 changes: 57 additions & 0 deletions pkg/bindinfo/binding_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,68 @@ import (
"strings"
"testing"

"github.com/pingcap/tidb/pkg/bindinfo/norm"
"github.com/pingcap/tidb/pkg/parser"
"github.com/pingcap/tidb/pkg/sessionctx/variable"
"github.com/pingcap/tidb/pkg/util/hack"
"github.com/stretchr/testify/require"
)

// bindingFuzzyDigest parses the binding's BindSQL and returns the fuzzy digest that
// norm.NormalizeStmtForBinding computes for it; the test fails if parsing fails.
func bindingFuzzyDigest(t *testing.T, b Binding) string {
	stmt, err := parser.New().ParseOneStmt(b.BindSQL, b.Charset, b.Collation)
	require.NoError(t, err)
	_, digest := norm.NormalizeStmtForBinding(stmt, norm.WithFuzz(true))
	return digest
}

// TestFuzzyBindingCache checks that SetBinding, RemoveBinding and Copy keep the two
// digest indexes of fuzzyBindingCache (fuzzy2SQLDigests and sql2FuzzyDigest) in sync.
func TestFuzzyBindingCache(t *testing.T) {
	fbc := newFuzzyBindingCache().(*fuzzyBindingCache)
	b1 := Binding{BindSQL: "SELECT * FROM db1.t1", SQLDigest: "b1"}
	fDigest1 := bindingFuzzyDigest(t, b1)
	b2 := Binding{BindSQL: "SELECT * FROM db2.t1", SQLDigest: "b2"}
	b3 := Binding{BindSQL: "SELECT * FROM db2.t3", SQLDigest: "b3"}
	fDigest3 := bindingFuzzyDigest(t, b3)

	// add 3 bindings and b1 and b2 have the same fuzzy digest
	require.NoError(t, fbc.SetBinding(b1.SQLDigest, []Binding{b1}))
	require.NoError(t, fbc.SetBinding(b2.SQLDigest, []Binding{b2}))
	require.NoError(t, fbc.SetBinding(b3.SQLDigest, []Binding{b3}))
	require.Len(t, fbc.fuzzy2SQLDigests, 2) // b1 and b2 have the same fuzzy digest
	require.Len(t, fbc.fuzzy2SQLDigests[fDigest1], 2)
	require.Len(t, fbc.fuzzy2SQLDigests[fDigest3], 1)
	require.Len(t, fbc.sql2FuzzyDigest, 3)
	require.Contains(t, fbc.sql2FuzzyDigest, b1.SQLDigest)
	require.Contains(t, fbc.sql2FuzzyDigest, b2.SQLDigest)
	require.Contains(t, fbc.sql2FuzzyDigest, b3.SQLDigest)

	// remove b2
	fbc.RemoveBinding(b2.SQLDigest)
	require.Len(t, fbc.fuzzy2SQLDigests, 2)
	require.Len(t, fbc.fuzzy2SQLDigests[fDigest1], 1)
	require.Len(t, fbc.fuzzy2SQLDigests[fDigest3], 1)
	require.Len(t, fbc.sql2FuzzyDigest, 2)
	require.Contains(t, fbc.sql2FuzzyDigest, b1.SQLDigest)
	require.NotContains(t, fbc.sql2FuzzyDigest, b2.SQLDigest) // can't find b2 now
	require.Contains(t, fbc.sql2FuzzyDigest, b3.SQLDigest)

	// test deep copy
	newCache, err := fbc.Copy()
	require.NoError(t, err)
	newFBC := newCache.(*fuzzyBindingCache)
	newFBC.fuzzy2SQLDigests[fDigest1] = nil
	delete(newFBC.sql2FuzzyDigest, b1.SQLDigest)
	require.Len(t, fbc.fuzzy2SQLDigests[fDigest1], 1) // no impact to the original cache
	require.Contains(t, fbc.sql2FuzzyDigest, b1.SQLDigest)
}

func TestBindCache(t *testing.T) {
variable.MemQuotaBindingCache.Store(250)
bindCache := newBindCache().(*bindingCache)
Expand Down
81 changes: 11 additions & 70 deletions pkg/bindinfo/global_handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"time"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/bindinfo/norm"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser"
"github.com/pingcap/tidb/pkg/parser/ast"
Expand Down Expand Up @@ -110,12 +109,7 @@ type GlobalBindingHandle interface {
type globalBindingHandle struct {
sPool SessionPool

bindingCache atomic.Value

// fuzzyDigestMap is used to support fuzzy matching.
// fuzzyDigest is the digest calculated after eliminating all DB names, e.g. `select * from test.t` -> `select * from t` -> fuzzyDigest.
// exactDigest is the digest where all DB names are kept, e.g. `select * from test.t` -> exactDigest.
fuzzyDigestMap atomic.Value // map[string][]string fuzzyDigest --> exactDigests
fuzzyBindingCache atomic.Value

// lastTaskTime records the last update time for the global sql bind cache.
// This value is used to avoid reload duplicated bindings from storage.
Expand Down Expand Up @@ -156,44 +150,20 @@ func NewGlobalBindingHandle(sPool SessionPool) GlobalBindingHandle {
return handle
}

func (h *globalBindingHandle) getCache() BindingCache {
return h.bindingCache.Load().(BindingCache)
func (h *globalBindingHandle) getCache() FuzzyBindingCache {
return h.fuzzyBindingCache.Load().(FuzzyBindingCache)
}

func (h *globalBindingHandle) setCache(c BindingCache) {
func (h *globalBindingHandle) setCache(c FuzzyBindingCache) {
// TODO: update the global cache in-place instead of replacing it and remove this function.
h.bindingCache.Store(c)
}

func (h *globalBindingHandle) getFuzzyDigestMap() map[string][]string {
return h.fuzzyDigestMap.Load().(map[string][]string)
}

func (h *globalBindingHandle) setFuzzyDigestMap(m map[string][]string) {
h.fuzzyDigestMap.Store(m)
}

func buildFuzzyDigestMap(bindings Bindings) map[string][]string {
m := make(map[string][]string)
p := parser.New()
for _, binding := range bindings {
stmt, err := p.ParseOneStmt(binding.BindSQL, binding.Charset, binding.Collation)
if err != nil {
logutil.BgLogger().Warn("parse bindSQL failed", zap.String("bindSQL", binding.BindSQL), zap.Error(err))
p = parser.New()
continue
}
_, fuzzyDigest := norm.NormalizeStmtForBinding(stmt, norm.WithFuzz(true))
m[fuzzyDigest] = append(m[fuzzyDigest], binding.SQLDigest)
}
return m
h.fuzzyBindingCache.Store(c)
}

// Reset is to reset the BindHandle and clean old info.
func (h *globalBindingHandle) Reset() {
h.lastUpdateTime.Store(types.ZeroTimestamp)
h.invalidBindings = newInvalidBindingCache()
h.setCache(newBindCache())
h.setCache(newFuzzyBindingCache())
variable.RegisterStatistics(h)
}

Expand All @@ -209,11 +179,11 @@ func (h *globalBindingHandle) setLastUpdateTime(t types.Time) {
func (h *globalBindingHandle) LoadFromStorageToCache(fullLoad bool) (err error) {
var lastUpdateTime types.Time
var timeCondition string
var newCache BindingCache
var newCache FuzzyBindingCache
if fullLoad {
lastUpdateTime = types.ZeroTimestamp
timeCondition = ""
newCache = newBindCache()
newCache = newFuzzyBindingCache()
} else {
lastUpdateTime = h.getLastUpdateTime()
timeCondition = fmt.Sprintf("WHERE update_time>'%s'", lastUpdateTime.String())
Expand All @@ -235,8 +205,7 @@ func (h *globalBindingHandle) LoadFromStorageToCache(fullLoad bool) (err error)

defer func() {
h.setLastUpdateTime(lastUpdateTime)
h.setCache(newCache) // TODO: update it in place
h.setFuzzyDigestMap(buildFuzzyDigestMap(newCache.GetAllBindings()))
h.setCache(newCache)
}()

for _, row := range rows {
Expand Down Expand Up @@ -493,35 +462,7 @@ func (h *globalBindingHandle) Size() int {

// MatchGlobalBinding returns the matched binding for this statement.
func (h *globalBindingHandle) MatchGlobalBinding(sctx sessionctx.Context, fuzzyDigest string, tableNames []*ast.TableName) (matchedBinding Binding, isMatched bool) {
bindingCache := h.getCache()
if bindingCache.Size() == 0 {
return
}
fuzzyDigestMap := h.getFuzzyDigestMap()
if len(fuzzyDigestMap) == 0 {
return
}

leastWildcards := len(tableNames) + 1
enableFuzzyBinding := sctx.GetSessionVars().EnableFuzzyBinding
for _, exactDigest := range fuzzyDigestMap[fuzzyDigest] {
sqlDigest := exactDigest
if bindings := bindingCache.GetBinding(sqlDigest); bindings != nil {
for _, binding := range bindings {
numWildcards, matched := fuzzyMatchBindingTableName(sctx.GetSessionVars().CurrentDB, tableNames, binding.TableNames)
if matched && numWildcards > 0 && sctx != nil && !enableFuzzyBinding {
continue // fuzzy binding is disabled, skip this binding
}
if matched && numWildcards < leastWildcards {
matchedBinding = binding
isMatched = true
leastWildcards = numWildcards
break
}
}
}
}
return
return h.getCache().FuzzyMatchingBinding(sctx, fuzzyDigest, tableNames)
}

// GetAllGlobalBindings returns all bind records in cache.
Expand Down Expand Up @@ -696,7 +637,7 @@ func (*paramMarkerChecker) Leave(in ast.Node) (ast.Node, bool) {

// Clear resets the bind handle. It is only used for test.
func (h *globalBindingHandle) Clear() {
h.setCache(newBindCache())
h.setCache(newFuzzyBindingCache())
h.setLastUpdateTime(types.ZeroTimestamp)
h.invalidBindings.reset()
}
Expand Down

0 comments on commit 1fc91d4

Please sign in to comment.