Skip to content

Commit

Permalink
infoschema: fix issue of information schema cache miss cause by schem…
Browse files Browse the repository at this point in the history
…a version gap (#53445) (#53585)

close #53428
  • Loading branch information
ti-chi-bot authored May 28, 2024
1 parent 61e5fa0 commit a6ebfa3
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 7 deletions.
2 changes: 2 additions & 0 deletions domain/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ func (do *Domain) tryLoadSchemaDiffs(m *meta.Meta, usedVersion, newVersion int64
if diff == nil {
// Empty diff means the txn of generating schema version is committed, but the txn of `runDDLJob` is not or fail.
// It is safe to skip the empty diff because the infoschema is new enough and consistent.
logutil.BgLogger().Info("diff load InfoSchema get empty schema diff", zap.Int64("version", usedVersion))
do.infoCache.InsertEmptySchemaVersion(usedVersion)
continue
}
diffs = append(diffs, diff)
Expand Down
82 changes: 75 additions & 7 deletions infoschema/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ type InfoCache struct {
mu sync.RWMutex
// cache is sorted by both SchemaVersion and timestamp in descending order, assume they have same order
cache []schemaAndTimestamp

// emptySchemaVersions stores schema version which has no schema_diff.
emptySchemaVersions map[int64]struct{}
}

type schemaAndTimestamp struct {
Expand All @@ -40,10 +43,18 @@ type schemaAndTimestamp struct {
// NewCache creates a new InfoCache.
func NewCache(capacity int) *InfoCache {
return &InfoCache{
cache: make([]schemaAndTimestamp, 0, capacity),
cache: make([]schemaAndTimestamp, 0, capacity),
emptySchemaVersions: make(map[int64]struct{}),
}
}

// Size returns the size of the cache, export for test.
func (h *InfoCache) Size() int {
h.mu.Lock()
defer h.mu.Unlock()
return len(h.cache)
}

// Reset resets the cache.
func (h *InfoCache) Reset(capacity int) {
h.mu.Lock()
Expand All @@ -63,22 +74,57 @@ func (h *InfoCache) GetLatest() InfoSchema {
return nil
}

// GetEmptySchemaVersions returns emptySchemaVersions, exports for testing.
func (h *InfoCache) GetEmptySchemaVersions() map[int64]struct{} {
return h.emptySchemaVersions
}

func (h *InfoCache) getSchemaByTimestampNoLock(ts uint64) (InfoSchema, bool) {
logutil.BgLogger().Debug("SCHEMA CACHE get schema", zap.Uint64("timestamp", ts))
// search one by one instead of binary search, because the timestamp of a schema could be 0
// this is ok because the size of h.cache is small (currently set to 16)
// moreover, the most likely hit element in the array is the first one in steady mode
// thus it may have better performance than binary search
for i, is := range h.cache {
if is.timestamp == 0 || (i > 0 && h.cache[i-1].infoschema.SchemaMetaVersion() != is.infoschema.SchemaMetaVersion()+1) {
// the schema version doesn't have a timestamp or there is a gap in the schema cache
// ignore all the schema cache equals or less than this version in search by timestamp
break
if is.timestamp == 0 || ts < uint64(is.timestamp) {
// is.timestamp == 0 means the schema ts is unknown, so we can't use it, then just skip it.
// ts < is.timestamp means the schema is newer than ts, so we can't use it too, just skip it to find the older one.
continue
}
if ts >= uint64(is.timestamp) {
// found the largest version before the given ts
// ts >= is.timestamp must be true after the above condition.
if i == 0 {
// the first element is the latest schema, so we can return it directly.
return is.infoschema, true
}

if uint64(h.cache[i-1].timestamp) > ts {
// The first condition is to make sure the cache[i-1].timestamp > ts >= cache[i].timestamp, then the current schema is suitable for ts.
lastVersion := h.cache[i-1].infoschema.SchemaMetaVersion()
currentVersion := is.infoschema.SchemaMetaVersion()
if lastVersion == currentVersion+1 {
// This condition is to make sure the schema version is continuous. If last(cache[i-1]) schema-version is 10,
// but current(cache[i]) schema-version is not 9, then current schema may not suitable for ts.
return is.infoschema, true
}
if lastVersion > currentVersion {
found := true
for ver := currentVersion + 1; ver < lastVersion; ver++ {
_, ok := h.emptySchemaVersions[ver]
if !ok {
found = false
break
}
}
if found {
// This condition is to make sure the schema version is continuous. If last(cache[i-1]) schema-version is 10, and
// current(cache[i]) schema-version is 8, then there is a gap exist, and if all the gap version can be found in cache.emptySchemaVersions
// which means those gap versions don't have schema info, then current schema is also suitable for ts.
return is.infoschema, true
}
}
}
// current schema is not suitable for ts, then break the loop to avoid the unnecessary search.
break
}

logutil.BgLogger().Debug("SCHEMA CACHE no schema found")
Expand Down Expand Up @@ -185,3 +231,25 @@ func (h *InfoCache) Insert(is InfoSchema, schemaTS uint64) bool {

return true
}

// InsertEmptySchemaVersion inserts empty schema version into a map. If exceeded the cache capacity, remove the oldest version.
func (h *InfoCache) InsertEmptySchemaVersion(version int64) {
h.mu.Lock()
defer h.mu.Unlock()

h.emptySchemaVersions[version] = struct{}{}
if len(h.emptySchemaVersions) > cap(h.cache) {
// remove oldest version.
versions := make([]int64, 0, len(h.emptySchemaVersions))
for ver := range h.emptySchemaVersions {
versions = append(versions, ver)
}
sort.Slice(versions, func(i, j int) bool { return versions[i] < versions[j] })
for _, ver := range versions {
delete(h.emptySchemaVersions, ver)
if len(h.emptySchemaVersions) <= cap(h.cache) {
break
}
}
}
}
127 changes: 127 additions & 0 deletions infoschema/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package infoschema_test

import (
"fmt"
"testing"

"github.com/pingcap/tidb/infoschema"
Expand Down Expand Up @@ -165,3 +166,129 @@ func TestGetByTimestamp(t *testing.T) {
require.Equal(t, is3, ic.GetBySnapshotTS(3))
require.Equal(t, is3, ic.GetBySnapshotTS(4))
}

func TestCacheWithSchemaTsZero(t *testing.T) {
ic := infoschema.NewCache(16)
require.NotNil(t, ic)

for i := 1; i <= 8; i++ {
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, int64(i)), uint64(i))
}

checkFn := func(start, end int64, exist bool) {
require.True(t, start <= end)
latestSchemaVersion := ic.GetLatest().SchemaMetaVersion()
for ts := start; ts <= end; ts++ {
is := ic.GetBySnapshotTS(uint64(ts))
if exist {
require.NotNil(t, is, fmt.Sprintf("ts %d", ts))
if ts > latestSchemaVersion {
require.Equal(t, latestSchemaVersion, is.SchemaMetaVersion(), fmt.Sprintf("ts %d", ts))
} else {
require.Equal(t, ts, is.SchemaMetaVersion(), fmt.Sprintf("ts %d", ts))
}
} else {
require.Nil(t, is, fmt.Sprintf("ts %d", ts))
}
}
}
checkFn(1, 8, true)
checkFn(8, 10, true)

// mock for meet error There is no Write MVCC info for the schema version
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, 9), 0)
checkFn(1, 7, true)
checkFn(8, 9, false)
checkFn(9, 10, false)

for i := 10; i <= 16; i++ {
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, int64(i)), uint64(i))
checkFn(1, 7, true)
checkFn(8, 9, false)
checkFn(10, 16, true)
}
require.Equal(t, 16, ic.Size())

// refill the cache
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, 9), 9)
checkFn(1, 16, true)
require.Equal(t, 16, ic.Size())

// Test more than capacity
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, 17), 17)
checkFn(1, 1, false)
checkFn(2, 17, true)
checkFn(2, 20, true)
require.Equal(t, 16, ic.Size())

// Test for there is a hole in the middle.
ic = infoschema.NewCache(16)

// mock for restart with full load the latest version schema.
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, 100), 100)
checkFn(1, 99, false)
checkFn(100, 100, true)

for i := 1; i <= 16; i++ {
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, int64(i)), uint64(i))
}
checkFn(1, 1, false)
checkFn(2, 15, true)
checkFn(16, 16, false)
checkFn(100, 100, true)
require.Equal(t, 16, ic.Size())

for i := 85; i < 100; i++ {
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, int64(i)), uint64(i))
}
checkFn(1, 84, false)
checkFn(85, 100, true)
require.Equal(t, 16, ic.Size())

// Test cache with schema version hole, which is cause by schema version doesn't has related schema-diff.
ic = infoschema.NewCache(16)
require.NotNil(t, ic)
for i := 1; i <= 8; i++ {
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, int64(i)), uint64(i))
}
checkFn(1, 10, true)
// mock for schema version hole, schema-version 9 is missing.
ic.Insert(infoschema.MockInfoSchemaWithSchemaVer(nil, 10), 10)
checkFn(1, 7, true)
// without empty schema version map, get snapshot by ts 8, 9 will both failed.
checkFn(8, 9, false)
checkFn(10, 10, true)
// add empty schema version 9.
ic.InsertEmptySchemaVersion(9)
// after set empty schema version, get snapshot by ts 8, 9 will both success.
checkFn(1, 8, true)
checkFn(10, 10, true)
is := ic.GetBySnapshotTS(uint64(9))
require.NotNil(t, is)
// since schema version 9 is empty, so get by ts 9 will get schema which version is 8.
require.Equal(t, int64(8), is.SchemaMetaVersion())
}

func TestCacheEmptySchemaVersion(t *testing.T) {
ic := infoschema.NewCache(16)
require.NotNil(t, ic)
require.Equal(t, 0, len(ic.GetEmptySchemaVersions()))
for i := 0; i < 16; i++ {
ic.InsertEmptySchemaVersion(int64(i))
}
emptyVersions := ic.GetEmptySchemaVersions()
require.Equal(t, 16, len(emptyVersions))
for i := 0; i < 16; i++ {
_, ok := emptyVersions[int64(i)]
require.True(t, ok)
}
for i := 16; i < 20; i++ {
ic.InsertEmptySchemaVersion(int64(i))
}
emptyVersions = ic.GetEmptySchemaVersions()
require.Equal(t, 16, len(emptyVersions))
for i := 4; i < 20; i++ {
_, ok := emptyVersions[int64(i)]
require.True(t, ok)
}
}

0 comments on commit a6ebfa3

Please sign in to comment.