Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: dynamically update the average column size #6170

Merged
merged 19 commits into from
Apr 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ func (e *SelectLockExec) Open(ctx context.Context) error {
txnCtx.ForUpdate = true
for id := range e.Schema().TblID2Handle {
// This operation is only for schema validator check.
txnCtx.UpdateDeltaForTable(id, 0, 0)
txnCtx.UpdateDeltaForTable(id, 0, 0, map[int64]int64{})
}
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion executor/show_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (e *ShowExec) fetchShowStatsHistogram() error {
statsTbl := h.GetTableStats(tbl)
if !statsTbl.Pseudo {
for _, col := range statsTbl.Columns {
e.histogramToRow(db.Name.O, tbl.Name.O, col.Info.Name.O, 0, col.Histogram, col.AvgColSize())
e.histogramToRow(db.Name.O, tbl.Name.O, col.Info.Name.O, 0, col.Histogram, col.AvgColSize(statsTbl.Count))
}
for _, idx := range statsTbl.Indices {
e.histogramToRow(db.Name.O, tbl.Name.O, idx.Info.Name.O, 1, idx.Histogram, 0)
Expand Down
19 changes: 16 additions & 3 deletions executor/write.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,14 @@ func updateRecord(ctx sessionctx.Context, h int64, oldData, newData []types.Datu
} else {
sc.AddAffectedRows(1)
}

ctx.GetSessionVars().TxnCtx.UpdateDeltaForTable(t.Meta().ID, 0, 1)
colSize := make(map[int64]int64)
for id, col := range t.Cols() {
val := int64(len(newData[id].GetBytes()) - len(oldData[id].GetBytes()))
if val != 0 {
colSize[col.ID] = val
}
}
ctx.GetSessionVars().TxnCtx.UpdateDeltaForTable(t.Meta().ID, 0, 1, colSize)
return true, handleChanged, newHandle, nil
}

Expand Down Expand Up @@ -392,7 +398,14 @@ func (e *DeleteExec) removeRow(ctx sessionctx.Context, t table.Table, h int64, d
}
ctx.StmtAddDirtyTableOP(DirtyTableDeleteRow, t.Meta().ID, h, nil)
ctx.GetSessionVars().StmtCtx.AddAffectedRows(1)
ctx.GetSessionVars().TxnCtx.UpdateDeltaForTable(t.Meta().ID, -1, 1)
colSize := make(map[int64]int64)
for id, col := range t.Cols() {
val := -int64(len(data[id].GetBytes()))
if val != 0 {
colSize[col.ID] = val
}
}
ctx.GetSessionVars().TxnCtx.UpdateDeltaForTable(t.Meta().ID, -1, 1, colSize)
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ func (s *session) doCommitWithRetry(ctx context.Context) error {
mapper := s.GetSessionVars().TxnCtx.TableDeltaMap
if s.statsCollector != nil && mapper != nil {
for id, item := range mapper {
s.statsCollector.Update(id, item.Delta, item.Count)
s.statsCollector.Update(id, item.Delta, item.Count, &item.ColSize)
}
}
return nil
Expand Down
13 changes: 10 additions & 3 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,19 @@ type TransactionContext struct {
}

// UpdateDeltaForTable updates the delta info for some table.
func (tc *TransactionContext) UpdateDeltaForTable(tableID int64, delta int64, count int64) {
func (tc *TransactionContext) UpdateDeltaForTable(tableID int64, delta int64, count int64, colSize map[int64]int64) {
if tc.TableDeltaMap == nil {
tc.TableDeltaMap = make(map[int64]TableDelta)
}
item := tc.TableDeltaMap[tableID]
if item.ColSize == nil {
item.ColSize = make(map[int64]int64)
}
item.Delta += delta
item.Count += count
for key, val := range colSize {
item.ColSize[key] += val
}
tc.TableDeltaMap[tableID] = item
}

Expand Down Expand Up @@ -568,6 +574,7 @@ const (

// TableDelta stands for the changed count for one table.
type TableDelta struct {
Delta int64
Count int64
Delta int64
Count int64
ColSize map[int64]int64
}
10 changes: 1 addition & 9 deletions statistics/ddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,8 @@ func (h *Handle) insertColStats2KV(tableID int64, colInfo *model.ColumnInfo) err
return errors.Trace(err)
}
} else {
var totColSize int64
switch colInfo.Tp {
case mysql.TypeFloat, mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong,
mysql.TypeDouble, mysql.TypeYear, mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeNewDecimal:
totColSize = 0
default:
totColSize = int64(len(value.GetBytes()))
}
// If these stats already exist, we insert the histogram meta first; the distinct_count will always be one.
_, err = exec.Execute(ctx, fmt.Sprintf("insert into mysql.stats_histograms (version, table_id, is_index, hist_id, distinct_count, tot_col_size) values (%d, %d, 0, %d, 1, %d)", h.ctx.Txn().StartTS(), tableID, colInfo.ID, totColSize*count))
_, err = exec.Execute(ctx, fmt.Sprintf("insert into mysql.stats_histograms (version, table_id, is_index, hist_id, distinct_count, tot_col_size) values (%d, %d, 0, %d, 1, %d)", h.ctx.Txn().StartTS(), tableID, colInfo.ID, int64(len(value.GetBytes()))*count))
if err != nil {
return errors.Trace(err)
}
Expand Down
2 changes: 1 addition & 1 deletion statistics/ddl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func (s *testStatsCacheSuite) TestDDLHistogram(c *C) {
statsTbl = do.StatsHandle().GetTableStats(tableInfo)
c.Assert(statsTbl.Pseudo, IsFalse)
sc = new(stmtctx.StatementContext)
c.Check(statsTbl.Columns[tableInfo.Columns[5].ID].AvgColSize(), Equals, 3.0)
c.Check(statsTbl.Columns[tableInfo.Columns[5].ID].AvgColSize(statsTbl.Count), Equals, 3.0)

testKit.MustExec("create index i on t(c2, c1)")
testKit.MustExec("analyze table t")
Expand Down
2 changes: 1 addition & 1 deletion statistics/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/types"
tipb "github.com/pingcap/tipb/go-tipb"
"github.com/pingcap/tipb/go-tipb"
)

// JSONTable is used for dumping statistics.
Expand Down
14 changes: 7 additions & 7 deletions statistics/feedback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
defaultBucketCount = 5
defer func() { defaultBucketCount = originBucketCount }()
c.Assert(UpdateHistogram(q.Hist(), q).ToString(0), Equals,
"column:0 ndv:0\n"+
"column:0 ndv:0 totColSize:0\n"+
"num: 10000\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
"num: 10003\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+
"num: 10021\tlower_bound: 4\tupper_bound: 20\trepeats: 0\n"+
Expand All @@ -88,7 +88,7 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
q.feedback = feedbacks
buckets, isNewBuckets, totalCount := splitBuckets(q.Hist(), q)
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
"column:0 ndv:0\n"+
"column:0 ndv:0 totColSize:0\n"+
"num: 1\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
"num: 1\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+
"num: 1\tlower_bound: 5\tupper_bound: 7\trepeats: 0\n"+
Expand All @@ -107,7 +107,7 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
q.feedback = feedbacks
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q)
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
"column:0 ndv:0\n"+
"column:0 ndv:0 totColSize:0\n"+
"num: 100000\tlower_bound: 0\tupper_bound: 1\trepeats: 0\n"+
"num: 100000\tlower_bound: 2\tupper_bound: 3\trepeats: 0\n"+
"num: 100000\tlower_bound: 5\tupper_bound: 7\trepeats: 0\n"+
Expand All @@ -127,7 +127,7 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
q.feedback = feedbacks
buckets, isNewBuckets, totalCount = splitBuckets(q.Hist(), q)
c.Assert(buildNewHistogram(q.Hist(), buckets).ToString(0), Equals,
"column:0 ndv:0\n"+
"column:0 ndv:0 totColSize:0\n"+
"num: 1000000\tlower_bound: 0\tupper_bound: 1000000\trepeats: 0")
c.Assert(isNewBuckets, DeepEquals, []bool{false})
c.Assert(totalCount, Equals, int64(1000000))
Expand All @@ -148,14 +148,14 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
counts: []int64{1},
isNewBuckets: []bool{false},
bucketCount: 1,
result: "column:0 ndv:0\nnum: 1\tlower_bound: 1\tupper_bound: 2\trepeats: 0",
result: "column:0 ndv:0 totColSize:0\nnum: 1\tlower_bound: 1\tupper_bound: 2\trepeats: 0",
},
{
points: []int64{1, 2, 2, 3, 3, 4},
counts: []int64{100000, 1, 1},
isNewBuckets: []bool{false, false, false},
bucketCount: 2,
result: "column:0 ndv:0\n" +
result: "column:0 ndv:0 totColSize:0\n" +
"num: 100000\tlower_bound: 1\tupper_bound: 2\trepeats: 0\n" +
"num: 100002\tlower_bound: 2\tupper_bound: 4\trepeats: 0",
},
Expand All @@ -165,7 +165,7 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
counts: []int64{1, 1, 100000, 100000},
isNewBuckets: []bool{false, false, false, false},
bucketCount: 3,
result: "column:0 ndv:0\n" +
result: "column:0 ndv:0 totColSize:0\n" +
"num: 2\tlower_bound: 1\tupper_bound: 3\trepeats: 0\n" +
"num: 100002\tlower_bound: 3\tupper_bound: 4\trepeats: 0\n" +
"num: 200002\tlower_bound: 4\tupper_bound: 5\trepeats: 0",
Expand Down
35 changes: 22 additions & 13 deletions statistics/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,19 +220,19 @@ func (s *testStatsCacheSuite) TestAvgColLen(c *C) {
c.Assert(err, IsNil)
tableInfo := tbl.Meta()
statsTbl := do.StatsHandle().GetTableStats(tableInfo)
c.Assert(statsTbl.Columns[tableInfo.Columns[0].ID].AvgColSize(), Equals, 8.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[0].ID].AvgColSize(statsTbl.Count), Equals, 8.0)

// The size of varchar type is LEN + BYTE, here is 1 + 7 = 8
c.Assert(statsTbl.Columns[tableInfo.Columns[1].ID].AvgColSize(), Equals, 8.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[2].ID].AvgColSize(), Equals, 4.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[3].ID].AvgColSize(), Equals, 16.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[1].ID].AvgColSize(statsTbl.Count), Equals, 8.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[2].ID].AvgColSize(statsTbl.Count), Equals, 4.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[3].ID].AvgColSize(statsTbl.Count), Equals, 16.0)
testKit.MustExec("insert into t values(132, '123456789112', 1232.3, '2018-03-07 19:17:29')")
testKit.MustExec("analyze table t")
statsTbl = do.StatsHandle().GetTableStats(tableInfo)
c.Assert(statsTbl.Columns[tableInfo.Columns[0].ID].AvgColSize(), Equals, 8.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[1].ID].AvgColSize(), Equals, 10.5)
c.Assert(statsTbl.Columns[tableInfo.Columns[2].ID].AvgColSize(), Equals, 4.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[3].ID].AvgColSize(), Equals, 16.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[0].ID].AvgColSize(statsTbl.Count), Equals, 8.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[1].ID].AvgColSize(statsTbl.Count), Equals, 10.5)
c.Assert(statsTbl.Columns[tableInfo.Columns[2].ID].AvgColSize(statsTbl.Count), Equals, 4.0)
c.Assert(statsTbl.Columns[tableInfo.Columns[3].ID].AvgColSize(statsTbl.Count), Equals, 16.0)
}

func (s *testStatsCacheSuite) TestVersion(c *C) {
Expand Down Expand Up @@ -319,14 +319,14 @@ func (s *testStatsCacheSuite) TestLoadHist(c *C) {
defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)
testKit.MustExec("use test")
testKit.MustExec("create table t (c1 int, c2 int)")
testKit.MustExec("create table t (c1 varchar(12), c2 char(12))")
do := s.do
h := do.StatsHandle()
err := h.HandleDDLEvent(<-h.DDLEventCh())
c.Assert(err, IsNil)
rowCount := 10
for i := 0; i < rowCount; i++ {
testKit.MustExec("insert into t values(1,2)")
testKit.MustExec("insert into t values('a','ddd')")
}
testKit.MustExec("analyze table t")
is := do.InfoSchema()
Expand All @@ -335,16 +335,25 @@ func (s *testStatsCacheSuite) TestLoadHist(c *C) {
tableInfo := tbl.Meta()
oldStatsTbl := h.GetTableStats(tableInfo)
for i := 0; i < rowCount; i++ {
testKit.MustExec("insert into t values(1,2)")
testKit.MustExec("insert into t values('bb','sdfga')")
}
h.DumpStatsDeltaToKV()
h.Update(do.InfoSchema())
newStatsTbl := h.GetTableStats(tableInfo)
// The stats table is updated.
c.Assert(oldStatsTbl == newStatsTbl, IsFalse)
// The histograms is not updated.
// Only the TotColSize of histograms is updated.
for id, hist := range oldStatsTbl.Columns {
c.Assert(hist, Equals, newStatsTbl.Columns[id])
c.Assert(hist.TotColSize, Less, newStatsTbl.Columns[id].TotColSize)

temp := hist.TotColSize
hist.TotColSize = newStatsTbl.Columns[id].TotColSize
c.Assert(statistics.HistogramEqual(&hist.Histogram, &newStatsTbl.Columns[id].Histogram, false), IsTrue)
hist.TotColSize = temp

c.Assert(hist.CMSketch.Equal(newStatsTbl.Columns[id].CMSketch), IsTrue)
c.Assert(hist.Count, Equals, newStatsTbl.Columns[id].Count)
c.Assert(hist.Info, Equals, newStatsTbl.Columns[id].Info)
}
// Add column c3, we only update c3.
testKit.MustExec("alter table t add column c3 int")
Expand Down
13 changes: 7 additions & 6 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,10 @@ func (hg *Histogram) GetUpper(idx int) *types.Datum {
}

// AvgColSize is the average column size of the histogram.
func (c *Column) AvgColSize() float64 {
func (c *Column) AvgColSize(count int64) float64 {
if count == 0 {
return 0
}
switch c.Histogram.tp.Tp {
case mysql.TypeFloat:
return 4
Expand All @@ -117,10 +120,8 @@ func (c *Column) AvgColSize() float64 {
case mysql.TypeNewDecimal:
return types.MyDecimalStructSize
default:
if c.Count == 0 {
return 0
}
return float64(c.TotColSize) / float64(c.Count)
// Keep two decimal places.
return math.Round(float64(c.TotColSize)/float64(count)*100) / 100
}
}

Expand Down Expand Up @@ -316,7 +317,7 @@ func (hg *Histogram) ToString(idxCols int) string {
if idxCols > 0 {
strs = append(strs, fmt.Sprintf("index:%d ndv:%d", hg.ID, hg.NDV))
} else {
strs = append(strs, fmt.Sprintf("column:%d ndv:%d", hg.ID, hg.NDV))
strs = append(strs, fmt.Sprintf("column:%d ndv:%d totColSize:%d", hg.ID, hg.NDV, hg.TotColSize))
}
for i := 0; i < hg.Len(); i++ {
upperVal, err := ValueToString(hg.GetUpper(i), idxCols)
Expand Down
33 changes: 27 additions & 6 deletions statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,32 @@ func (h *Handle) columnStatsFromStorage(row types.Row, table *Table, tableInfo *
continue
}
isHandle := tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag)
needNotLoad := col == nil || (col.Len() == 0 && col.LastUpdateVersion < histVer)
if h.Lease > 0 && !isHandle && needNotLoad && !loadAll {
// We will not load buckets if:
// 1. Lease > 0, and:
// 2. this column is not handle, and:
// 3. the column doesn't have buckets before, and:
// 4. loadAll is false.
notNeedLoad := h.Lease > 0 &&
!isHandle &&
(col == nil || col.Len() == 0 && col.LastUpdateVersion < histVer) &&
!loadAll
if notNeedLoad {
count, err := columnCountFromStorage(h.ctx, table.TableID, histID)
if err != nil {
return errors.Trace(err)
}
col = &Column{
Histogram: Histogram{ID: histID, NDV: distinct, NullCount: nullCount, tp: &colInfo.FieldType, LastUpdateVersion: histVer, TotColSize: totColSize},
Info: colInfo,
Count: count + nullCount}
Histogram: Histogram{
ID: histID,
NDV: distinct,
NullCount: nullCount,
tp: &colInfo.FieldType,
LastUpdateVersion: histVer,
TotColSize: totColSize,
},
Info: colInfo,
Count: count + nullCount,
}
break
}
if col == nil || col.LastUpdateVersion < histVer || loadAll {
Expand All @@ -143,7 +159,12 @@ func (h *Handle) columnStatsFromStorage(row types.Row, table *Table, tableInfo *
if err != nil {
return errors.Trace(err)
}
col = &Column{Histogram: *hg, Info: colInfo, CMSketch: cms, Count: int64(hg.totalRowCount())}
col = &Column{
Histogram: *hg,
Info: colInfo,
CMSketch: cms,
Count: int64(hg.totalRowCount()),
}
}
break
}
Expand Down
Loading