Skip to content

Commit 4f568d2

Browse files
Rustin170506 authored and ti-chi-bot committed
statistic: fix panic when building topN (pingcap#47928)
close pingcap#35948
1 parent 2d87034 commit 4f568d2

File tree

1 file changed

+37
-1
lines changed

1 file changed

+37
-1
lines changed

pkg/statistics/builder.go

+37-1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,9 @@ import (
2424
"github.com/pingcap/tidb/pkg/types"
2525
"github.com/pingcap/tidb/pkg/util/codec"
2626
"github.com/pingcap/tidb/pkg/util/collate"
27+
"github.com/pingcap/tidb/pkg/util/logutil"
2728
"github.com/pingcap/tidb/pkg/util/memory"
29+
"go.uber.org/zap"
2830
)
2931

3032
// SortedBuilder is used to build histograms for PK and index.
@@ -373,12 +375,46 @@ func BuildHistAndTopN(
373375
if err != nil {
374376
return nil, nil, errors.Trace(err)
375377
}
378+
// For debugging invalid sample data.
379+
var (
380+
foundTwice bool
381+
firstTimeSample types.Datum
382+
)
376383
for j := 0; j < len(topNList); j++ {
377384
if bytes.Equal(sampleBytes, topNList[j].Encoded) {
378-
// find the same value in topn: need to skip over this value in samples
385+
// This should never happen, but we met this panic before, so we add this check here.
386+
// See: https://github.com/pingcap/tidb/issues/35948
387+
if foundTwice {
388+
datumString, err := firstTimeSample.ToString()
389+
if err != nil {
390+
logutil.BgLogger().With(
391+
zap.String("category", "stats"),
392+
).Error("try to convert datum to string failed", zap.Error(err))
393+
}
394+
395+
logutil.BgLogger().With(
396+
zap.String("category", "stats"),
397+
).Warn(
398+
"invalid sample data",
399+
zap.Bool("isColumn", isColumn),
400+
zap.Int64("columnID", id),
401+
zap.String("datum", datumString),
402+
zap.Binary("sampleBytes", sampleBytes),
403+
zap.Binary("topNBytes", topNList[j].Encoded),
404+
)
405+
// NOTE: if we don't return here, we may meet panic in the following code.
406+
// The i may decrease to a negative value.
407+
// We haven't fixed the issue here, because we don't know how to
408+
// remove the invalid sample data from the samples.
409+
break
410+
}
411+
// First time to find the same value in topN: need to record the sample data for debugging.
412+
firstTimeSample = samples[i].Value
413+
// Found the same value in topn: need to skip over this value in samples.
379414
copy(samples[i:], samples[uint64(i)+topNList[j].Count:])
380415
samples = samples[:uint64(len(samples))-topNList[j].Count]
381416
i--
417+
foundTwice = true
382418
continue
383419
}
384420
}

0 commit comments

Comments
 (0)