@@ -24,7 +24,9 @@ import (
24
24
"github.com/pingcap/tidb/types"
25
25
"github.com/pingcap/tidb/util/codec"
26
26
"github.com/pingcap/tidb/util/collate"
27
+ "github.com/pingcap/tidb/util/logutil"
27
28
"github.com/pingcap/tidb/util/memory"
29
+ "go.uber.org/zap"
28
30
)
29
31
30
32
// SortedBuilder is used to build histograms for PK and index.
@@ -372,12 +374,46 @@ func BuildHistAndTopN(
372
374
if err != nil {
373
375
return nil , nil , errors .Trace (err )
374
376
}
377
+ // For debugging invalid sample data.
378
+ var (
379
+ foundTwice bool
380
+ firstTimeSample types.Datum
381
+ )
375
382
for j := 0 ; j < len (topNList ); j ++ {
376
383
if bytes .Equal (sampleBytes , topNList [j ].Encoded ) {
377
- // find the same value in topn: need to skip over this value in samples
384
+ // This should never happen, but we met this panic before, so we add this check here.
385
+ // See: https://github.com/pingcap/tidb/issues/35948
386
+ if foundTwice {
387
+ datumString , err := firstTimeSample .ToString ()
388
+ if err != nil {
389
+ logutil .BgLogger ().With (
390
+ zap .String ("category" , "stats" ),
391
+ ).Error ("try to convert datum to string failed" , zap .Error (err ))
392
+ }
393
+
394
+ logutil .BgLogger ().With (
395
+ zap .String ("category" , "stats" ),
396
+ ).Warn (
397
+ "invalid sample data" ,
398
+ zap .Bool ("isColumn" , isColumn ),
399
+ zap .Int64 ("columnID" , id ),
400
+ zap .String ("datum" , datumString ),
401
+ zap .Binary ("sampleBytes" , sampleBytes ),
402
+ zap .Binary ("topNBytes" , topNList [j ].Encoded ),
403
+ )
404
+ // NOTE: if we don't break out of the loop here, the following code may panic.
405
+ // The i may decrease to a negative value.
406
+ // We haven't fixed the issue here, because we don't know how to
407
+ // remove the invalid sample data from the samples.
408
+ break
409
+ }
410
+ // First time to find the same value in topN: need to record the sample data for debugging.
411
+ firstTimeSample = samples [i ].Value
412
+ // Found the same value in topn: need to skip over this value in samples.
378
413
copy (samples [i :], samples [uint64 (i )+ topNList [j ].Count :])
379
414
samples = samples [:uint64 (len (samples ))- topNList [j ].Count ]
380
415
i --
416
+ foundTwice = true
381
417
continue
382
418
}
383
419
}
0 commit comments