@@ -24,7 +24,9 @@ import (
24
24
"github.com/pingcap/tidb/pkg/types"
25
25
"github.com/pingcap/tidb/pkg/util/codec"
26
26
"github.com/pingcap/tidb/pkg/util/collate"
27
+ "github.com/pingcap/tidb/pkg/util/logutil"
27
28
"github.com/pingcap/tidb/pkg/util/memory"
29
+ "go.uber.org/zap"
28
30
)
29
31
30
32
// SortedBuilder is used to build histograms for PK and index.
@@ -373,12 +375,46 @@ func BuildHistAndTopN(
373
375
if err != nil {
374
376
return nil , nil , errors .Trace (err )
375
377
}
378
+ // For debugging invalid sample data.
379
+ var (
380
+ foundTwice bool
381
+ firstTimeSample types.Datum
382
+ )
376
383
for j := 0 ; j < len (topNList ); j ++ {
377
384
if bytes .Equal (sampleBytes , topNList [j ].Encoded ) {
378
- // find the same value in topn: need to skip over this value in samples
385
+ // This should never happen, but we have hit this panic before, so we add this check here.
386
+ // See: https://github.com/pingcap/tidb/issues/35948
387
+ if foundTwice {
388
+ datumString , err := firstTimeSample .ToString ()
389
+ if err != nil {
390
+ logutil .BgLogger ().With (
391
+ zap .String ("category" , "stats" ),
392
+ ).Error ("try to convert datum to string failed" , zap .Error (err ))
393
+ }
394
+
395
+ logutil .BgLogger ().With (
396
+ zap .String ("category" , "stats" ),
397
+ ).Warn (
398
+ "invalid sample data" ,
399
+ zap .Bool ("isColumn" , isColumn ),
400
+ zap .Int64 ("columnID" , id ),
401
+ zap .String ("datum" , datumString ),
402
+ zap .Binary ("sampleBytes" , sampleBytes ),
403
+ zap .Binary ("topNBytes" , topNList [j ].Encoded ),
404
+ )
405
+ // NOTE: if we don't break out of the loop here, the following code may panic.
406
+ // The i may decrease to a negative value.
407
+ // We haven't fixed the issue here, because we don't know how to
408
+ // remove the invalid sample data from the samples.
409
+ break
410
+ }
411
+ // First time to find the same value in topN: need to record the sample data for debugging.
412
+ firstTimeSample = samples [i ].Value
413
+ // Found the same value in topn: need to skip over this value in samples.
379
414
copy (samples [i :], samples [uint64 (i )+ topNList [j ].Count :])
380
415
samples = samples [:uint64 (len (samples ))- topNList [j ].Count ]
381
416
i --
417
+ foundTwice = true
382
418
continue
383
419
}
384
420
}
0 commit comments