@@ -118,7 +118,7 @@ func (sch *BackfillingSchedulerExt) OnNextSubtasksBatch(
118
118
119
119
failpoint .Inject ("mockWriteIngest" , func () {
120
120
m := & BackfillSubTaskMeta {
121
- SortedKVMeta : external.SortedKVMeta {},
121
+ MetaGroups : [] * external.SortedKVMeta {},
122
122
}
123
123
metaBytes , _ := json .Marshal (m )
124
124
metaArr := make ([][]byte , 0 , 16 )
@@ -297,16 +297,14 @@ func generateNonPartitionPlan(
297
297
}
298
298
batch := recordRegionMetas [i :end ]
299
299
subTaskMeta := & BackfillSubTaskMeta {
300
- SortedKVMeta : external.SortedKVMeta {
301
- StartKey : batch [0 ].StartKey (),
302
- EndKey : batch [len (batch )- 1 ].EndKey (),
303
- },
300
+ RowStart : batch [0 ].StartKey (),
301
+ RowEnd : batch [len (batch )- 1 ].EndKey (),
304
302
}
305
303
if i == 0 {
306
- subTaskMeta .StartKey = startKey
304
+ subTaskMeta .RowStart = startKey
307
305
}
308
306
if end == len (recordRegionMetas ) {
309
- subTaskMeta .EndKey = endKey
307
+ subTaskMeta .RowEnd = endKey
310
308
}
311
309
metaBytes , err := json .Marshal (subTaskMeta )
312
310
if err != nil {
@@ -339,20 +337,55 @@ func generateGlobalSortIngestPlan(
339
337
step proto.Step ,
340
338
logger * zap.Logger ,
341
339
) ([][]byte , error ) {
342
- startKeyFromSumm , endKeyFromSumm , totalSize , multiFileStat , err := getSummaryFromLastStep (taskHandle , task .ID , step )
340
+ var kvMetaGroups []* external.SortedKVMeta
341
+ err := forEachBackfillSubtaskMeta (taskHandle , task .ID , step , func (subtask * BackfillSubTaskMeta ) {
342
+ if kvMetaGroups == nil {
343
+ kvMetaGroups = make ([]* external.SortedKVMeta , len (subtask .MetaGroups ))
344
+ }
345
+ for i , cur := range subtask .MetaGroups {
346
+ if kvMetaGroups [i ] == nil {
347
+ kvMetaGroups [i ] = & external.SortedKVMeta {}
348
+ }
349
+ kvMetaGroups [i ].Merge (cur )
350
+ }
351
+ })
343
352
if err != nil {
344
353
return nil , err
345
354
}
346
- if len (startKeyFromSumm ) == 0 && len (endKeyFromSumm ) == 0 {
347
- // Skip global sort for empty table.
348
- return nil , nil
349
- }
350
355
instanceIDs , err := scheduler .GetLiveExecIDs (ctx )
351
356
if err != nil {
352
357
return nil , err
353
358
}
359
+ metaArr := make ([][]byte , 0 , 16 )
360
+ for i , g := range kvMetaGroups {
361
+ if g == nil {
362
+ logger .Error ("meet empty kv group when getting subtask summary" ,
363
+ zap .Int64 ("taskID" , task .ID ))
364
+ return nil , errors .Errorf ("subtask kv group %d is empty" , i )
365
+ }
366
+ newMeta , err := splitSubtaskMetaForOneKVMetaGroup (ctx , store , g , cloudStorageURI , int64 (len (instanceIDs )), logger )
367
+ if err != nil {
368
+ return nil , errors .Trace (err )
369
+ }
370
+ metaArr = append (metaArr , newMeta ... )
371
+ }
372
+ return metaArr , nil
373
+ }
374
+
375
+ func splitSubtaskMetaForOneKVMetaGroup (
376
+ ctx context.Context ,
377
+ store kv.StorageWithPD ,
378
+ kvMeta * external.SortedKVMeta ,
379
+ cloudStorageURI string ,
380
+ instanceCnt int64 ,
381
+ logger * zap.Logger ,
382
+ ) (metaArr [][]byte , err error ) {
383
+ if len (kvMeta .StartKey ) == 0 && len (kvMeta .EndKey ) == 0 {
384
+ // Skip global sort for empty table.
385
+ return nil , nil
386
+ }
354
387
splitter , err := getRangeSplitter (
355
- ctx , store , cloudStorageURI , int64 (totalSize ), int64 ( len ( instanceIDs )), multiFileStat , logger )
388
+ ctx , store , cloudStorageURI , int64 (kvMeta . TotalKVSize ), instanceCnt , kvMeta . MultipleFilesStats , logger )
356
389
if err != nil {
357
390
return nil , err
358
391
}
@@ -363,33 +396,32 @@ func generateGlobalSortIngestPlan(
363
396
}
364
397
}()
365
398
366
- metaArr := make ([][]byte , 0 , 16 )
367
- startKey := startKeyFromSumm
399
+ startKey := kvMeta .StartKey
368
400
var endKey kv.Key
369
401
for {
370
402
endKeyOfGroup , dataFiles , statFiles , rangeSplitKeys , err := splitter .SplitOneRangesGroup ()
371
403
if err != nil {
372
404
return nil , err
373
405
}
374
406
if len (endKeyOfGroup ) == 0 {
375
- endKey = endKeyFromSumm
407
+ endKey = kvMeta . EndKey
376
408
} else {
377
409
endKey = kv .Key (endKeyOfGroup ).Clone ()
378
410
}
379
411
logger .Info ("split subtask range" ,
380
412
zap .String ("startKey" , hex .EncodeToString (startKey )),
381
413
zap .String ("endKey" , hex .EncodeToString (endKey )))
382
414
383
- if startKey . Cmp ( endKey ) >= 0 {
415
+ if bytes . Compare ( startKey , endKey ) >= 0 {
384
416
return nil , errors .Errorf ("invalid range, startKey: %s, endKey: %s" ,
385
417
hex .EncodeToString (startKey ), hex .EncodeToString (endKey ))
386
418
}
387
419
m := & BackfillSubTaskMeta {
388
- SortedKVMeta : external.SortedKVMeta {
420
+ MetaGroups : [] * external.SortedKVMeta { {
389
421
StartKey : startKey ,
390
422
EndKey : endKey ,
391
- TotalKVSize : totalSize / uint64 (len ( instanceIDs ) ),
392
- },
423
+ TotalKVSize : kvMeta . TotalKVSize / uint64 (instanceCnt ),
424
+ }} ,
393
425
DataFiles : dataFiles ,
394
426
StatFiles : statFiles ,
395
427
RangeSplitKeys : rangeSplitKeys ,
@@ -400,10 +432,11 @@ func generateGlobalSortIngestPlan(
400
432
}
401
433
metaArr = append (metaArr , metaBytes )
402
434
if len (endKeyOfGroup ) == 0 {
403
- return metaArr , nil
435
+ break
404
436
}
405
437
startKey = endKey
406
438
}
439
+ return metaArr , nil
407
440
}
408
441
409
442
func generateMergePlan (
@@ -413,33 +446,41 @@ func generateMergePlan(
413
446
) ([][]byte , error ) {
414
447
// check data files overlaps,
415
448
// if data files overlaps too much, we need a merge step.
416
- subTaskMetas , err := taskHandle .GetPreviousSubtaskMetas (task .ID , proto .BackfillStepReadIndex )
449
+ multiStats := make ([]external.MultipleFilesStat , 0 , 100 )
450
+ var kvMetaGroups []* external.SortedKVMeta
451
+ err := forEachBackfillSubtaskMeta (taskHandle , task .ID , proto .BackfillStepReadIndex ,
452
+ func (subtask * BackfillSubTaskMeta ) {
453
+ if kvMetaGroups == nil {
454
+ kvMetaGroups = make ([]* external.SortedKVMeta , len (subtask .MetaGroups ))
455
+ }
456
+ for i , g := range subtask .MetaGroups {
457
+ if kvMetaGroups [i ] == nil {
458
+ kvMetaGroups [i ] = & external.SortedKVMeta {}
459
+ }
460
+ kvMetaGroups [i ].Merge (g )
461
+ multiStats = append (multiStats , g .MultipleFilesStats ... )
462
+ }
463
+ })
417
464
if err != nil {
418
465
return nil , err
419
466
}
420
- multiStats := make ([]external.MultipleFilesStat , 0 , 100 )
421
- for _ , bs := range subTaskMetas {
422
- var subtask BackfillSubTaskMeta
423
- err = json .Unmarshal (bs , & subtask )
424
- if err != nil {
425
- return nil , err
426
- }
427
- multiStats = append (multiStats , subtask .MultipleFilesStats ... )
428
- }
429
467
if skipMergeSort (multiStats ) {
430
468
logger .Info ("skip merge sort" )
431
469
return nil , nil
432
470
}
433
471
434
472
// generate merge sort plan.
435
- _ , _ , _ , multiFileStat , err := getSummaryFromLastStep (taskHandle , task .ID , proto .BackfillStepReadIndex )
436
- if err != nil {
437
- return nil , err
438
- }
439
473
dataFiles := make ([]string , 0 , 1000 )
440
- for _ , m := range multiFileStat {
441
- for _ , filePair := range m .Filenames {
442
- dataFiles = append (dataFiles , filePair [0 ])
474
+ for i , g := range kvMetaGroups {
475
+ if g == nil {
476
+ logger .Error ("meet empty kv group when getting subtask summary" ,
477
+ zap .Int64 ("taskID" , task .ID ))
478
+ return nil , errors .Errorf ("subtask kv group %d is empty" , i )
479
+ }
480
+ for _ , m := range g .MultipleFilesStats {
481
+ for _ , filePair := range m .Filenames {
482
+ dataFiles = append (dataFiles , filePair [0 ])
483
+ }
443
484
}
444
485
}
445
486
@@ -508,40 +549,25 @@ func getRangeSplitter(
508
549
rangeGroupSize , rangeGroupKeys , maxSizePerRange , maxKeysPerRange , true )
509
550
}
510
551
511
- func getSummaryFromLastStep (
552
+ func forEachBackfillSubtaskMeta (
512
553
taskHandle diststorage.TaskHandle ,
513
554
gTaskID int64 ,
514
555
step proto.Step ,
515
- ) (startKey , endKey kv.Key , totalKVSize uint64 , multiFileStat []external.MultipleFilesStat , err error ) {
556
+ fn func (subtask * BackfillSubTaskMeta ),
557
+ ) error {
516
558
subTaskMetas , err := taskHandle .GetPreviousSubtaskMetas (gTaskID , step )
517
559
if err != nil {
518
- return nil , nil , 0 , nil , errors .Trace (err )
560
+ return errors .Trace (err )
519
561
}
520
562
for _ , subTaskMeta := range subTaskMetas {
521
- var subtask BackfillSubTaskMeta
522
- err := json .Unmarshal (subTaskMeta , & subtask )
563
+ subtask , err := decodeBackfillSubTaskMeta (subTaskMeta )
523
564
if err != nil {
524
- return nil , nil , 0 , nil , errors .Trace (err )
525
- }
526
- // Skip empty subtask.StartKey/EndKey because it means
527
- // no records need to be written in this subtask.
528
- if subtask .StartKey == nil || subtask .EndKey == nil {
529
- continue
530
- }
531
-
532
- if len (startKey ) == 0 {
533
- startKey = subtask .StartKey
534
- } else {
535
- startKey = external .BytesMin (startKey , subtask .StartKey )
536
- }
537
- if len (endKey ) == 0 {
538
- endKey = subtask .EndKey
539
- } else {
540
- endKey = external .BytesMax (endKey , subtask .EndKey )
565
+ logutil .BgLogger ().Error ("unmarshal error" ,
566
+ zap .String ("category" , "ddl" ),
567
+ zap .Error (err ))
568
+ return errors .Trace (err )
541
569
}
542
- totalKVSize += subtask .TotalKVSize
543
-
544
- multiFileStat = append (multiFileStat , subtask .MultipleFilesStats ... )
570
+ fn (subtask )
545
571
}
546
- return startKey , endKey , totalKVSize , multiFileStat , nil
572
+ return nil
547
573
}
0 commit comments