diff --git a/pkg/restore/client.go b/pkg/restore/client.go index a7775ee15..082ec0ef4 100644 --- a/pkg/restore/client.go +++ b/pkg/restore/client.go @@ -735,16 +735,12 @@ func (rc *Client) GoValidateChecksum( outCh := make(chan struct{}, 1) workers := utils.NewWorkerPool(defaultChecksumConcurrency, "RestoreChecksum") go func() { - start := time.Now() wg, ectx := errgroup.WithContext(ctx) defer func() { log.Info("all checksum ended") if err := wg.Wait(); err != nil { errCh <- err } - elapsed := time.Since(start) - summary.CollectDuration("restore checksum", elapsed) - summary.CollectSuccessUnit("table checksum", 1, elapsed) outCh <- struct{}{} close(outCh) }() @@ -758,6 +754,12 @@ func (rc *Client) GoValidateChecksum( return } workers.ApplyOnErrorGroup(wg, func() error { + start := time.Now() + defer func() { + elapsed := time.Since(start) + summary.CollectDuration("restore checksum", elapsed) + summary.CollectSuccessUnit("table checksum", 1, elapsed) + }() err := rc.execChecksum(ectx, tbl, kvClient, concurrency) if err != nil { return errors.Trace(err) diff --git a/pkg/summary/collector.go b/pkg/summary/collector.go index 470859762..84dabaebb 100644 --- a/pkg/summary/collector.go +++ b/pkg/summary/collector.go @@ -3,10 +3,12 @@ package summary import ( - "fmt" + "strings" "sync" "time" + "github.com/docker/go-units" + "github.com/pingcap/log" "go.uber.org/zap" ) @@ -150,6 +152,10 @@ func (tc *logCollector) SetSuccessStatus(success bool) { tc.successStatus = success } +func logKeyFor(key string) string { + return strings.ReplaceAll(key, " ", "-") +} + func (tc *logCollector) Summary(name string) { tc.mu.Lock() defer func() { @@ -160,56 +166,48 @@ func (tc *logCollector) Summary(name string) { tc.mu.Unlock() }() - var msg string - switch tc.unit { - case BackupUnit: - msg = fmt.Sprintf("total backup ranges: %d, total success: %d, total failed: %d", - tc.failureUnitCount+tc.successUnitCount, tc.successUnitCount, tc.failureUnitCount) - case RestoreUnit: - msg = fmt.Sprintf("total restore files: %d, total success: %d, total failed: %d", - tc.failureUnitCount+tc.successUnitCount, tc.successUnitCount, tc.failureUnitCount) - } + logFields := make([]zap.Field, 0, len(tc.durations)+len(tc.ints)+3) + + logFields = append(logFields, + zap.Int("total-ranges", tc.failureUnitCount+tc.successUnitCount), + zap.Int("ranges-succeed", tc.successUnitCount), + zap.Int("ranges-failed", tc.failureUnitCount), + ) - logFields := make([]zap.Field, 0, len(tc.durations)+len(tc.ints)) for key, val := range tc.durations { - logFields = append(logFields, zap.Duration(key, val)) + logFields = append(logFields, zap.Duration(logKeyFor(key), val)) } for key, val := range tc.ints { - logFields = append(logFields, zap.Int(key, val)) + logFields = append(logFields, zap.Int(logKeyFor(key), val)) } for key, val := range tc.uints { - logFields = append(logFields, zap.Uint64(key, val)) + logFields = append(logFields, zap.Uint64(logKeyFor(key), val)) } if len(tc.failureReasons) != 0 || !tc.successStatus { for unitName, reason := range tc.failureReasons { - logFields = append(logFields, zap.String("unitName", unitName), zap.Error(reason)) + logFields = append(logFields, zap.String("unit-name", unitName), zap.Error(reason)) } - log.Info(name+" Failed summary : "+msg, logFields...) + log.Info(name+" failed summary", logFields...) return } totalCost := time.Duration(0) for _, cost := range tc.successCosts { totalCost += cost } - msg += fmt.Sprintf(", total take(%s time): %s", name, totalCost) - msg += fmt.Sprintf(", total take(real time): %s", time.Since(tc.startTime)) + + logFields = append(logFields, zap.Duration("total-take", time.Since(tc.startTime))) for name, data := range tc.successData { if name == TotalBytes { - fData := float64(data) / 1024 / 1024 - if fData > 1 { - msg += fmt.Sprintf(", total size(MB): %.2f", fData) - msg += fmt.Sprintf(", avg speed(MB/s): %.2f", fData/totalCost.Seconds()) - } else { - msg += fmt.Sprintf(", total size(Byte): %d", data) - msg += fmt.Sprintf(", avg speed(Byte/s): %.2f", float64(data)/totalCost.Seconds()) - } + logFields = append(logFields, + zap.String("data-size", units.HumanSize(float64(data))), + zap.String("average-speed", units.HumanSize(float64(data)/totalCost.Seconds())+"/s")) continue } - msg += fmt.Sprintf(", %s: %d", name, data) + logFields = append(logFields, zap.Uint64(logKeyFor(name), data)) } - tc.log(name+" Success summary: "+msg, logFields...) + tc.log(name+" success summary", logFields...) } // SetLogCollector allow pass LogCollector outside. diff --git a/pkg/summary/collector_test.go b/pkg/summary/collector_test.go index 4ad521848..f9ae53243 100644 --- a/pkg/summary/collector_test.go +++ b/pkg/summary/collector_test.go @@ -33,7 +33,7 @@ func (suit *testCollectorSuite) TestSumDurationInt(c *C) { col.SetSuccessStatus(true) col.Summary("foo") - c.Assert(len(fields), Equals, 3) + c.Assert(len(fields), Equals, 7) assertContains := func(field zap.Field) { for _, f := range fields { if f.Key == field.Key { diff --git a/tests/br_split_region_fail/run.sh b/tests/br_split_region_fail/run.sh index 464c194cb..84126eb98 100644 --- a/tests/br_split_region_fail/run.sh +++ b/tests/br_split_region_fail/run.sh @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -set -eu +set -eux DB="$TEST_NAME" TABLE="usertable" LOG="not-leader.log" @@ -51,7 +51,7 @@ BR_LOG_TO_TERM=1 grep "a error occurs on split region" $LOG && \ grep "split region meet not leader error" $LOG && \ -grep "Full restore Success" $LOG && \ +grep "Full restore success" $LOG && \ grep "find new leader" $LOG if [ $? -ne 0 ]; then