pingcap · ti-chi-bot · Apr 15, 2021 · Apr 14, 2021 · Apr 15, 2021 · Apr 15, 2021
diff --git a/pkg/restore/client.go b/pkg/restore/client.go
@@ -735,16 +735,12 @@ func (rc *Client) GoValidateChecksum(
 	outCh := make(chan struct{}, 1)
 	workers := utils.NewWorkerPool(defaultChecksumConcurrency, "RestoreChecksum")
 	go func() {
-		start := time.Now()
 		wg, ectx := errgroup.WithContext(ctx)
 		defer func() {
 			log.Info("all checksum ended")
 			if err := wg.Wait(); err != nil {
 				errCh <- err
 			}
-			elapsed := time.Since(start)
-			summary.CollectDuration("restore checksum", elapsed)
-			summary.CollectSuccessUnit("table checksum", 1, elapsed)
 			outCh <- struct{}{}
 			close(outCh)
 		}()
@@ -758,6 +754,12 @@ func (rc *Client) GoValidateChecksum(
 					return
 				}
 				workers.ApplyOnErrorGroup(wg, func() error {
+					start := time.Now()
+					defer func() {
+						elapsed := time.Since(start)
+						summary.CollectDuration("restore checksum", elapsed)
+						summary.CollectSuccessUnit("table checksum", 1, elapsed)
+					}()
 					err := rc.execChecksum(ectx, tbl, kvClient, concurrency)
 					if err != nil {
 						return errors.Trace(err)

diff --git a/pkg/summary/collector.go b/pkg/summary/collector.go
@@ -3,10 +3,12 @@
 package summary
 
 import (
-	"fmt"
+	"strings"
 	"sync"
 	"time"
 
+	"github.com/docker/go-units"
+
 	"github.com/pingcap/log"
 	"go.uber.org/zap"
 )
@@ -150,6 +152,10 @@ func (tc *logCollector) SetSuccessStatus(success bool) {
 	tc.successStatus = success
 }
 
+func logKeyFor(key string) string {
+	return strings.ReplaceAll(key, " ", "-")
+}
+
 func (tc *logCollector) Summary(name string) {
 	tc.mu.Lock()
 	defer func() {
@@ -160,56 +166,48 @@ func (tc *logCollector) Summary(name string) {
 		tc.mu.Unlock()
 	}()
 
-	var msg string
-	switch tc.unit {
-	case BackupUnit:
-		msg = fmt.Sprintf("total backup ranges: %d, total success: %d, total failed: %d",
-			tc.failureUnitCount+tc.successUnitCount, tc.successUnitCount, tc.failureUnitCount)
-	case RestoreUnit:
-		msg = fmt.Sprintf("total restore files: %d, total success: %d, total failed: %d",
-			tc.failureUnitCount+tc.successUnitCount, tc.successUnitCount, tc.failureUnitCount)
-	}
+	logFields := make([]zap.Field, 0, len(tc.durations)+len(tc.ints)+3)
+
+	logFields = append(logFields,
+		zap.Int("total-ranges", tc.failureUnitCount+tc.successUnitCount),
+		zap.Int("ranges-succeed", tc.successUnitCount),
+		zap.Int("ranges-failed", tc.failureUnitCount),
+	)
 
-	logFields := make([]zap.Field, 0, len(tc.durations)+len(tc.ints))
 	for key, val := range tc.durations {
-		logFields = append(logFields, zap.Duration(key, val))
+		logFields = append(logFields, zap.Duration(logKeyFor(key), val))
 	}
 	for key, val := range tc.ints {
-		logFields = append(logFields, zap.Int(key, val))
+		logFields = append(logFields, zap.Int(logKeyFor(key), val))
 	}
 	for key, val := range tc.uints {
-		logFields = append(logFields, zap.Uint64(key, val))
+		logFields = append(logFields, zap.Uint64(logKeyFor(key), val))
 	}
 
 	if len(tc.failureReasons) != 0 || !tc.successStatus {
 		for unitName, reason := range tc.failureReasons {
-			logFields = append(logFields, zap.String("unitName", unitName), zap.Error(reason))
+			logFields = append(logFields, zap.String("unit-name", unitName), zap.Error(reason))
 		}
-		log.Info(name+" Failed summary : "+msg, logFields...)
+		log.Info(name+" failed summary", logFields...)
 		return
 	}
 	totalCost := time.Duration(0)
 	for _, cost := range tc.successCosts {
 		totalCost += cost
 	}
-	msg += fmt.Sprintf(", total take(%s time): %s", name, totalCost)
-	msg += fmt.Sprintf(", total take(real time): %s", time.Since(tc.startTime))
+
+	logFields = append(logFields, zap.Duration("total-take", time.Since(tc.startTime)))
 	for name, data := range tc.successData {
 		if name == TotalBytes {
-			fData := float64(data) / 1024 / 1024
-			if fData > 1 {
-				msg += fmt.Sprintf(", total size(MB): %.2f", fData)
-				msg += fmt.Sprintf(", avg speed(MB/s): %.2f", fData/totalCost.Seconds())
-			} else {
-				msg += fmt.Sprintf(", total size(Byte): %d", data)
-				msg += fmt.Sprintf(", avg speed(Byte/s): %.2f", float64(data)/totalCost.Seconds())
-			}
+			logFields = append(logFields,
+				zap.String("data-size", units.HumanSize(float64(data))),
+				zap.String("average-speed", units.HumanSize(float64(data)/totalCost.Seconds())+"/s"))
 			continue
 		}
-		msg += fmt.Sprintf(", %s: %d", name, data)
+		logFields = append(logFields, zap.Uint64(logKeyFor(name), data))
 	}
 
-	tc.log(name+" Success summary: "+msg, logFields...)
+	tc.log(name+" success summary", logFields...)
 }
 
 // SetLogCollector allow pass LogCollector outside.

diff --git a/pkg/summary/collector_test.go b/pkg/summary/collector_test.go
@@ -33,7 +33,7 @@ func (suit *testCollectorSuite) TestSumDurationInt(c *C) {
 	col.SetSuccessStatus(true)
 	col.Summary("foo")
 
-	c.Assert(len(fields), Equals, 3)
+	c.Assert(len(fields), Equals, 7)
 	assertContains := func(field zap.Field) {
 		for _, f := range fields {
 			if f.Key == field.Key {

diff --git a/tests/br_split_region_fail/run.sh b/tests/br_split_region_fail/run.sh
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-set -eu
+set -eux
 DB="$TEST_NAME"
 TABLE="usertable"
 LOG="not-leader.log"
@@ -51,7 +51,7 @@ BR_LOG_TO_TERM=1
 
 grep "a error occurs on split region" $LOG && \
 grep "split region meet not leader error" $LOG && \
-grep "Full restore Success" $LOG && \
+grep "Full restore success" $LOG && \
 grep "find new leader" $LOG
 
 if [ $? -ne 0 ]; then