diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go
index 146a824fd0d5d..9133f17713ee2 100644
--- a/lightning/restore/restore.go
+++ b/lightning/restore/restore.go
@@ -58,6 +58,49 @@ type saveCp struct {
 	merger TableCheckpointMerger
 }
 
+// errorSummary is the failure recorded for a single table: the error itself
+// and the checkpoint status passed to record alongside it.
+type errorSummary struct {
+	status CheckpointStatus
+	err    error
+}
+
+// errorSummaries collects one errorSummary per table name. The embedded mutex
+// guards the map; the zero value is ready to use.
+type errorSummaries struct {
+	sync.Mutex
+	summary map[string]errorSummary
+}
+
+// emitLog writes all recorded failures as a single error-level log entry.
+// Nothing is logged when no failure has been recorded. Entries follow Go's
+// map iteration order, which is unspecified.
+func (es *errorSummaries) emitLog() {
+	es.Lock()
+	defer es.Unlock()
+	if errorCount := len(es.summary); errorCount > 0 {
+		var msg strings.Builder
+		fmt.Fprintf(&msg, "Totally **%d** tables failed to be imported.\n", errorCount)
+		for tableName, summary := range es.summary {
+			fmt.Fprintf(&msg, "- [%s] [%s] %s\n", tableName, summary.status.MetricName(), summary.err.Error())
+		}
+		common.AppLogger.Error(msg.String())
+	}
+}
+
+// record stores err and status as the failure of tableName, replacing any
+// failure recorded earlier for the same table.
+func (es *errorSummaries) record(tableName string, err error, status CheckpointStatus) {
+	es.Lock()
+	defer es.Unlock()
+	if es.summary == nil {
+		// Writing to a nil map panics; allocate lazily so that a zero-value
+		// errorSummaries (one not built by NewRestoreController) still works.
+		es.summary = make(map[string]errorSummary)
+	}
+	es.summary[tableName] = errorSummary{status: status, err: err}
+}
+
 type RestoreController struct {
 	cfg *config.Config
 	dbMetas map[string]*mydump.MDDatabaseMeta
@@ -67,6 +110,8 @@ type RestoreController struct {
 	importer *kv.Importer
 	postProcessLock sync.Mutex // a simple way to ensure post-processing is not concurrent without using complicated goroutines
 
+	errorSummaries errorSummaries
+
 	checkpointsDB CheckpointsDB
 	saveCpCh chan saveCp
 	checkpointsWg sync.WaitGroup
@@ -90,6 +135,10 @@ func NewRestoreController(ctx context.Context, dbMetas map[string]*mydump.MDData
 		regionWorkers: NewRestoreWorkerPool(ctx, cfg.App.RegionConcurrency, "region"),
 		importer: importer,
 
+		errorSummaries: errorSummaries{
+			summary: make(map[string]errorSummary),
+		},
+
 		checkpointsDB: cpdb,
 		saveCpCh: make(chan saveCp),
 	}
@@ -155,6 +204,8 @@ outside:
 	common.AppLogger.Infof("Timing statistic :\n%s", statistic)
 	common.AppLogger.Infof("the whole procedure takes %v", time.Since(timer))
 
+	rc.errorSummaries.emitLog()
+
 	return errors.Trace(err)
 }
 
@@ -222,8 +273,8 @@ func (rc *RestoreController) saveStatusCheckpoint(tableName string, err error, s
 	case err == nil:
 		break
 	case !common.IsContextCanceledError(err):
-		common.AppLogger.Warnf("Save checkpoint error for table %s before step %d: %+v", tableName, statusIfSucceed, err)
 		merger.SetInvalid()
+		rc.errorSummaries.record(tableName, err, statusIfSucceed)
 	default:
 		return
 	}
diff --git a/tests/error_summary/config.toml b/tests/error_summary/config.toml
new file mode 100644
index 0000000000000..1b320a351227f
--- /dev/null
+++ b/tests/error_summary/config.toml
@@ -0,0 +1,26 @@
+[lightning]
+check-requirements = false
+file = "/tmp/lightning_test_result/lightning-error-summary.log"
+level = "info"
+
+[checkpoint]
+enable = false
+
+[tikv-importer]
+addr = "127.0.0.1:8808"
+
+[mydumper]
+data-source-dir = "tests/error_summary/data"
+
+[tidb]
+host = "127.0.0.1"
+port = 4000
+user = "root"
+status-port = 10080
+pd-addr = "127.0.0.1:2379"
+log-level = "error"
+
+[post-restore]
+checksum = true
+compact = false
+analyze = false
diff --git a/tests/error_summary/data/error_summary-schema-create.sql b/tests/error_summary/data/error_summary-schema-create.sql
new file mode 100644
index 0000000000000..3f2f251c05755
--- /dev/null
+++ b/tests/error_summary/data/error_summary-schema-create.sql
@@ -0,0 +1 @@
+CREATE DATABASE error_summary;
diff --git a/tests/error_summary/data/error_summary.a-schema.sql b/tests/error_summary/data/error_summary.a-schema.sql
new file mode 100644
index 0000000000000..9194930f668c0
--- /dev/null
+++ b/tests/error_summary/data/error_summary.a-schema.sql
@@ -0,0 +1,4 @@
+CREATE TABLE a(
+    id INT NOT NULL PRIMARY KEY,
+    k INT NOT NULL
+);
diff --git a/tests/error_summary/data/error_summary.a.sql b/tests/error_summary/data/error_summary.a.sql
new file mode 100644
index 0000000000000..a2c1003db3991
--- /dev/null
+++ b/tests/error_summary/data/error_summary.a.sql
@@ -0,0 +1 @@
+INSERT INTO a (id, k) VALUES (2, 3), (5, 7);
diff --git 
a/tests/error_summary/data/error_summary.b-schema.sql b/tests/error_summary/data/error_summary.b-schema.sql
new file mode 100644
index 0000000000000..77020697ed871
--- /dev/null
+++ b/tests/error_summary/data/error_summary.b-schema.sql
@@ -0,0 +1,4 @@
+CREATE TABLE b(
+    id INT NOT NULL PRIMARY KEY,
+    k INT NOT NULL
+);
diff --git a/tests/error_summary/data/error_summary.b.sql b/tests/error_summary/data/error_summary.b.sql
new file mode 100644
index 0000000000000..b79d523eece7c
--- /dev/null
+++ b/tests/error_summary/data/error_summary.b.sql
@@ -0,0 +1 @@
+INSERT INTO b (id, k) VALUES (11, 13), (17, 19);
diff --git a/tests/error_summary/data/error_summary.c-schema.sql b/tests/error_summary/data/error_summary.c-schema.sql
new file mode 100644
index 0000000000000..31432c7326acc
--- /dev/null
+++ b/tests/error_summary/data/error_summary.c-schema.sql
@@ -0,0 +1,4 @@
+CREATE TABLE c(
+    id INT NOT NULL PRIMARY KEY,
+    k INT NOT NULL
+);
diff --git a/tests/error_summary/data/error_summary.c.sql b/tests/error_summary/data/error_summary.c.sql
new file mode 100644
index 0000000000000..be11c04ab4cc3
--- /dev/null
+++ b/tests/error_summary/data/error_summary.c.sql
@@ -0,0 +1 @@
+INSERT INTO c VALUES (10, 100), (1000, 10000);
diff --git a/tests/error_summary/run.sh b/tests/error_summary/run.sh
new file mode 100755
index 0000000000000..61b6716be04ef
--- /dev/null
+++ b/tests/error_summary/run.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+set -eu
+
+# Check that the error summary is written at the end of the import log.
+
+# The easiest way to induce error is to prepopulate the target table with conflicting content.
+run_sql 'CREATE DATABASE IF NOT EXISTS error_summary;'
+run_sql 'DROP TABLE IF EXISTS error_summary.a;'
+run_sql 'DROP TABLE IF EXISTS error_summary.c;'
+run_sql 'CREATE TABLE error_summary.a (id INT NOT NULL PRIMARY KEY, k INT NOT NULL);'
+run_sql 'CREATE TABLE error_summary.c (id INT NOT NULL PRIMARY KEY, k INT NOT NULL);'
+run_sql 'INSERT INTO error_summary.a VALUES (2, 4), (6, 8);'
+run_sql 'INSERT INTO error_summary.c VALUES (3, 9), (27, 81);'
+
+set +e
+run_lightning
+set -e
+
+# Verify that table `b` is indeed imported
+run_sql 'SELECT sum(id), sum(k) FROM error_summary.b'
+check_contains 'sum(id): 28'
+check_contains 'sum(k): 32'
+
+# Verify the log contains the expected messages at the last few lines
+tail -10 "$TEST_DIR/lightning-error-summary.log" > "$TEST_DIR/lightning-error-summary.tail"
+grep -Fq '[error] Totally **2** tables failed to be imported.' "$TEST_DIR/lightning-error-summary.tail"
+grep -Fq '[`error_summary`.`a`] [checksum] checksum mismatched' "$TEST_DIR/lightning-error-summary.tail"
+grep -Fq '[`error_summary`.`c`] [checksum] checksum mismatched' "$TEST_DIR/lightning-error-summary.tail"
+# `set -e` does not apply to `!`-negated commands, so fail explicitly.
+if grep -Fq '[`error_summary`.`b`] [checksum] checksum mismatched' "$TEST_DIR/lightning-error-summary.tail"; then
+    echo 'table `error_summary`.`b` should not appear in the error summary' >&2
+    exit 1
+fi