From 1c1007b51bffdb3ffd291aab871332e6771a753c Mon Sep 17 00:00:00 2001
From: lance6716
Date: Mon, 2 Nov 2020 13:38:39 +0800
Subject: [PATCH 1/4] save my work

add notes
save work
save work
fix unit test
remove tidbMgr in RestoreController
remove some comments
remove some comments
change logger in SQLWithRetry
revert replace log.Logger to *zap.Logger
dep: update uuid dependency to latest google/uuid (#452)
* dep: update satori/go.uuid to latest
* fix tests
* change to google/uuid
* fix build
* try fix test
* get familiar with google/uuid
* address comment
tidb-lightning-ctl: change default of -d to 'noop://' (#453)
also add noop:// to supported storage types (to represent an empty store)
replace tabs with spaces
try another port to fix CI
remove some comments
*: more glue
restore: fix the bug that gc life time ttl does not take effect (#448)
* fix gc ttl loop
* resolve comment and add tests
fix CI
report info to host TiDB
config: filter out all system schemas by default (#459)
backend: fix auto random default value for primary key (#457)
* fix auto generate auto random primary key column
* fix default for auto random primary key
* fix test
* use prev row id for auto random and add a test
* replace chunk with session opt
* fix
* fix
mydumper: fix parquet data parser (#435)
* fix parquet
* reorder imports
* fix test
* use empty collation
* fix an error and add more test cases
* add pointer type tests
* resolve comments
Co-authored-by: kennytm
address comment
backend/local: use range properties to optimize region range estimate (#422)
* use range properties to estimate region range
* post-restore: add optional level for post-restore operations (#421)
* add optional level for post-restore operations
* trim leading and suffix '"
* use UnmarshalTOML to unmarshal post restore op level
* resolve comments and fix unit test
* backend/local: do not retry epochNotMatch error when ingest sst (#419)
* do not retry epochNotMatch error when ingest sst
* add retry ingest for 'Raft raft: proposal dropped' error in ingest
* change some retryable error log level from Error to Warn
* fix nextKey
* add a comment for nextKey
* fix comment and add a unit test
* wrap time.Sleep in select
Co-authored-by: kennytm
* update
* use range properties to optimize region range estimate
* update pebble
* change the default value for batch-size
* add unit tests and resolve comments
* add a comment to range properties test
* add a comment
* add a test for range property with pebble
* rename const variable
Co-authored-by: kennytm
fix pd service id is empty (#460)
fix s3 parquet reader (#461)
Co-authored-by: Neil Shen
fix service gc ttl again (#465)
address comment
mydumper: verify file routing config (#470)
* fix file routing
* remove useless line
* remove redundant if check
rename a method in interface
save work
try fix CI
could work
change ctx usage
try fix CI
try fix CI
refine function interface
refine some function interface
debug CI
address comment
config: allow four byte-size config to be specified using human-readable units ("100 GiB") (#471)
* Makefile: add `make finish-prepare` action
* config: accept human-readable size for most byte-related config e.g.
allow `region-split-size = '96M'` in addition to `= 100663296` (known issue: these values' precisions will be truncated to 53 bits instead of supporting all 63 bits)
* restore: reduce chance of spurious errors from TestGcTTLManagerSingle
Co-authored-by: glorv
remove debug log
test: change double type syntax (#474)
address comment
checkpoint: add glue checkpoint
resolve cycle import
expose Retry
refine
change interface to cope with TiDB
fix SQL string
fix SQL
adjust interface to be embedded in TiDB
could import now
reduce TLS
restore: add `glue.Glue` interface and other functions (#456)
* save my work
* add notes
* save work
* save work
* fix unit test
* remove tidbMgr in RestoreController
* remove some comments
* remove some comments
* change logger in SQLWithRetry
* revert replace log.Logger to *zap.Logger
* replace tabs with spaces
* try another port to fix CI
* remove some comments
* *: more glue
* report info to host TiDB
* fix CI
* address comment
* address comment
* rename a method in interface
* save work
* try fix CI
* could work
* change ctx usage
* try fix CI
* try fix CI
* refine function interface
* refine some function interface
* debug CI
* address comment
* remove debug log
* address comment
modify code
add comment
refine some code
---
 Makefile                                      |   8 +
 cmd/tidb-lightning-ctl/main.go                |  20 +-
 cmd/tidb-lightning/main.go                    |  28 +-
 go.mod1                                       |  10 +-
 go.sum1                                       |  30 +-
 lightning/backend/backend.go                  |   6 +-
 lightning/backend/backend_test.go             |  10 +-
 lightning/backend/importer.go                 |  29 +-
 lightning/backend/importer_test.go            |   5 +-
 lightning/backend/local.go                    | 423 +++++-----
 lightning/backend/local_test.go               | 201 +++++
 lightning/backend/localhelper.go              |  10 +-
 lightning/backend/session.go                  |   2 +
 lightning/backend/sql2kv.go                   |  52 +-
 lightning/backend/sql2kv_test.go              |  46 ++
 lightning/backend/tidb.go                     |   2 +-
 lightning/checkpoints/checkpoints.go          | 279 ++++---
 lightning/checkpoints/glue_checkpoint.go      | 727 ++++++++++++++++++
 lightning/common/util.go                      |  32 +
 lightning/common/util_test.go                 |  13 +
 lightning/config/bytesize.go                  |  44 ++
 lightning/config/bytesize_test.go             | 129 ++++
 lightning/config/config.go                    |  62 +-
 lightning/config/config_test.go               |  37 +
 lightning/config/const.go                     |  20 +-
 lightning/config/global.go                    |   2 +-
 lightning/glue/glue.go                        | 115 +++
 lightning/lightning.go                        |  71 +-
 lightning/lightning_test.go                   |  39 +-
 lightning/log/log.go                          |   5 +
 lightning/mydump/csv_parser_test.go           |  42 +-
 lightning/mydump/parquet_parser.go            |  93 ++-
 lightning/mydump/parquet_parser_test.go       | 129 +++-
 lightning/mydump/parser_test.go               |  12 +-
 lightning/mydump/region.go                    |  10 +-
 lightning/mydump/region_test.go               |   2 +-
 lightning/mydump/router.go                    |   3 +
 lightning/mydump/router_test.go               |  31 +
 lightning/restore/checksum.go                 |  35 +-
 lightning/restore/checksum_test.go            | 183 ++++-
 lightning/restore/restore.go                  | 122 ++-
 lightning/restore/restore_test.go             |  16 +-
 lightning/restore/tidb.go                     |  93 +--
 lightning/restore/tidb_test.go                |  27 +-
 mock/backend.go                               |   2 +-
 tests/alter_random/run.sh                     |   3 +
 tests/auto_random_default/config.toml         |   2 +
 .../data/auto_random-schema-create.sql        |   1 +
 .../data/auto_random.t-schema.sql             |   5 +
 .../data/auto_random.t.0.sql                  |   5 +
 .../data/auto_random.t.1.sql                  |   5 +
 tests/auto_random_default/run.sh              |  58 ++
 tests/parquet/run.sh                          |   4 +
 tests/s3/run.sh                               |   2 +-
 tests/various_types/data/vt.double-schema.sql |   2 +-
 tidb-lightning.toml                           |  10 +-
 tools/go.sum                                  |   1 +
 57 files changed, 2725 insertions(+), 630 deletions(-)
 create mode 100644 lightning/checkpoints/glue_checkpoint.go
 create mode 100644 lightning/config/bytesize.go
 create mode 100644
lightning/config/bytesize_test.go create mode 100644 lightning/glue/glue.go create mode 100644 tests/auto_random_default/config.toml create mode 100644 tests/auto_random_default/data/auto_random-schema-create.sql create mode 100644 tests/auto_random_default/data/auto_random.t-schema.sql create mode 100644 tests/auto_random_default/data/auto_random.t.0.sql create mode 100644 tests/auto_random_default/data/auto_random.t.1.sql create mode 100644 tests/auto_random_default/run.sh diff --git a/Makefile b/Makefile index b88c0a4ec..577ded8c4 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,9 @@ default: clean lightning lightning-ctl checksuccess prepare: $(PREPARE_MOD) +finish-prepare: + $(FINISH_MOD) + clean: rm -f $(LIGHTNING_BIN) $(LIGHTNING_CTRL_BIN) $(FAILPOINT_CTL_BIN) $(REVIVE_BIN) $(VFSGENDEV_BIN) go.mod go.sum @@ -132,6 +135,11 @@ update: GO111MODULE=on go mod tidy $(FINISH_MOD) +manual_update: + GO111MODULE=on go mod verify + GO111MODULE=on go mod tidy + $(FINISH_MOD) + $(FAILPOINT_CTL_BIN): cd tools && $(GOBUILD) -o ../$(FAILPOINT_CTL_BIN) github.com/pingcap/failpoint/failpoint-ctl diff --git a/cmd/tidb-lightning-ctl/main.go b/cmd/tidb-lightning-ctl/main.go index ca2fb2168..c3a91bcf2 100644 --- a/cmd/tidb-lightning-ctl/main.go +++ b/cmd/tidb-lightning-ctl/main.go @@ -22,11 +22,12 @@ import ( "strconv" "strings" + "github.com/google/uuid" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/import_sstpb" - uuid "github.com/satori/go.uuid" kv "github.com/pingcap/tidb-lightning/lightning/backend" + "github.com/pingcap/tidb-lightning/lightning/checkpoints" "github.com/pingcap/tidb-lightning/lightning/common" "github.com/pingcap/tidb-lightning/lightning/config" "github.com/pingcap/tidb-lightning/lightning/restore" @@ -49,6 +50,13 @@ func run() error { ) globalCfg := config.Must(config.LoadGlobalConfig(os.Args[1:], func(fs *flag.FlagSet) { + // change the default of `-d` from empty to 'noop://'. + // there is a check if `-d` points to a valid storage, and '' is not. + // since tidb-lightning-ctl does not need `-d` we change the default to a valid but harmless value. 
+ dFlag := fs.Lookup("d") + dFlag.Value.Set("noop://") + dFlag.DefValue = "noop://" + compact = fs.Bool("compact", false, "do manual compaction on the target cluster") mode = fs.String("switch-mode", "", "switch tikv into import mode or normal mode, values can be ['import', 'normal']") flagFetchMode = fs.Bool("fetch-mode", false, "obtain the current mode of every tikv in the cluster") @@ -165,7 +173,7 @@ func fetchMode(ctx context.Context, cfg *config.Config, tls *common.TLS) error { } func checkpointRemove(ctx context.Context, cfg *config.Config, tableName string) error { - cpdb, err := restore.OpenCheckpointsDB(ctx, cfg) + cpdb, err := checkpoints.OpenCheckpointsDB(ctx, cfg) if err != nil { return errors.Trace(err) } @@ -175,7 +183,7 @@ func checkpointRemove(ctx context.Context, cfg *config.Config, tableName string) } func checkpointErrorIgnore(ctx context.Context, cfg *config.Config, tableName string) error { - cpdb, err := restore.OpenCheckpointsDB(ctx, cfg) + cpdb, err := checkpoints.OpenCheckpointsDB(ctx, cfg) if err != nil { return errors.Trace(err) } @@ -185,7 +193,7 @@ func checkpointErrorIgnore(ctx context.Context, cfg *config.Config, tableName st } func checkpointErrorDestroy(ctx context.Context, cfg *config.Config, tls *common.TLS, tableName string) error { - cpdb, err := restore.OpenCheckpointsDB(ctx, cfg) + cpdb, err := checkpoints.OpenCheckpointsDB(ctx, cfg) if err != nil { return errors.Trace(err) } @@ -257,7 +265,7 @@ func checkpointErrorDestroy(ctx context.Context, cfg *config.Config, tls *common } func checkpointDump(ctx context.Context, cfg *config.Config, dumpFolder string) error { - cpdb, err := restore.OpenCheckpointsDB(ctx, cfg) + cpdb, err := checkpoints.OpenCheckpointsDB(ctx, cfg) if err != nil { return errors.Trace(err) } @@ -311,7 +319,7 @@ func unsafeCloseEngine(ctx context.Context, importer kv.Backend, engine string) return ce, errors.Trace(err) } - engineUUID, err := uuid.FromString(engine) + engineUUID, err := uuid.Parse(engine) if err != nil { return nil, errors.Trace(err) } diff --git a/cmd/tidb-lightning/main.go b/cmd/tidb-lightning/main.go index 15b03bb56..990248131 100644 --- a/cmd/tidb-lightning/main.go +++ b/cmd/tidb-lightning/main.go @@ -14,6 +14,7 @@ package main import ( + "context" "fmt" "os" "os/signal" @@ -28,10 +29,10 @@ import ( ) func main() { - cfg := config.Must(config.LoadGlobalConfig(os.Args[1:], nil)) - fmt.Fprintf(os.Stdout, "Verbose debug logs will be written to %s\n\n", cfg.App.Config.File) + globalCfg := config.Must(config.LoadGlobalConfig(os.Args[1:], nil)) + fmt.Fprintf(os.Stdout, "Verbose debug logs will be written to %s\n\n", globalCfg.App.Config.File) - app := lightning.New(cfg) + app := lightning.New(globalCfg) sc := make(chan os.Signal, 1) signal.Notify(sc, @@ -59,7 +60,7 @@ func main() { // // Local mode need much more memory than importer/tidb mode, if the gc percentage is too high, // lightning memory usage will also be high. 
- if cfg.TikvImporter.Backend != config.BackendLocal { + if globalCfg.TikvImporter.Backend != config.BackendLocal { gogc := os.Getenv("GOGC") if gogc == "" { old := debug.SetGCPercent(500) @@ -74,11 +75,18 @@ func main() { return } - if cfg.App.ServerMode { - err = app.RunServer() - } else { - err = app.RunOnce() - } + err = func() error { + if globalCfg.App.ServerMode { + return app.RunServer() + } else { + cfg := config.NewConfig() + if err := cfg.LoadFromGlobal(globalCfg); err != nil { + return err + } + return app.RunOnce(context.Background(), cfg, nil, nil) + } + }() + if err != nil { logger.Error("tidb lightning encountered error stack info", zap.Error(err)) logger.Error("tidb lightning encountered error", log.ShortError(err)) @@ -89,7 +97,7 @@ func main() { } // call Sync() with log to stdout may return error in some case, so just skip it - if cfg.App.File != "" { + if globalCfg.App.File != "" { syncErr := logger.Sync() if syncErr != nil { fmt.Fprintln(os.Stderr, "sync log failed", syncErr) diff --git a/go.mod1 b/go.mod1 index 95d1e8557..ec76eca90 100644 --- a/go.mod1 +++ b/go.mod1 @@ -7,14 +7,17 @@ require ( github.com/DATA-DOG/go-sqlmock v1.4.1 github.com/aws/aws-sdk-go v1.35.2 // indirect github.com/carlmjohnson/flagext v0.0.11 - github.com/cockroachdb/pebble v0.0.0-20200617141519-3b241b76ed3b + github.com/cockroachdb/pebble v0.0.0-20201023120638-f1224da22976 github.com/coreos/go-semver v0.3.0 github.com/dgraph-io/ristretto v0.0.2-0.20200115201040-8f368f2f2ab3 // indirect + github.com/docker/go-units v0.4.0 github.com/fsouza/fake-gcs-server v1.19.0 // indirect github.com/go-sql-driver/mysql v1.5.0 github.com/gogo/protobuf v1.3.1 github.com/golang/mock v1.4.4 + github.com/google/btree v1.0.0 github.com/google/go-cmp v0.5.0 // indirect + github.com/google/uuid v1.1.1 github.com/joho/sqltocsv v0.0.0-20190824231449-5650f27fd5b6 github.com/juju/loggo v0.0.0-20180524022052-584905176618 // indirect github.com/onsi/ginkgo v1.13.0 // indirect @@ -29,16 +32,15 @@ require ( github.com/pingcap/tidb-tools v4.0.5-0.20200820092506-34ea90c93237+incompatible github.com/prometheus/client_golang v1.5.1 github.com/prometheus/client_model v0.2.0 - github.com/satori/go.uuid v1.2.0 github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/tikv/pd v1.1.0-beta.0.20200910042021-254d1345be09 - github.com/xitongsys/parquet-go v1.5.4-0.20201010004835-f51647f24120 + github.com/xitongsys/parquet-go v1.5.5-0.20201110004701-b09c49d6d457 github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 go.etcd.io/etcd v0.5.0-alpha.5.0.20200824191128-ae9734ed278b // indirect go.uber.org/zap v1.16.0 golang.org/x/net v0.0.0-20200904194848-62affa334b73 - golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 + golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f // indirect golang.org/x/text v0.3.3 golang.org/x/tools v0.0.0-20200904185747-39188db58858 // indirect diff --git a/go.sum1 b/go.sum1 index ece649fb7..3da598672 100644 --- a/go.sum1 +++ b/go.sum1 @@ -51,8 +51,10 @@ github.com/VividCortex/ewma v1.1.1 h1:MnEK4VOv6n0RSY4vtRe3h11qjxL3+t0B8yOL8iMXdc github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= github.com/VividCortex/mysqlerr v0.0.0-20200629151747-c28746d985dd/go.mod h1:f3HiCrHjHBdcm6E83vGaXh1KomZMA2P6aeo3hKx/wg0= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= 
+github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4 h1:Hs82Z41s6SdL1CELW+XaDYmOH4hkBN4/N9og/AsOv7E= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q= github.com/apache/thrift v0.0.0-20181112125854-24918abba929 h1:ubPe2yRkS6A/X37s0TVGfuN42NV2h0BlzWj0X76RoUw= @@ -101,8 +103,10 @@ github.com/cockroachdb/errors v1.2.4 h1:Lap807SXTH5tri2TivECb/4abUkMZC9zRoLarvcK github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA= github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f h1:o/kfcElHqOiXqcou5a3rIlMc7oJbMQkeLk0VQJ7zgqY= github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= -github.com/cockroachdb/pebble v0.0.0-20200617141519-3b241b76ed3b h1:YHjo2xnqFCeFa0CdxEccHfUY1/DnXPAZdZt0+s/Mvdg= -github.com/cockroachdb/pebble v0.0.0-20200617141519-3b241b76ed3b/go.mod h1:crLnbSFbwAcQNs9FPfI1avHb5BqVgqZcr4r+IzpJ5FM= +github.com/cockroachdb/pebble v0.0.0-20201023120638-f1224da22976 h1:gGjhleKSWZCZFrhQSesjg8spRD+/p8vjwdNEGUv8Ovg= +github.com/cockroachdb/pebble v0.0.0-20201023120638-f1224da22976/go.mod h1:BbtTitvfmE0eZNcncJgJw5BlQhskTzgZgoISnY+8s6k= +github.com/cockroachdb/redact v0.0.0-20200622112456-cd282804bbd3 h1:2+dpIJzYMSbLi0587YXpi8tOJT52qCOI/1I0UNThc/I= +github.com/cockroachdb/redact v0.0.0-20200622112456-cd282804bbd3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd h1:qMd81Ts1T2OTKmB4acZcyKaMtRnY5Y44NuXGX2GFJ1w= github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= @@ -122,6 +126,7 @@ github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7 github.com/coreos/go-systemd v0.0.0-20181031085051-9002847aa142/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e h1:Wf6HqHfScWJN9/ZjdUKyjop4mf3Qdd+1TvvltAvM3m8= github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f h1:JOrtw2xFKzlg+cbHpyrpLDmnN1HqhBfnX7WDiW7eG2c= github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg= @@ -443,6 +448,7 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb github.com/montanaflynn/stats v0.0.0-20151014174947-eeaced052adb h1:bsjNADsjHq0gjU7KO7zwoX5k3HtFdf6TDzB3ncl5iUs= github.com/montanaflynn/stats v0.0.0-20151014174947-eeaced052adb/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/montanaflynn/stats 
v0.0.0-20180911141734-db72e6cae808/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= +github.com/montanaflynn/stats v0.5.0 h1:2EkzeTSqBB4V4bJwWrt5gIIrZmpJBcoIRGS2kWLgzmk= github.com/montanaflynn/stats v0.5.0/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/ncw/directio v1.0.4 h1:CojwI07mCEmRkajgx42Pf8jyCwTs1ji9/Ij9/PJG12k= @@ -548,6 +554,7 @@ github.com/pingcap/failpoint v0.0.0-20200506114213-c17f16071c53 h1:8sC8OLinmaw24 github.com/pingcap/failpoint v0.0.0-20200506114213-c17f16071c53/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= github.com/pingcap/failpoint v0.0.0-20200603062251-b230c36c413c h1:cm0zAj+Tab94mp4OH+VoLJiSNQvZO4pWDGJ8KEk2a0c= github.com/pingcap/failpoint v0.0.0-20200603062251-b230c36c413c/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= +github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMtVcOkjUcuQKh+YrluSo7+7YMCQSzy30= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= github.com/pingcap/fn v0.0.0-20191016082858-07623b84a47d h1:rCmRK0lCRrHMUbS99BKFYhK9YxJDNw0xB033cQbYo0s= github.com/pingcap/fn v0.0.0-20191016082858-07623b84a47d/go.mod h1:fMRU1BA1y+r89AxUoaAar4JjrhUkVDt0o0Np6V8XbDQ= @@ -732,6 +739,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14/go.mod h1:gxQT6pBGRuIGunNf/+tSOB5OHvguWi8Tbt82WOkf35E= github.com/swaggo/gin-swagger v1.2.0/go.mod h1:qlH2+W7zXGZkczuL+r2nEBR2JTT+/lX05Nn6vPhc7OI= github.com/swaggo/http-swagger v0.0.0-20200103000832-0e9263c4b516/go.mod h1:O1lAbCgAAX/KZ80LM/OXwtWFI/5TvZlwxSg8Cq08PV0= @@ -743,6 +752,7 @@ github.com/swaggo/swag v1.6.6-0.20200323071853-8e21f4cefeea/go.mod h1:xDhTyuFIuj github.com/swaggo/swag v1.6.6-0.20200529100950-7c765ddd0476/go.mod h1:xDhTyuFIujYiN3DKWC/H/83xcfHp+UE/IzWWampG7Zc= github.com/syndtr/goleveldb v0.0.0-20180815032940-ae2bd5eed72d h1:4J9HCZVpvDmj2tiKGSTUnb3Ok/9CEQb9oqu9LHKQQpc= github.com/syndtr/goleveldb v0.0.0-20180815032940-ae2bd5eed72d/go.mod h1:Z4AUp2Km+PwemOoO/VB5AOx9XSsIItzFjoJlOSiYmn0= +github.com/syndtr/goleveldb v1.0.1-0.20190625010220-02440ea7a285 h1:uSDYjYejelKyceA6DiCsngFof9jAyeaSyX9XC5a1a7Q= github.com/syndtr/goleveldb v1.0.1-0.20190625010220-02440ea7a285/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 h1:mbAskLJ0oJfDRtkanvQPiooDH8HvJ2FBh+iKT/OmiQQ= github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2/go.mod h1:2PfKggNGDuadAa0LElHrByyrz4JPZ9fFx6Gs7nx7ZZU= @@ -786,8 +796,8 @@ github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgq github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xitongsys/parquet-go v1.5.1/go.mod 
h1:xUxwM8ELydxh4edHGegYq1pA8NnMKDx0K/GyB0o2bww= -github.com/xitongsys/parquet-go v1.5.4-0.20201010004835-f51647f24120 h1:Vq8/hS6jg6KTWQlEaHvnLZMRKkK6amkAtBMjB7UUrBE= -github.com/xitongsys/parquet-go v1.5.4-0.20201010004835-f51647f24120/go.mod h1:pheqtXeHQFzxJk45lRQ0UIGIivKnLXvialZSFWs81A8= +github.com/xitongsys/parquet-go v1.5.5-0.20201110004701-b09c49d6d457 h1:tBbuFCtyJNKT+BFAv6qjvTFpVdy97IYNaBwGUXifIUs= +github.com/xitongsys/parquet-go v1.5.5-0.20201110004701-b09c49d6d457/go.mod h1:pheqtXeHQFzxJk45lRQ0UIGIivKnLXvialZSFWs81A8= github.com/xitongsys/parquet-go-source v0.0.0-20190524061010-2b72cbee77d5 h1:XmN4NA9133N6OvDEAR6TVVhFq5NgetYTyeKl1EMNazs= github.com/xitongsys/parquet-go-source v0.0.0-20190524061010-2b72cbee77d5/go.mod h1:xxCx7Wpym/3QCo6JhujJX51dzSXrwmb0oH6FQb39SEA= github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0 h1:a742S4V5A15F93smuVxA60LQWsrCnN8bKeWDBARU1/k= @@ -805,6 +815,7 @@ github.com/zhangjinpeng1987/raft v0.0.0-20200819064223-df31bb68a018/go.mod h1:rT go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3 h1:MUGmc65QhB3pIlaQ5bB4LwqSj6GIonVJXpZiaKNyaKk= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738 h1:lWF4f9Nypl1ZqSb4gLeh/DGvBYVaUYHuiB93teOmwgc= @@ -895,6 +906,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180406214816-61147c48b25b/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -948,7 +960,10 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 h1:qwRHBd0NqMbJxfbotnDhm2ByMI1Shq4Y6oRJo21SGJA= golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1057,6 +1072,7 @@ golang.org/x/xerrors 
v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= @@ -1097,6 +1113,7 @@ google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150 h1:VPpdpQkGvFicX9y google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63 h1:YzfoEYWbODU5Fbt37+h7X16BWQbad7Q4S6gclTKFXM8= google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= @@ -1110,6 +1127,7 @@ google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQ google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.1 h1:zvIju4sqAGvwKspUQOhwnpcqSbzi7/H6QomNNjTL4sk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= @@ -1119,6 +1137,7 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= gopkg.in/alecthomas/gometalinter.v2 v2.0.12/go.mod h1:NDRytsqEZyolNuAgTzJkZMkSQM7FIKyzVzGhjB/qfYo= +gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alecthomas/kingpin.v3-unstable v3.0.0-20180810215634-df19058c872c/go.mod h1:3HH7i1SgMqlzxCcBmUHW657sD4Kvv9sC3HpL3YukzwA= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1160,6 +1179,8 @@ gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 
v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= @@ -1168,6 +1189,7 @@ honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4 h1:UoveltGrhghAA7ePc+e+QYDHXrBps2PqFZiHkGR/xK8= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +honnef.co/go/tools v0.0.1-2020.1.5 h1:nI5egYTGJakVyOryqLs1cQO5dO0ksin5XXs2pspk75k= honnef.co/go/tools v0.0.1-2020.1.5/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= modernc.org/mathutil v1.0.0 h1:93vKjrJopTPrtTNpZ8XIovER7iCIH1QU7wNbOQXC60I= diff --git a/lightning/backend/backend.go b/lightning/backend/backend.go index 29d7a791d..643a1f688 100644 --- a/lightning/backend/backend.go +++ b/lightning/backend/backend.go @@ -18,13 +18,13 @@ import ( "fmt" "time" + "github.com/google/uuid" "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/parser/model" "github.com/pingcap/tidb/store/tikv/oracle" "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/types" - uuid "github.com/satori/go.uuid" "go.uber.org/zap" "github.com/pingcap/tidb-lightning/lightning/common" @@ -76,11 +76,11 @@ func makeLogger(tag string, engineUUID uuid.UUID) log.Logger { func MakeUUID(tableName string, engineID int32) (string, uuid.UUID) { tag := makeTag(tableName, engineID) - engineUUID := uuid.NewV5(engineNamespace, tag) + engineUUID := uuid.NewSHA1(engineNamespace, []byte(tag)) return tag, engineUUID } -var engineNamespace = uuid.Must(uuid.FromString("d68d6abe-c59e-45d6-ade8-e2b0ceb7bedf")) +var engineNamespace = uuid.MustParse("d68d6abe-c59e-45d6-ade8-e2b0ceb7bedf") // AbstractBackend is the abstract interface behind Backend. // Implementations of this interface must be goroutine safe: you can share an diff --git a/lightning/backend/backend_test.go b/lightning/backend/backend_test.go index e06c23b30..de718158f 100644 --- a/lightning/backend/backend_test.go +++ b/lightning/backend/backend_test.go @@ -5,10 +5,10 @@ import ( "time" "github.com/golang/mock/gomock" + "github.com/google/uuid" . "github.com/pingcap/check" "github.com/pingcap/errors" "github.com/pingcap/parser/mysql" - uuid "github.com/satori/go.uuid" kv "github.com/pingcap/tidb-lightning/lightning/backend" "github.com/pingcap/tidb-lightning/mock" @@ -40,7 +40,7 @@ func (s *backendSuite) TestOpenCloseImportCleanUpEngine(c *C) { defer s.tearDownTest() ctx := context.Background() - engineUUID := uuid.FromStringOrNil("902efee3-a3f9-53d4-8c82-f12fb1900cd1") + engineUUID := uuid.MustParse("902efee3-a3f9-53d4-8c82-f12fb1900cd1") openCall := s.mockBackend.EXPECT(). OpenEngine(ctx, engineUUID). 
@@ -73,7 +73,7 @@ func (s *backendSuite) TestUnsafeCloseEngine(c *C) { defer s.tearDownTest() ctx := context.Background() - engineUUID := uuid.FromStringOrNil("7e3f3a3c-67ce-506d-af34-417ec138fbcb") + engineUUID := uuid.MustParse("7e3f3a3c-67ce-506d-af34-417ec138fbcb") closeCall := s.mockBackend.EXPECT(). CloseEngine(ctx, engineUUID). @@ -94,7 +94,7 @@ func (s *backendSuite) TestUnsafeCloseEngineWithUUID(c *C) { defer s.tearDownTest() ctx := context.Background() - engineUUID := uuid.FromStringOrNil("f1240229-79e0-4d8d-bda0-a211bf493796") + engineUUID := uuid.MustParse("f1240229-79e0-4d8d-bda0-a211bf493796") closeCall := s.mockBackend.EXPECT(). CloseEngine(ctx, engineUUID). @@ -115,7 +115,7 @@ func (s *backendSuite) TestWriteEngine(c *C) { defer s.tearDownTest() ctx := context.Background() - engineUUID := uuid.FromStringOrNil("902efee3-a3f9-53d4-8c82-f12fb1900cd1") + engineUUID := uuid.MustParse("902efee3-a3f9-53d4-8c82-f12fb1900cd1") rows0 := mock.NewMockRows(s.controller) rows1 := mock.NewMockRows(s.controller) diff --git a/lightning/backend/importer.go b/lightning/backend/importer.go index afa1260f8..0b9ec80d0 100644 --- a/lightning/backend/importer.go +++ b/lightning/backend/importer.go @@ -21,11 +21,12 @@ import ( "time" "github.com/coreos/go-semver/semver" + "github.com/google/uuid" "github.com/pingcap/errors" kv "github.com/pingcap/kvproto/pkg/import_kvpb" "github.com/pingcap/parser/model" + "github.com/pingcap/tidb-lightning/lightning/glue" "github.com/pingcap/tidb/table" - uuid "github.com/satori/go.uuid" "go.uber.org/zap" "google.golang.org/grpc" @@ -119,7 +120,7 @@ func isIgnorableOpenCloseEngineError(err error) bool { func (importer *importer) OpenEngine(ctx context.Context, engineUUID uuid.UUID) error { req := &kv.OpenEngineRequest{ - Uuid: engineUUID.Bytes(), + Uuid: engineUUID[:], } _, err := importer.cli.OpenEngine(ctx, req) @@ -131,7 +132,7 @@ func (importer *importer) OpenEngine(ctx context.Context, engineUUID uuid.UUID) func (importer *importer) CloseEngine(ctx context.Context, engineUUID uuid.UUID) error { req := &kv.CloseEngineRequest{ - Uuid: engineUUID.Bytes(), + Uuid: engineUUID[:], } _, err := importer.cli.CloseEngine(ctx, req) @@ -143,7 +144,7 @@ func (importer *importer) CloseEngine(ctx context.Context, engineUUID uuid.UUID) func (importer *importer) ImportEngine(ctx context.Context, engineUUID uuid.UUID) error { req := &kv.ImportEngineRequest{ - Uuid: engineUUID.Bytes(), + Uuid: engineUUID[:], PdAddr: importer.pdAddr, } @@ -153,7 +154,7 @@ func (importer *importer) ImportEngine(ctx context.Context, engineUUID uuid.UUID func (importer *importer) CleanupEngine(ctx context.Context, engineUUID uuid.UUID) error { req := &kv.CleanupEngineRequest{ - Uuid: engineUUID.Bytes(), + Uuid: engineUUID[:], } _, err := importer.cli.CleanupEngine(ctx, req) @@ -195,7 +196,7 @@ func (importer *importer) WriteRows( req := &kv.WriteEngineRequest{ Chunk: &kv.WriteEngineRequest_Head{ Head: &kv.WriteHead{ - Uuid: engineUUID.Bytes(), + Uuid: engineUUID[:], }, }, } @@ -267,6 +268,22 @@ func checkTiDBVersion(tls *common.TLS, requiredVersion semver.Version) error { return checkVersion("TiDB", requiredVersion, *version) } +func checkTiDBVersionBySQL(g glue.Glue, requiredVersion semver.Version) error { + versionStr, err := g.GetSQLExecutor().ObtainStringWithLog( + context.Background(), + "SELECT version();", + "check TiDB version", + log.L()) + if err != nil { + return errors.Trace(err) + } + version, err := common.ExtractTiDBVersion(versionStr) + if err != nil { + return errors.Trace(err) + } 
+ return checkVersion("TiDB", requiredVersion, *version) +} + func checkPDVersion(tls *common.TLS, pdAddr string, requiredVersion semver.Version) error { version, err := common.FetchPDVersion(tls, pdAddr) if err != nil { diff --git a/lightning/backend/importer_test.go b/lightning/backend/importer_test.go index 00b9b6193..22702c76d 100644 --- a/lightning/backend/importer_test.go +++ b/lightning/backend/importer_test.go @@ -6,10 +6,10 @@ import ( "testing" "github.com/golang/mock/gomock" + "github.com/google/uuid" . "github.com/pingcap/check" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/import_kvpb" - uuid "github.com/satori/go.uuid" kvpb "github.com/pingcap/kvproto/pkg/import_kvpb" @@ -42,7 +42,8 @@ func (s *importerSuite) setUpTest(c *C) { importer := kv.NewMockImporter(s.mockClient, testPDAddr) s.ctx = context.Background() - s.engineUUID = uuid.FromStringOrNil("7e3f3a3c-67ce-506d-af34-417ec138fbcb").Bytes() + engineUUID := uuid.MustParse("7e3f3a3c-67ce-506d-af34-417ec138fbcb") + s.engineUUID = engineUUID[:] s.kvPairs = kv.MakeRowsFromKvPairs([]common.KvPair{ { Key: []byte("k1"), diff --git a/lightning/backend/local.go b/lightning/backend/local.go index 76e179001..c162c37e4 100644 --- a/lightning/backend/local.go +++ b/lightning/backend/local.go @@ -18,6 +18,7 @@ import ( "context" "encoding/binary" "encoding/json" + "io" "io/ioutil" "math" "os" @@ -30,6 +31,8 @@ import ( "github.com/cockroachdb/pebble" "github.com/coreos/go-semver/semver" + "github.com/google/btree" + "github.com/google/uuid" split "github.com/pingcap/br/pkg/restore" "github.com/pingcap/errors" "github.com/pingcap/failpoint" @@ -38,10 +41,10 @@ import ( "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/parser/model" + "github.com/pingcap/tidb-lightning/lightning/glue" "github.com/pingcap/tidb/table" - "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/util/codec" - uuid "github.com/satori/go.uuid" + "github.com/pingcap/tidb/util/hack" pd "github.com/tikv/pd/client" "go.uber.org/zap" "google.golang.org/grpc" @@ -68,6 +71,11 @@ const ( // See: https://github.com/tikv/tikv/blob/e030a0aae9622f3774df89c62f21b2171a72a69e/etc/config-template.toml#L360 regionMaxKeyCount = 1_440_000 + + propRangeIndex = "tikv.range_index" + + defaultPropSizeIndexDistance = 4 * 1024 * 1024 // 4MB + defaultPropKeysIndexDistance = 40 * 1024 ) var ( @@ -113,6 +121,33 @@ func (e *LocalFile) Cleanup(dataDir string) error { return os.RemoveAll(dbPath) } +func (e *LocalFile) getSizeProperties() (*sizeProperties, error) { + sstables, err := e.db.SSTables(pebble.WithProperties()) + if err != nil { + log.L().Warn("get table properties failed", zap.Stringer("engine", e.Uuid), log.ShortError(err)) + return nil, errors.Trace(err) + } + + sizeProps := newSizeProperties() + for _, level := range sstables { + for _, info := range level { + if prop, ok := info.Properties.UserProperties[propRangeIndex]; ok { + data := hack.Slice(prop) + rangeProps, err := decodeRangeProperties(data) + if err != nil { + log.L().Warn("decodeRangeProperties failed", zap.Stringer("engine", e.Uuid), + zap.Stringer("fileNum", info.FileNum), log.ShortError(err)) + return nil, errors.Trace(err) + } + + sizeProps.addAll(rangeProps) + } + } + } + + return sizeProps, nil +} + type gRPCConns struct { mu sync.Mutex conns map[uint64]*connPool @@ -133,6 +168,7 @@ type local struct { splitCli split.SplitClient tls *common.TLS pdAddr string + g glue.Glue localStoreDir string regionSplitSize int64 @@ -214,6 +250,7 @@ func 
NewLocalBackend( rangeConcurrency int, sendKVPairs int, enableCheckpoint bool, + g glue.Glue, ) (Backend, error) { pdCli, err := pd.NewClient([]string{pdAddr}, tls.ToPDSecurityOption()) if err != nil { @@ -244,6 +281,7 @@ func NewLocalBackend( splitCli: splitCli, tls: tls, pdAddr: pdAddr, + g: g, localStoreDir: localFile, regionSplitSize: regionSplitSize, @@ -351,12 +389,16 @@ func (local *local) openEngineDB(engineUUID uuid.UUID, readOnly bool) (*pebble.D opt := &pebble.Options{ MemTableSize: LocalMemoryTableSize, MaxConcurrentCompactions: 16, - MinCompactionRate: 1 << 30, L0CompactionThreshold: math.MaxInt32, // set to max try to disable compaction L0StopWritesThreshold: math.MaxInt32, // set to max try to disable compaction MaxOpenFiles: 10000, DisableWAL: true, ReadOnly: readOnly, + TablePropertyCollectors: []func() pebble.TablePropertyCollector{ + func() pebble.TablePropertyCollector { + return newRangePropertiesCollector() + }, + }, } dbPath := filepath.Join(local.localStoreDir, engineUUID.String()) return pebble.Open(dbPath, opt) @@ -482,8 +524,9 @@ func (local *local) WriteToTiKV( iter.Last() lastKey := codec.EncodeBytes([]byte{}, iter.Key()) + u := uuid.New() meta := &sst.SSTMeta{ - Uuid: uuid.NewV4().Bytes(), + Uuid: u[:], RegionId: region.Region.GetId(), RegionEpoch: region.Region.GetRegionEpoch(), Range: &sst.Range{ @@ -549,7 +592,7 @@ func (local *local) WriteToTiKV( count++ totalCount++ - if count >= local.batchWriteKVPairs || size >= regionMaxSize || totalCount >= regionMaxKeyCount { + if count >= local.batchWriteKVPairs { for i := range clients { requests[i].Chunk.(*sst.WriteRequest_Batch).Batch.Pairs = pairs[:count] if err := clients[i].Send(requests[i]); err != nil { @@ -645,11 +688,32 @@ func (local *local) Ingest(ctx context.Context, meta *sst.SSTMeta, region *split return resp, nil } -func (local *local) readAndSplitIntoRange(engineFile *LocalFile, engineUUID uuid.UUID) ([]Range, error) { - if engineFile.Length == 0 { - return nil, nil +func splitRangeBySizeProps(fullRange Range, sizeProps *sizeProperties, sizeLimit int64, keysLimit int64) []Range { + ranges := make([]Range, 0, sizeProps.totalSize/uint64(sizeLimit)) + curSize := uint64(0) + curKeys := uint64(0) + curKey := fullRange.start + sizeProps.iter(func(p *rangeProperty) bool { + curSize += p.Size + curKeys += p.Keys + if int64(curSize) >= sizeLimit || int64(curKeys) >= keysLimit { + ranges = append(ranges, Range{start: curKey, end: p.Key}) + curKey = p.Key + curSize = 0 + curKeys = 0 + } + return true + }) + + if curKeys > 0 { + ranges = append(ranges, Range{start: curKey, end: fullRange.end}) + } else { + ranges[len(ranges)-1].end = fullRange.end } + return ranges +} +func (local *local) readAndSplitIntoRange(engineFile *LocalFile) ([]Range, error) { iter := engineFile.db.NewIter(nil) defer iter.Close() @@ -667,77 +731,24 @@ func (local *local) readAndSplitIntoRange(engineFile *LocalFile, engineUUID uuid endKey := nextKey(lastKey) // <= 96MB no need to split into range - if engineFile.TotalSize <= local.regionSplitSize { + if engineFile.TotalSize <= local.regionSplitSize && engineFile.Length <= regionMaxKeyCount { ranges := []Range{{start: firstKey, end: endKey, length: int(engineFile.Length)}} return ranges, nil } - log.L().Info("doReadAndSplitIntoRange", zap.Binary("firstKey", firstKey), zap.Binary("lastKey", lastKey)) - - // split data into n * 4/3 ranges, then seek n times to get n + 1 ranges - // because we don't split very accurate, so wo try to split 1/4 more regions to avoid region to be too big - // 
estimiate regions size by the bigger of region size in bytes and kv count - splitTargetSize := (local.regionSplitSize*3 + 3) / 4 - n := (engineFile.TotalSize + splitTargetSize - 1) / splitTargetSize - numByKeyCount := engineFile.Length / (regionMaxKeyCount * 3 / 4) - if n < numByKeyCount { - n = numByKeyCount + sizeProps, err := engineFile.getSizeProperties() + if err != nil { + return nil, errors.Trace(err) } - ranges := make([]Range, 0, n+1) - appendRanges := func(ranges []Range, start []byte, ends [][]byte) []Range { - for _, e := range ends { - ranges = append(ranges, Range{start: start, end: e}) - start = e - } - return ranges - } - if tablecodec.IsIndexKey(firstKey) { - type tblIndexRange struct { - tblID int64 - indexID int64 - startKey []byte - endKey []byte - } - - // for partitioned table, there will be multiple physical tables and each physical table contains multiple indices - indexRanges := make([]*tblIndexRange, 0) - iter.First() - for iter.Valid() { - startKey := append([]byte{}, iter.Key()...) + ranges := splitRangeBySizeProps(Range{start: firstKey, end: endKey}, sizeProps, + local.regionSplitSize, regionMaxKeyCount*2/3) - tableID, indexID, _, err := tablecodec.DecodeKeyHead(startKey) - if err != nil { - return nil, err - } - - k := tablecodec.EncodeTableIndexPrefix(tableID, indexID+1) - iter.SeekLT(k) - - endKey := append([]byte{}, iter.Key()...) - indexRanges = append(indexRanges, &tblIndexRange{tableID, indexID, startKey, endKey}) - log.L().Debug("index key range", zap.Int64("tableID", tableID), zap.Int64("index", indexID), - zap.Binary("startKey", startKey), zap.Binary("endKey", endKey)) - - iter.Next() - } + log.L().Info("split engine key ranges", zap.Stringer("engine", engineFile.Uuid), + zap.Int64("totalSize", engineFile.TotalSize), zap.Int64("totalCount", engineFile.Length), + zap.Binary("firstKey", firstKey), zap.Binary("lastKey", lastKey), + zap.Int("ranges", len(ranges))) - indexRangeCount := (int(n) + len(indexRanges)) / len(indexRanges) - - log.L().Info("split table index kv to range", - zap.Int("total index count", len(indexRanges)), zap.Int64("ranges", n), - zap.Int("index range count", indexRangeCount)) - - for _, indexRange := range indexRanges { - values := engineFile.splitValuesToRange(indexRange.startKey, nextKey(indexRange.endKey), int64(indexRangeCount), len(indexRanges)) - ranges = appendRanges(ranges, indexRange.startKey, values) - } - } else { - // data engine, we split keys by sample keys instead of by handle - // because handles are also not distributed evenly - values := engineFile.splitValuesToRange(firstKey, endKey, n, 1) - ranges = appendRanges(ranges, firstKey, values) - } return ranges, nil } @@ -897,7 +908,7 @@ WriteAndIngest: if len(regions) == 1 { w := local.ingestConcurrency.Apply() - rg, err1 := local.WriteAndIngestPairs(ctx, engineFile, region, start, end) + rg, err1 := local.writeAndIngestPairs(ctx, engineFile, region, start, end) local.ingestConcurrency.Recycle(w) if err1 != nil { err = err1 @@ -910,7 +921,7 @@ WriteAndIngest: shouldWait = true go func(r *split.RegionInfo) { w := local.ingestConcurrency.Apply() - rg, err := local.WriteAndIngestPairs(ctx, engineFile, r, start, end) + rg, err := local.writeAndIngestPairs(ctx, engineFile, r, start, end) local.ingestConcurrency.Recycle(w) errChan <- err if err == nil && rg != nil { @@ -951,7 +962,7 @@ const ( retryIngest ) -func (local *local) WriteAndIngestPairs( +func (local *local) writeAndIngestPairs( ctx context.Context, engineFile *LocalFile, region *split.RegionInfo, @@ -1023,7 
+1034,7 @@ loopWrite: return remainRange, errors.Trace(err) } -func (local *local) WriteAndIngestByRanges(ctx context.Context, engineFile *LocalFile, ranges []Range, remainRanges *syncdRanges) error { +func (local *local) writeAndIngestByRanges(ctx context.Context, engineFile *LocalFile, ranges []Range, remainRanges *syncdRanges) error { if engineFile.Length == 0 { // engine is empty, this is likes because it's a index engine but the table contains no index log.L().Warn("engine contains no data", zap.Stringer("uuid", engineFile.Uuid)) @@ -1093,14 +1104,24 @@ func (local *local) ImportEngine(ctx context.Context, engineUUID uuid.UUID) erro // skip if engine not exist. See the comment of `CloseEngine` for more detail. return nil } + + lf := engineFile.(*LocalFile) + if lf.TotalSize == 0 { + log.L().Info("engine contains no kv, skip import", zap.Stringer("engine", engineUUID)) + return nil + } + // split sorted file into range by 96MB size per file - ranges, err := local.readAndSplitIntoRange(engineFile.(*LocalFile), engineUUID) + ranges, err := local.readAndSplitIntoRange(lf) if err != nil { return err } remains := &syncdRanges{} for { + log.L().Info("start import engine", zap.Stringer("uuid", engineUUID), + zap.Int("ranges", len(ranges))) + // split region by given ranges for i := 0; i < maxRetryTimes; i++ { err = local.SplitAndScatterRegionByRanges(ctx, ranges) @@ -1109,12 +1130,12 @@ func (local *local) ImportEngine(ctx context.Context, engineUUID uuid.UUID) erro } } if err != nil { - log.L().Error("split & scatter ranges failed", log.ShortError(err)) + log.L().Error("split & scatter ranges failed", zap.Stringer("uuid", engineUUID), log.ShortError(err)) return err } // start to write to kv and ingest - err = local.WriteAndIngestByRanges(ctx, engineFile.(*LocalFile), ranges, remains) + err = local.writeAndIngestByRanges(ctx, engineFile.(*LocalFile), ranges, remains) if err != nil { log.L().Error("write and ingest engine failed", log.ShortError(err)) return err @@ -1153,7 +1174,7 @@ func (local *local) CleanupEngine(ctx context.Context, engineUUID uuid.UUID) err } func (local *local) CheckRequirements() error { - if err := checkTiDBVersion(local.tls, localMinTiDBVersion); err != nil { + if err := checkTiDBVersionBySQL(local.g, localMinTiDBVersion); err != nil { return err } if err := checkPDVersion(local.tls, local.pdAddr, localMinPDVersion); err != nil { @@ -1313,123 +1334,167 @@ func nextKey(key []byte) []byte { return res } -// splitValuesToRange try to cut [start, end) to count range approximately -// just like [start, v1), [v1, v2)... [vCount-1, end) -// return value []{v1, v2... vCount-1, End} -func (l *LocalFile) splitValuesToRange(start []byte, end []byte, count int64, sampleFactor int) [][]byte { - opt := &pebble.IterOptions{LowerBound: start, UpperBound: end} - iter := l.db.NewIter(opt) - defer iter.Close() +type rangeOffsets struct { + Size uint64 + Keys uint64 +} - iter.First() - start = append([]byte{}, iter.Key()...) 
- iter.Last() - end = nextKey(iter.Key()) +type rangeProperty struct { + Key []byte + rangeOffsets +} - startBytes := make([]byte, 8) - endBytes := make([]byte, 8) +func (r *rangeProperty) Less(than btree.Item) bool { + ta := than.(*rangeProperty) + return bytes.Compare(r.Key, ta.Key) < 0 +} - minLen := len(start) - if minLen > len(end) { - minLen = len(end) - } +var _ btree.Item = &rangeProperty{} - offset := 0 - for i := 0; i < minLen; i++ { - if start[i] != end[i] { - offset = i - break +type rangeProperties []rangeProperty + +func decodeRangeProperties(data []byte) (rangeProperties, error) { + r := make(rangeProperties, 0, 16) + for len(data) > 0 { + if len(data) < 4 { + return nil, io.ErrUnexpectedEOF + } + keyLen := int(binary.BigEndian.Uint32(data[:4])) + data = data[4:] + if len(data) < keyLen+8*2 { + return nil, io.ErrUnexpectedEOF } + key := data[:keyLen] + data = data[keyLen:] + size := binary.BigEndian.Uint64(data[:8]) + keys := binary.BigEndian.Uint64(data[8:]) + data = data[16:] + r = append(r, rangeProperty{Key: key, rangeOffsets: rangeOffsets{Size: size, Keys: keys}}) } - copy(startBytes, start[offset:]) - copy(endBytes, end[offset:]) - - sValue := binary.BigEndian.Uint64(startBytes) - eValue := binary.BigEndian.Uint64(endBytes) + return r, nil +} - naiveFn := func() [][]byte { - step := (eValue - sValue) / uint64(count) - if step == uint64(0) { - step = uint64(1) - } +func (r rangeProperties) Encode() []byte { + b := make([]byte, 0, 1024) + idx := 0 + for _, p := range r { + b = append(b, 0, 0, 0, 0) + binary.BigEndian.PutUint32(b[idx:], uint32(len(p.Key))) + idx += 4 + b = append(b, p.Key...) + idx += len(p.Key) + + b = append(b, 0, 0, 0, 0, 0, 0, 0, 0) + binary.BigEndian.PutUint64(b[idx:], p.Size) + idx += 8 + + b = append(b, 0, 0, 0, 0, 0, 0, 0, 0) + binary.BigEndian.PutUint64(b[idx:], p.Keys) + idx += 8 + } + return b +} - res := make([][]byte, 0, count) - var curBytes []byte - iter.First() - for cur := sValue + step; cur <= eValue-step; cur += step { - curBytes = make([]byte, offset+8) - copy(curBytes, start[:offset]) - binary.BigEndian.PutUint64(curBytes[offset:], cur) - // if range is empty, skip range - if bytes.Compare(curBytes, iter.Key()) < 0 { - continue - } - // move to next range - iter.SeekGE(curBytes) - res = append(res, curBytes) - } - res = append(res, end) +func (r rangeProperties) get(key []byte) rangeOffsets { + idx := sort.Search(len(r), func(i int) bool { + return bytes.Compare(r[i].Key, key) >= 0 + }) + return r[idx].rangeOffsets +} - log.L().Info("split value naively", zap.Int64("count", count), - zap.Int("ranges", len(res))) +type RangePropertiesCollector struct { + props rangeProperties + lastOffsets rangeOffsets + lastKey []byte + currentOffsets rangeOffsets + propSizeIdxDistance uint64 + propKeysIdxDistance uint64 +} - return res +func newRangePropertiesCollector() *RangePropertiesCollector { + return &RangePropertiesCollector{ + props: make([]rangeProperty, 0, 1024), + propSizeIdxDistance: defaultPropSizeIndexDistance, + propKeysIdxDistance: defaultPropKeysIndexDistance, } +} + +func (c *RangePropertiesCollector) sizeInLastRange() uint64 { + return c.currentOffsets.Size - c.lastOffsets.Size +} + +func (c *RangePropertiesCollector) keysInLastRange() uint64 { + return c.currentOffsets.Keys - c.lastOffsets.Keys +} + +func (c *RangePropertiesCollector) insertNewPoint(key []byte) { + c.lastOffsets = c.currentOffsets + c.props = append(c.props, rangeProperty{Key: append([]byte{}, key...), rangeOffsets: c.currentOffsets}) +} - sampleCount := 
uint64(count) * 200 / uint64(sampleFactor) +// implement `pebble.TablePropertyCollector` +// implement `TablePropertyCollector.Add` +func (c *RangePropertiesCollector) Add(key pebble.InternalKey, value []byte) error { + c.currentOffsets.Size += uint64(len(value)) + uint64(len(key.UserKey)) + c.currentOffsets.Keys += 1 + if len(c.lastKey) == 0 || c.sizeInLastRange() >= c.propSizeIdxDistance || + c.keysInLastRange() >= c.propKeysIdxDistance { + c.insertNewPoint(key.UserKey) + } + c.lastKey = append(c.lastKey[:0], key.UserKey...) + return nil +} - if sampleCount == 0 || eValue-sValue < sampleCount*20 { - return naiveFn() +func (c *RangePropertiesCollector) Finish(userProps map[string]string) error { + if c.sizeInLastRange() > 0 || c.keysInLastRange() > 0 { + c.insertNewPoint(c.lastKey) } - step := (eValue - sValue) / sampleCount + userProps[propRangeIndex] = string(c.props.Encode()) + return nil +} + +// The name of the property collector. +func (c *RangePropertiesCollector) Name() string { + return propRangeIndex +} - sampleValues := make([]uint64, 0, sampleCount/10) +type sizeProperties struct { + totalSize uint64 + indexHandles *btree.BTree +} - lastValue := uint64(0) - valueBuf := make([]byte, 8) - seekKey := make([]byte, offset+8) - copy(seekKey, start[:offset]) - for i := sValue; i < eValue; i += step { - if i <= lastValue { - continue - } - binary.BigEndian.PutUint64(seekKey[offset:], i) - iter.SeekGE(seekKey) - copy(valueBuf, iter.Key()[offset:]) - value := binary.BigEndian.Uint64(valueBuf) - sampleValues = append(sampleValues, value) - lastValue = value - } - - // if too few sample values, fall back to naive func - if len(sampleValues) < int(count)*20 { - log.L().Info("too few samples, fallback to naive split", zap.Int64("count", count), - zap.Int("samples", len(sampleValues)), zap.Binary("start", start), - zap.Binary("end", end), zap.Int64("engine_kv", l.Length)) - return naiveFn() - } - - s := float64(len(sampleValues)) / float64(count) - - res := make([][]byte, 0, count) - for i := s - 1; int(i) < len(sampleValues); i += s { - curBytes := make([]byte, offset+8) - copy(curBytes, start[:offset]) - binary.BigEndian.PutUint64(curBytes[offset:], sampleValues[int(i)]) - res = append(res, curBytes) - } - // adjust last value - if bytes.Compare(res[len(res)-1], end) < 0 { - if len(res) < int(count) { - res = append(res, end) - } else { - res[len(res)-1] = end - } +func newSizeProperties() *sizeProperties { + return &sizeProperties{indexHandles: btree.New(32)} +} + +func (s *sizeProperties) add(item *rangeProperty) { + if old := s.indexHandles.ReplaceOrInsert(item); old != nil { + o := old.(*rangeProperty) + item.Keys += o.Keys + item.Size += o.Size } - log.L().Info("split value with sample", zap.Int64("count", count), - zap.Int("ranges", len(res)), zap.Int("samples", len(sampleValues)), - zap.Int64("engine_kv", l.Length)) - return res +} + +func (s *sizeProperties) addAll(props rangeProperties) { + prevRange := rangeOffsets{} + for _, r := range props { + s.add(&rangeProperty{ + Key: r.Key, + rangeOffsets: rangeOffsets{Keys: r.Keys - prevRange.Keys, Size: r.Size - prevRange.Size}, + }) + prevRange = r.rangeOffsets + } + if len(props) > 0 { + s.totalSize = props[len(props)-1].Size + } +} + +// iter the tree until f return false +func (s *sizeProperties) iter(f func(p *rangeProperty) bool) { + s.indexHandles.Ascend(func(i btree.Item) bool { + prop := i.(*rangeProperty) + return f(prop) + }) } diff --git a/lightning/backend/local_test.go b/lightning/backend/local_test.go index 
ec33119f7..1f6f89612 100644 --- a/lightning/backend/local_test.go +++ b/lightning/backend/local_test.go @@ -2,8 +2,14 @@ package backend import ( "bytes" + "encoding/binary" + "math" + "math/rand" + "path/filepath" + "github.com/cockroachdb/pebble" . "github.com/pingcap/check" + "github.com/pingcap/tidb/util/hack" ) type localSuite struct{} @@ -31,3 +37,198 @@ func (s *localSuite) TestNextKey(c *C) { next = nextKey([]byte{1, 255}) c.Assert(bytes.Compare(next, []byte{1, 255, 0, 1, 2}), Equals, -1) } + +// The first half of this test is same as the test in tikv: +// https://github.com/tikv/tikv/blob/dbfe7730dd0fddb34cb8c3a7f8a079a1349d2d41/components/engine_rocks/src/properties.rs#L572 +func (s *localSuite) TestRangeProperties(c *C) { + type testCase struct { + key []byte + vLen int + count int + } + cases := []testCase{ + // handle "a": size(size = 1, offset = 1),keys(1,1) + {[]byte("a"), 0, 1}, + {[]byte("b"), defaultPropSizeIndexDistance / 8, 1}, + {[]byte("c"), defaultPropSizeIndexDistance / 4, 1}, + {[]byte("d"), defaultPropSizeIndexDistance / 2, 1}, + {[]byte("e"), defaultPropSizeIndexDistance / 8, 1}, + // handle "e": size(size = DISTANCE + 4, offset = DISTANCE + 5),keys(4,5) + {[]byte("f"), defaultPropSizeIndexDistance / 4, 1}, + {[]byte("g"), defaultPropSizeIndexDistance / 2, 1}, + {[]byte("h"), defaultPropSizeIndexDistance / 8, 1}, + {[]byte("i"), defaultPropSizeIndexDistance / 4, 1}, + // handle "i": size(size = DISTANCE / 8 * 9 + 4, offset = DISTANCE / 8 * 17 + 9),keys(4,5) + {[]byte("j"), defaultPropSizeIndexDistance / 2, 1}, + {[]byte("k"), defaultPropSizeIndexDistance / 2, 1}, + // handle "k": size(size = DISTANCE + 2, offset = DISTANCE / 8 * 25 + 11),keys(2,11) + {[]byte("l"), 0, defaultPropKeysIndexDistance / 2}, + {[]byte("m"), 0, defaultPropKeysIndexDistance / 2}, + //handle "m": keys = DEFAULT_PROP_KEYS_INDEX_DISTANCE,offset = 11+DEFAULT_PROP_KEYS_INDEX_DISTANCE + {[]byte("n"), 1, defaultPropKeysIndexDistance}, + //handle "n": keys = DEFAULT_PROP_KEYS_INDEX_DISTANCE, offset = 11+2*DEFAULT_PROP_KEYS_INDEX_DISTANCE + {[]byte("o"), 1, 1}, + // handle "o": keys = 1, offset = 12 + 2*DEFAULT_PROP_KEYS_INDEX_DISTANCE + } + + collector := newRangePropertiesCollector() + for _, p := range cases { + v := make([]byte, p.vLen) + for i := 0; i < p.count; i++ { + _ = collector.Add(pebble.InternalKey{UserKey: p.key}, v) + } + } + + userProperties := make(map[string]string, 1) + _ = collector.Finish(userProperties) + + props, err := decodeRangeProperties(hack.Slice(userProperties[propRangeIndex])) + c.Assert(err, IsNil) + + // Smallest key in props. + c.Assert(props[0].Key, DeepEquals, cases[0].key) + // Largest key in props. 
+ c.Assert(props[len(props)-1].Key, DeepEquals, cases[len(cases)-1].key) + c.Assert(len(props), Equals, 7) + + a := props.get([]byte("a")) + c.Assert(a.Size, Equals, uint64(1)) + e := props.get([]byte("e")) + c.Assert(e.Size, Equals, uint64(defaultPropSizeIndexDistance+5)) + i := props.get([]byte("i")) + c.Assert(i.Size, Equals, uint64(defaultPropSizeIndexDistance/8*17+9)) + k := props.get([]byte("k")) + c.Assert(k.Size, Equals, uint64(defaultPropSizeIndexDistance/8*25+11)) + m := props.get([]byte("m")) + c.Assert(m.Keys, Equals, uint64(defaultPropKeysIndexDistance+11)) + n := props.get([]byte("n")) + c.Assert(n.Keys, Equals, uint64(defaultPropKeysIndexDistance*2+11)) + o := props.get([]byte("o")) + c.Assert(o.Keys, Equals, uint64(defaultPropKeysIndexDistance*2+12)) + + props2 := rangeProperties([]rangeProperty{ + {[]byte("b"), rangeOffsets{defaultPropSizeIndexDistance + 10, defaultPropKeysIndexDistance / 2}}, + {[]byte("h"), rangeOffsets{defaultPropSizeIndexDistance * 3 / 2, defaultPropKeysIndexDistance * 3 / 2}}, + {[]byte("k"), rangeOffsets{defaultPropSizeIndexDistance * 3, defaultPropKeysIndexDistance * 7 / 4}}, + {[]byte("mm"), rangeOffsets{defaultPropSizeIndexDistance * 5, defaultPropKeysIndexDistance * 2}}, + {[]byte("q"), rangeOffsets{defaultPropSizeIndexDistance * 7, defaultPropKeysIndexDistance*9/4 + 10}}, + {[]byte("y"), rangeOffsets{defaultPropSizeIndexDistance*7 + 100, defaultPropKeysIndexDistance*9/4 + 1010}}, + }) + + sizeProps := newSizeProperties() + sizeProps.addAll(props) + sizeProps.addAll(props2) + + res := []*rangeProperty{ + {[]byte("a"), rangeOffsets{1, 1}}, + {[]byte("b"), rangeOffsets{defaultPropSizeIndexDistance + 10, defaultPropKeysIndexDistance / 2}}, + {[]byte("e"), rangeOffsets{defaultPropSizeIndexDistance + 4, 4}}, + {[]byte("h"), rangeOffsets{defaultPropSizeIndexDistance/2 - 10, defaultPropKeysIndexDistance}}, + {[]byte("i"), rangeOffsets{defaultPropSizeIndexDistance*9/8 + 4, 4}}, + {[]byte("k"), rangeOffsets{defaultPropSizeIndexDistance*5/2 + 2, defaultPropKeysIndexDistance/4 + 2}}, + {[]byte("m"), rangeOffsets{defaultPropKeysIndexDistance, defaultPropKeysIndexDistance}}, + {[]byte("mm"), rangeOffsets{defaultPropSizeIndexDistance * 2, defaultPropKeysIndexDistance / 4}}, + {[]byte("n"), rangeOffsets{defaultPropKeysIndexDistance * 2, defaultPropKeysIndexDistance}}, + {[]byte("o"), rangeOffsets{2, 1}}, + {[]byte("q"), rangeOffsets{defaultPropSizeIndexDistance * 2, defaultPropKeysIndexDistance/4 + 10}}, + {[]byte("y"), rangeOffsets{100, 1000}}, + } + + c.Assert(sizeProps.indexHandles.Len(), Equals, 12) + idx := 0 + sizeProps.iter(func(p *rangeProperty) bool { + c.Assert(p, DeepEquals, res[idx]) + idx++ + return true + }) + + fullRange := Range{start: []byte("a"), end: []byte("z")} + ranges := splitRangeBySizeProps(fullRange, sizeProps, 2*defaultPropSizeIndexDistance, defaultPropKeysIndexDistance*5/2) + + c.Assert(ranges, DeepEquals, []Range{ + {start: []byte("a"), end: []byte("e")}, + {start: []byte("e"), end: []byte("k")}, + {start: []byte("k"), end: []byte("mm")}, + {start: []byte("mm"), end: []byte("q")}, + {start: []byte("q"), end: []byte("z")}, + }) + + ranges = splitRangeBySizeProps(fullRange, sizeProps, 2*defaultPropSizeIndexDistance, defaultPropKeysIndexDistance) + c.Assert(ranges, DeepEquals, []Range{ + {start: []byte("a"), end: []byte("e")}, + {start: []byte("e"), end: []byte("h")}, + {start: []byte("h"), end: []byte("k")}, + {start: []byte("k"), end: []byte("m")}, + {start: []byte("m"), end: []byte("mm")}, + {start: []byte("mm"), end: []byte("n")}, 
+ {start: []byte("n"), end: []byte("q")}, + {start: []byte("q"), end: []byte("z")}, + }) +} + +func (s *localSuite) TestRangePropertiesWithPebble(c *C) { + dir := c.MkDir() + + sizeDistance := uint64(500) + keysDistance := uint64(20) + opt := &pebble.Options{ + MemTableSize: LocalMemoryTableSize, + MaxConcurrentCompactions: 16, + L0CompactionThreshold: math.MaxInt32, // set to max try to disable compaction + L0StopWritesThreshold: math.MaxInt32, // set to max try to disable compaction + MaxOpenFiles: 10000, + DisableWAL: true, + ReadOnly: false, + TablePropertyCollectors: []func() pebble.TablePropertyCollector{ + func() pebble.TablePropertyCollector { + return &RangePropertiesCollector{ + props: make([]rangeProperty, 0, 1024), + propSizeIdxDistance: sizeDistance, + propKeysIdxDistance: keysDistance, + } + }, + }, + } + db, err := pebble.Open(filepath.Join(dir, "test"), opt) + c.Assert(err, IsNil) + defer db.Close() + + // local collector + collector := &RangePropertiesCollector{ + props: make([]rangeProperty, 0, 1024), + propSizeIdxDistance: sizeDistance, + propKeysIdxDistance: keysDistance, + } + writeOpt := &pebble.WriteOptions{Sync: false} + value := make([]byte, 100) + for i := 0; i < 10; i++ { + wb := db.NewBatch() + for j := 0; j < 100; j++ { + key := make([]byte, 8) + valueLen := rand.Intn(50) + binary.BigEndian.PutUint64(key, uint64(i*100+j)) + err = wb.Set(key, value[:valueLen], writeOpt) + c.Assert(err, IsNil) + err = collector.Add(pebble.InternalKey{UserKey: key}, value[:valueLen]) + c.Assert(err, IsNil) + } + c.Assert(wb.Commit(writeOpt), IsNil) + } + // flush one sst + c.Assert(db.Flush(), IsNil) + + props := make(map[string]string, 1) + c.Assert(collector.Finish(props), IsNil) + + sstMetas, err := db.SSTables(pebble.WithProperties()) + c.Assert(err, IsNil) + for i, level := range sstMetas { + if i == 0 { + c.Assert(len(level), Equals, 1) + } else { + c.Assert(len(level), Equals, 0) + } + } + + c.Assert(sstMetas[0][0].Properties.UserProperties, DeepEquals, props) +} diff --git a/lightning/backend/localhelper.go b/lightning/backend/localhelper.go index 8ca6fe244..72f384508 100644 --- a/lightning/backend/localhelper.go +++ b/lightning/backend/localhelper.go @@ -78,11 +78,6 @@ func (local *local) SplitAndScatterRegionByRanges(ctx context.Context, ranges [] } log.L().Warn("split regions", log.ShortError(errSplit), zap.Int("retry time", i+1), zap.Uint64("region_id", regionID)) - select { - case <-time.After(time.Second): - case <-ctx.Done(): - return ctx.Err() - } retryKeys = append(retryKeys, keys...) } else { scatterRegions = append(scatterRegions, newRegions...) 
@@ -96,6 +91,11 @@ func (local *local) SplitAndScatterRegionByRanges(ctx context.Context, ranges [] }) minKey = retryKeys[0] maxKey = nextKey(retryKeys[len(retryKeys)-1]) + select { + case <-time.After(time.Second): + case <-ctx.Done(): + return ctx.Err() + } } } if errSplit != nil { diff --git a/lightning/backend/session.go b/lightning/backend/session.go index a6c7dbad7..1285eb774 100644 --- a/lightning/backend/session.go +++ b/lightning/backend/session.go @@ -176,6 +176,8 @@ type SessionOptions struct { SQLMode mysql.SQLMode Timestamp int64 RowFormatVersion string + // a seed used for tableKvEncoder's auto random bits value + AutoRandomSeed int64 } func newSession(options *SessionOptions) *session { diff --git a/lightning/backend/sql2kv.go b/lightning/backend/sql2kv.go index fe84dbf8d..01b283ec0 100644 --- a/lightning/backend/sql2kv.go +++ b/lightning/backend/sql2kv.go @@ -14,6 +14,8 @@ package backend import ( + "math/rand" + "github.com/pingcap/errors" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" @@ -37,6 +39,8 @@ type tableKVEncoder struct { tbl table.Table se *session recordCache []types.Datum + // auto random bits value for this chunk + autoRandomHeaderBits int64 } func NewTableKVEncoder(tbl table.Table, options *SessionOptions) Encoder { @@ -45,12 +49,35 @@ func NewTableKVEncoder(tbl table.Table, options *SessionOptions) Encoder { // Set CommonAddRecordCtx to session to reuse the slices and BufStore in AddRecord recordCtx := tables.NewCommonAddRecordCtx(len(tbl.Cols())) tables.SetAddRecordCtx(se, recordCtx) + + var autoRandomBits int64 + if tbl.Meta().PKIsHandle && tbl.Meta().ContainsAutoRandomBits() { + for _, col := range tbl.Cols() { + if mysql.HasPriKeyFlag(col.Flag) { + incrementalBits := autoRandomIncrementBits(col, int(tbl.Meta().AutoRandomBits)) + autoRandomBits = rand.New(rand.NewSource(options.AutoRandomSeed)).Int63n(1<= 1.0 { diff --git a/lightning/config/config_test.go b/lightning/config/config_test.go index 0110fcf2c..a848daf73 100644 --- a/lightning/config/config_test.go +++ b/lightning/config/config_test.go @@ -132,6 +132,25 @@ func (s *configTestSuite) TestAdjustInvalidBackend(c *C) { c.Assert(err, ErrorMatches, "invalid config: unsupported `tikv-importer\\.backend` \\(no_such_backend\\)") } +func (s *configTestSuite) TestAdjustFileRoutePath(c *C) { + cfg := config.NewConfig() + assignMinimalLegalValue(cfg) + + tmpDir := c.MkDir() + cfg.Mydumper.SourceDir = tmpDir + invalidPath := filepath.Join(tmpDir, "../test123/1.sql") + rule := &config.FileRouteRule{Path: invalidPath, Type: "sql", Schema: "test", Table: "tbl"} + cfg.Mydumper.FileRouters = []*config.FileRouteRule{rule} + err := cfg.Adjust() + c.Assert(err, ErrorMatches, fmt.Sprintf("file route path '%s' is not in source dir '%s'", invalidPath, tmpDir)) + + relPath := "test_dir/1.sql" + rule.Path = filepath.Join(tmpDir, relPath) + err = cfg.Adjust() + c.Assert(err, IsNil) + c.Assert(cfg.Mydumper.FileRouters[0].Path, Equals, relPath) +} + func (s *configTestSuite) TestDecodeError(c *C) { ts, host, port := startMockServer(c, http.StatusOK, "invalid-string") defer ts.Close() @@ -612,3 +631,21 @@ func (s *configTestSuite) TestCronEncodeDecode(c *C) { c.Assert(cfg2.LoadFromTOML([]byte(confStr)), IsNil) c.Assert(cfg2.Cron, DeepEquals, cfg.Cron) } + +func (s *configTestSuite) TestAdjustWithLegacyBlackWhiteList(c *C) { + cfg := config.NewConfig() + assignMinimalLegalValue(cfg) + c.Assert(cfg.Mydumper.Filter, DeepEquals, config.DefaultFilter) + c.Assert(cfg.HasLegacyBlackWhiteList(), IsFalse) + + 
cfg.Mydumper.Filter = []string{"test.*"} + c.Assert(cfg.Adjust(), IsNil) + c.Assert(cfg.HasLegacyBlackWhiteList(), IsFalse) + + cfg.BWList.DoDBs = []string{"test"} + c.Assert(cfg.Adjust(), ErrorMatches, "invalid config: `mydumper\\.filter` and `black-white-list` cannot be simultaneously defined") + + cfg.Mydumper.Filter = config.DefaultFilter + c.Assert(cfg.Adjust(), IsNil) + c.Assert(cfg.HasLegacyBlackWhiteList(), IsTrue) +} diff --git a/lightning/config/const.go b/lightning/config/const.go index ad7863708..240d37e27 100644 --- a/lightning/config/const.go +++ b/lightning/config/const.go @@ -13,18 +13,20 @@ package config -const ( - _K = int64(1 << 10) - _M = _K << 10 - _G = _M << 10 +import ( + "github.com/docker/go-units" +) +const ( // mydumper - ReadBlockSize int64 = 64 * _K - MinRegionSize int64 = 256 * _M - MaxRegionSize int64 = 256 * _M - SplitRegionSize int64 = 96 * _M + ReadBlockSize ByteSize = 64 * units.KiB + MinRegionSize ByteSize = 256 * units.MiB + MaxRegionSize ByteSize = 256 * units.MiB + SplitRegionSize ByteSize = 96 * units.MiB BufferSizeScale = 5 - defaultMaxAllowedPacket = 64 * 1024 * 1024 + defaultMaxAllowedPacket = 64 * units.MiB + + defaultBatchSize ByteSize = 100 * units.GiB ) diff --git a/lightning/config/global.go b/lightning/config/global.go index 76ded7d30..eb1075fda 100644 --- a/lightning/config/global.go +++ b/lightning/config/global.go @@ -99,7 +99,7 @@ func NewGlobalConfig() *GlobalConfig { LogLevel: "error", }, Mydumper: GlobalMydumper{ - Filter: []string{"*.*"}, + Filter: DefaultFilter, }, TikvImporter: GlobalImporter{ Backend: "importer", diff --git a/lightning/glue/glue.go b/lightning/glue/glue.go new file mode 100644 index 000000000..c23b43951 --- /dev/null +++ b/lightning/glue/glue.go @@ -0,0 +1,115 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package glue + +import ( + "context" + "database/sql" + "errors" + + "github.com/pingcap/parser" + "github.com/pingcap/parser/model" + "github.com/pingcap/parser/mysql" + "github.com/pingcap/tidb-lightning/lightning/checkpoints" + "github.com/pingcap/tidb-lightning/lightning/common" + "github.com/pingcap/tidb-lightning/lightning/config" + "github.com/pingcap/tidb-lightning/lightning/log" +) + +type Glue interface { + OwnsSQLExecutor() bool + GetSQLExecutor() SQLExecutor + GetDB() (*sql.DB, error) + GetParser() *parser.Parser + GetTables(context.Context, string) ([]*model.TableInfo, error) + GetSession() (checkpoints.Session, error) + OpenCheckpointsDB(context.Context, *config.Config) (checkpoints.CheckpointsDB, error) + // Record is used to report some information (key, value) to host TiDB, including progress, stage currently + Record(string, uint64) +} + +type SQLExecutor interface { + ExecuteWithLog(ctx context.Context, query string, purpose string, logger log.Logger) error + ObtainStringWithLog(ctx context.Context, query string, purpose string, logger log.Logger) (string, error) + Close() +} + +type ExternalTiDBGlue struct { + db *sql.DB + parser *parser.Parser +} + +func NewExternalTiDBGlue(db *sql.DB, sqlMode mysql.SQLMode) *ExternalTiDBGlue { + p := parser.New() + p.SetSQLMode(sqlMode) + + return &ExternalTiDBGlue{db: db, parser: p} +} + +func (e *ExternalTiDBGlue) GetSQLExecutor() SQLExecutor { + return e +} + +func (e *ExternalTiDBGlue) ExecuteWithLog(ctx context.Context, query string, purpose string, logger log.Logger) error { + sql := common.SQLWithRetry{ + DB: e.db, + Logger: logger, + } + return sql.Exec(ctx, purpose, query) +} + +func (e *ExternalTiDBGlue) ObtainStringWithLog(ctx context.Context, query string, purpose string, logger log.Logger) (string, error) { + var s string + err := common.SQLWithRetry{ + DB: e.db, + Logger: logger, + }.QueryRow(ctx, purpose, query, &s) + return s, err +} + +func (e *ExternalTiDBGlue) GetDB() (*sql.DB, error) { + return e.db, nil +} + +func (e *ExternalTiDBGlue) GetParser() *parser.Parser { + return e.parser +} + +func (e ExternalTiDBGlue) GetTables(context.Context, string) ([]*model.TableInfo, error) { + return nil, errors.New("ExternalTiDBGlue doesn't have a valid GetTables function") +} + +func (e ExternalTiDBGlue) GetSession() (checkpoints.Session, error) { + return nil, errors.New("ExternalTiDBGlue doesn't have a valid GetSession function") +} + +func (e *ExternalTiDBGlue) OpenCheckpointsDB(ctx context.Context, cfg *config.Config) (checkpoints.CheckpointsDB, error) { + return checkpoints.OpenCheckpointsDB(ctx, cfg) +} + +func (e *ExternalTiDBGlue) OwnsSQLExecutor() bool { + return true +} + +func (e *ExternalTiDBGlue) Close() { + e.db.Close() +} + +func (e *ExternalTiDBGlue) Record(string, uint64) { +} + +const ( + RecordEstimatedChunk = "EstimatedChunk" + RecordFinishedChunk = "FinishedChunk" +) diff --git a/lightning/lightning.go b/lightning/lightning.go index 39401509a..ed5a74df8 100755 --- a/lightning/lightning.go +++ b/lightning/lightning.go @@ -33,6 +33,7 @@ import ( "github.com/pingcap/br/pkg/storage" "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/tidb-lightning/lightning/glue" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/shurcooL/httpgzip" "go.uber.org/zap" @@ -55,14 +56,14 @@ type Lightning struct { // taskCfgs is the list of task configurations enqueued in the server mode taskCfgs *config.ConfigList ctx context.Context - shutdown context.CancelFunc + shutdown 
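The Glue and SQLExecutor interfaces above are what the rest of lightning is meant to program against instead of a raw *sql.DB, so the same code path works whether the executor is the ExternalTiDBGlue shown here or a session provided by the host TiDB. A hypothetical helper showing the intended call pattern (reportVersion is illustrative and not part of this patch):

package example

import (
	"context"

	"github.com/pingcap/tidb-lightning/lightning/glue"
	"github.com/pingcap/tidb-lightning/lightning/log"
)

// reportVersion runs a query through whatever executor the glue provides and
// returns the single string result.
func reportVersion(ctx context.Context, g glue.Glue) (string, error) {
	return g.GetSQLExecutor().ObtainStringWithLog(ctx, "SELECT version()", "get version", log.L())
}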
context.CancelFunc // for whole lightning context server http.Server serverAddr net.Addr serverLock sync.Mutex cancelLock sync.Mutex curTask *config.Config - cancel context.CancelFunc + cancel context.CancelFunc // for per task context, which maybe different from lightning context } func initEnv(cfg *config.GlobalConfig) error { @@ -168,21 +169,26 @@ func (l *Lightning) goServe(statusAddr string, realAddrWriter io.Writer) error { return nil } -// Run Lightning using the global config as the same as the task config. -func (l *Lightning) RunOnce() error { - cfg := config.NewConfig() - if err := cfg.LoadFromGlobal(l.globalCfg); err != nil { - return err - } - if err := cfg.Adjust(); err != nil { +// RunOnce is used by binary lightning and host when using lightning as a library. +// - for binary lightning, taskCtx could be context.Background which means taskCtx wouldn't be canceled directly by its +// cancel function, but only by Lightning.Stop or HTTP DELETE using l.cancel. and glue could be nil to let lightning +// use a default glue later. +// - for lightning as a library, taskCtx could be a meaningful context that get canceled outside, and glue could be a +// caller implemented glue. +func (l *Lightning) RunOnce(taskCtx context.Context, taskCfg *config.Config, glue glue.Glue, replaceLogger *zap.Logger) error { + if err := taskCfg.Adjust(); err != nil { return err } - cfg.TaskID = time.Now().UnixNano() + taskCfg.TaskID = time.Now().UnixNano() failpoint.Inject("SetTaskID", func(val failpoint.Value) { - cfg.TaskID = int64(val.(int)) + taskCfg.TaskID = int64(val.(int)) }) - return l.run(cfg) + + if replaceLogger != nil { + log.SetAppLogger(replaceLogger) + } + return l.run(taskCtx, taskCfg, glue) } func (l *Lightning) RunServer() error { @@ -197,7 +203,7 @@ func (l *Lightning) RunServer() error { if err != nil { return err } - err = l.run(task) + err = l.run(context.Background(), task, nil) if err != nil { restore.DeliverPauser.Pause() // force pause the progress on error log.L().Error("tidb lightning encountered error", zap.Error(err)) @@ -207,14 +213,14 @@ func (l *Lightning) RunServer() error { var taskCfgRecorderKey struct{} -func (l *Lightning) run(taskCfg *config.Config) (err error) { +func (l *Lightning) run(taskCtx context.Context, taskCfg *config.Config, g glue.Glue) (err error) { common.PrintInfo("lightning", func() { log.L().Info("cfg", zap.Stringer("cfg", taskCfg)) }) logEnvVariables() - ctx, cancel := context.WithCancel(l.ctx) + ctx, cancel := context.WithCancel(taskCtx) l.cancelLock.Lock() l.cancel = cancel l.curTask = taskCfg @@ -229,17 +235,39 @@ func (l *Lightning) run(taskCfg *config.Config) (err error) { web.BroadcastEndTask(err) }() - failpoint.Inject("SkipRunTask", func() error { + failpoint.Inject("SkipRunTask", func() { if recorder, ok := l.ctx.Value(&taskCfgRecorderKey).(chan *config.Config); ok { select { case recorder <- taskCfg: case <-ctx.Done(): - return ctx.Err() + failpoint.Return(ctx.Err()) } } - return nil + failpoint.Return(nil) }) + if err := taskCfg.TiDB.Security.RegisterMySQL(); err != nil { + return err + } + defer func() { + // deregister TLS config with name "cluster" + if taskCfg.TiDB.Security == nil { + return + } + taskCfg.TiDB.Security.CAPath = "" + taskCfg.TiDB.Security.RegisterMySQL() + }() + + // initiation of default glue should be after RegisterMySQL, which is ready to be called after taskCfg.Adjust + // and also put it here could avoid injecting another two SkipRunTask failpoint to caller + if g == nil { + db, err := 
restore.DBFromConfig(taskCfg.TiDB) + if err != nil { + return err + } + g = glue.NewExternalTiDBGlue(db, taskCfg.TiDB.SQLMode) + } + u, err := storage.ParseBackend(taskCfg.Mydumper.SourceDir, &storage.BackendOptions{}) if err != nil { return errors.Trace(err) @@ -272,7 +300,7 @@ func (l *Lightning) run(taskCfg *config.Config) (err error) { web.BroadcastInitProgress(dbMetas) var procedure *restore.RestoreController - procedure, err = restore.NewRestoreController(ctx, dbMetas, taskCfg, s) + procedure, err = restore.NewRestoreController(ctx, dbMetas, taskCfg, s, g) if err != nil { log.L().Error("restore failed", log.ShortError(err)) return errors.Trace(err) @@ -284,6 +312,11 @@ func (l *Lightning) run(taskCfg *config.Config) (err error) { } func (l *Lightning) Stop() { + l.cancelLock.Lock() + if l.cancel != nil { + l.cancel() + } + l.cancelLock.Unlock() if err := l.server.Shutdown(l.ctx); err != nil { log.L().Warn("failed to shutdown HTTP server", log.ShortError(err)) } diff --git a/lightning/lightning_test.go b/lightning/lightning_test.go index 6624989dd..939e9333a 100644 --- a/lightning/lightning_test.go +++ b/lightning/lightning_test.go @@ -25,7 +25,7 @@ import ( "time" "github.com/pingcap/tidb-lightning/lightning/checkpoints" - + "github.com/pingcap/tidb-lightning/lightning/glue" "github.com/pingcap/tidb-lightning/lightning/mydump" . "github.com/pingcap/check" @@ -55,16 +55,22 @@ func (s *lightningSuite) TestInitEnv(c *C) { } func (s *lightningSuite) TestRun(c *C) { - cfg := config.NewGlobalConfig() - cfg.TiDB.Host = "test.invalid" - cfg.TiDB.Port = 4000 - cfg.TiDB.PdAddr = "test.invalid:2379" - cfg.Mydumper.SourceDir = "not-exists" - lightning := New(cfg) - err := lightning.RunOnce() + globalConfig := config.NewGlobalConfig() + globalConfig.TiDB.Host = "test.invalid" + globalConfig.TiDB.Port = 4000 + globalConfig.TiDB.PdAddr = "test.invalid:2379" + globalConfig.Mydumper.SourceDir = "not-exists" + lightning := New(globalConfig) + cfg := config.NewConfig() + err := cfg.LoadFromGlobal(globalConfig) + c.Assert(err, IsNil) + err = lightning.RunOnce(context.Background(), cfg, nil, nil) c.Assert(err, ErrorMatches, ".*mydumper dir does not exist") + path, _ := filepath.Abs(".") - err = lightning.run(&config.Config{ + ctx := context.Background() + invalidGlue := glue.NewExternalTiDBGlue(nil, 0) + err = lightning.run(ctx, &config.Config{ Mydumper: config.MydumperRuntime{ SourceDir: "file://" + filepath.ToSlash(path), Filter: []string{"*.*"}, @@ -74,10 +80,10 @@ func (s *lightningSuite) TestRun(c *C) { Enable: true, Driver: "invalid", }, - }) + }, invalidGlue) c.Assert(err, ErrorMatches, "Unknown checkpoint driver invalid") - err = lightning.run(&config.Config{ + err = lightning.run(ctx, &config.Config{ Mydumper: config.MydumperRuntime{ SourceDir: ".", Filter: []string{"*.*"}, @@ -87,7 +93,7 @@ func (s *lightningSuite) TestRun(c *C) { Driver: "file", DSN: "any-file", }, - }) + }, invalidGlue) c.Assert(err, NotNil) } @@ -240,6 +246,7 @@ func (s *lightningServerSuite) TestGetDeleteTask(c *C) { // Check `GET /tasks` returns all tasks currently running + time.Sleep(100 * time.Millisecond) c.Assert(getAllTasks(), DeepEquals, getAllResultType{ Current: first, Queue: []int64{second, third}, @@ -333,6 +340,7 @@ func (s *lightningServerSuite) TestGetDeleteTask(c *C) { c.Assert(resp.StatusCode, Equals, http.StatusOK) resp.Body.Close() + time.Sleep(100 * time.Millisecond) c.Assert(getAllTasks(), DeepEquals, getAllResultType{ Current: third, Queue: []int64{}, @@ -345,8 +353,11 @@ func (s *lightningServerSuite) 
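RunOnce above is the seam for embedding lightning as a library: the host passes a cancellable task context, a fully built task config, its own Glue and, optionally, a replacement zap logger. A hedged sketch of what a host-side call could look like (runEmbedded and hostGlue are hypothetical names, not part of this patch):

package example

import (
	"context"

	"go.uber.org/zap"

	"github.com/pingcap/tidb-lightning/lightning"
	"github.com/pingcap/tidb-lightning/lightning/config"
	"github.com/pingcap/tidb-lightning/lightning/glue"
)

func runEmbedded(ctx context.Context, globalCfg *config.GlobalConfig, taskCfg *config.Config, hostGlue glue.Glue, logger *zap.Logger) error {
	l := lightning.New(globalCfg)
	// ctx is cancelled by the host; hostGlue and logger replace lightning's defaults.
	return l.RunOnce(ctx, taskCfg, hostGlue, logger)
}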
TestHTTPAPIOutsideServerMode(c *C) { url := "http://" + s.lightning.serverAddr.String() + "/tasks" errCh := make(chan error) + cfg := config.NewConfig() + err := cfg.LoadFromGlobal(s.lightning.globalCfg) + c.Assert(err, IsNil) go func() { - errCh <- s.lightning.RunOnce() + errCh <- s.lightning.RunOnce(s.lightning.ctx, cfg, nil, nil) }() time.Sleep(100 * time.Millisecond) @@ -362,7 +373,7 @@ func (s *lightningServerSuite) TestHTTPAPIOutsideServerMode(c *C) { err = json.NewDecoder(resp.Body).Decode(&curTask) resp.Body.Close() c.Assert(err, IsNil) - c.Assert(curTask.Current, Not(Equals), 0) + c.Assert(curTask.Current, Not(Equals), int64(0)) c.Assert(curTask.Queue, HasLen, 0) // `POST /tasks` should return 501 diff --git a/lightning/log/log.go b/lightning/log/log.go index 7d07f69cc..e3b8f3328 100644 --- a/lightning/log/log.go +++ b/lightning/log/log.go @@ -106,6 +106,11 @@ func L() Logger { return appLogger } +// SetAppLogger replaces the default logger in this package to given one +func SetAppLogger(l *zap.Logger) { + appLogger = Logger{l.WithOptions(zap.AddStacktrace(zap.DPanicLevel))} +} + // Level returns the current global log level. func Level() zapcore.Level { return appLevel.Level() diff --git a/lightning/mydump/csv_parser_test.go b/lightning/mydump/csv_parser_test.go index 144a5facb..2e42e9b72 100644 --- a/lightning/mydump/csv_parser_test.go +++ b/lightning/mydump/csv_parser_test.go @@ -140,7 +140,7 @@ func (s *testMydumpCSVParserSuite) TestTPCH(c *C) { TrimLastSep: true, } - parser := mydump.NewCSVParser(&cfg, reader, config.ReadBlockSize, s.ioWorkers, false) + parser := mydump.NewCSVParser(&cfg, reader, int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -195,7 +195,7 @@ func (s *testMydumpCSVParserSuite) TestTPCHMultiBytes(c *C) { } reader := mydump.NewStringReader(inputStr) - parser := mydump.NewCSVParser(&cfg, reader, config.ReadBlockSize, s.ioWorkers, false) + parser := mydump.NewCSVParser(&cfg, reader, int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ RowID: 1, @@ -230,7 +230,7 @@ func (s *testMydumpCSVParserSuite) TestRFC4180(c *C) { // example 1, trailing new lines - parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader("aaa,bbb,ccc\nzzz,yyy,xxx\n"), config.ReadBlockSize, s.ioWorkers, false) + parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader("aaa,bbb,ccc\nzzz,yyy,xxx\n"), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -258,7 +258,7 @@ func (s *testMydumpCSVParserSuite) TestRFC4180(c *C) { // example 2, no trailing new lines - parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader("aaa,bbb,ccc\nzzz,yyy,xxx"), config.ReadBlockSize, s.ioWorkers, false) + parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader("aaa,bbb,ccc\nzzz,yyy,xxx"), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -286,7 +286,7 @@ func (s *testMydumpCSVParserSuite) TestRFC4180(c *C) { // example 5, quoted fields - parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(`"aaa","bbb","ccc"`+"\nzzz,yyy,xxx"), config.ReadBlockSize, s.ioWorkers, false) + parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(`"aaa","bbb","ccc"`+"\nzzz,yyy,xxx"), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) 
c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -316,7 +316,7 @@ func (s *testMydumpCSVParserSuite) TestRFC4180(c *C) { parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(`"aaa","b bb","ccc" -zzz,yyy,xxx`), config.ReadBlockSize, s.ioWorkers, false) +zzz,yyy,xxx`), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -344,7 +344,7 @@ zzz,yyy,xxx`), config.ReadBlockSize, s.ioWorkers, false) // example 7, quote escaping - parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(`"aaa","b""bb","ccc"`), config.ReadBlockSize, s.ioWorkers, false) + parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(`"aaa","b""bb","ccc"`), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -371,7 +371,7 @@ func (s *testMydumpCSVParserSuite) TestMySQL(c *C) { parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader(`"\"","\\","\?" "\ -",\N,\\N`), config.ReadBlockSize, s.ioWorkers, false) +",\N,\\N`), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -417,10 +417,10 @@ func (s *testMydumpCSVParserSuite) TestSyntaxError(c *C) { "\"\x01", } - s.runFailingTestCases(c, &cfg, config.ReadBlockSize, inputs) + s.runFailingTestCases(c, &cfg, int64(config.ReadBlockSize), inputs) cfg.BackslashEscape = false - s.runFailingTestCases(c, &cfg, config.ReadBlockSize, []string{`"\`}) + s.runFailingTestCases(c, &cfg, int64(config.ReadBlockSize), []string{`"\`}) } func (s *testMydumpCSVParserSuite) TestTSV(c *C) { @@ -436,7 +436,7 @@ func (s *testMydumpCSVParserSuite) TestTSV(c *C) { parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader(`a b c d e f 0 foo 0000-00-00 0 foo 0000-00-00 -0 abc def ghi bar 1999-12-31`), config.ReadBlockSize, s.ioWorkers, true) +0 abc def ghi bar 1999-12-31`), int64(config.ReadBlockSize), s.ioWorkers, true) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -490,7 +490,7 @@ func (s *testMydumpCSVParserSuite) TestCsvWithWhiteSpaceLine(c *C) { Delimiter: `"`, } data := " \r\n\r\n0,,abc\r\n \r\n123,1999-12-31,test\r\n" - parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader(data), config.ReadBlockSize, s.ioWorkers, false) + parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader(data), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ RowID: 1, @@ -515,7 +515,7 @@ func (s *testMydumpCSVParserSuite) TestCsvWithWhiteSpaceLine(c *C) { cfg.Header = true data = " \r\na,b,c\r\n0,,abc\r\n" - parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(data), config.ReadBlockSize, s.ioWorkers, true) + parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(data), int64(config.ReadBlockSize), s.ioWorkers, true) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.Columns(), DeepEquals, []string{"a", "b", "c"}) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -537,17 +537,17 @@ func (s *testMydumpCSVParserSuite) TestEmpty(c *C) { Delimiter: `"`, } - parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader(""), config.ReadBlockSize, s.ioWorkers, false) + parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader(""), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(errors.Cause(parser.ReadRow()), Equals, io.EOF) // Try again with headers. 
cfg.Header = true - parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(""), config.ReadBlockSize, s.ioWorkers, true) + parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader(""), int64(config.ReadBlockSize), s.ioWorkers, true) c.Assert(errors.Cause(parser.ReadRow()), Equals, io.EOF) - parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader("h\n"), config.ReadBlockSize, s.ioWorkers, true) + parser = mydump.NewCSVParser(&cfg, mydump.NewStringReader("h\n"), int64(config.ReadBlockSize), s.ioWorkers, true) c.Assert(errors.Cause(parser.ReadRow()), Equals, io.EOF) } @@ -556,7 +556,7 @@ func (s *testMydumpCSVParserSuite) TestCRLF(c *C) { Separator: ",", Delimiter: `"`, } - parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader("a\rb\r\nc\n\n\n\nd"), config.ReadBlockSize, s.ioWorkers, false) + parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader("a\rb\r\nc\n\n\n\nd"), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -591,7 +591,7 @@ func (s *testMydumpCSVParserSuite) TestQuotedSeparator(c *C) { Delimiter: `"`, } - parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader(`",",','`), config.ReadBlockSize, s.ioWorkers, false) + parser := mydump.NewCSVParser(&cfg, mydump.NewStringReader(`",",','`), int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ RowID: 1, @@ -624,7 +624,7 @@ func (s *testMydumpCSVParserSuite) TestConsecutiveFields(c *C) { "\"\"\v", } - s.runFailingTestCases(c, &cfg, config.ReadBlockSize, testCases) + s.runFailingTestCases(c, &cfg, int64(config.ReadBlockSize), testCases) } func (s *testMydumpCSVParserSuite) TestSpecialChars(c *C) { @@ -660,7 +660,7 @@ func (s *testMydumpCSVParserSuite) TestSpecialChars(c *C) { }, } - s.runTestCases(c, &cfg, config.ReadBlockSize, testCases) + s.runTestCases(c, &cfg, int64(config.ReadBlockSize), testCases) } func (s *testMydumpCSVParserSuite) TestContinuation(c *C) { @@ -762,7 +762,7 @@ func (s *testMydumpCSVParserSuite) TestReadError(c *C) { Delimiter: `"`, } - parser := mydump.NewCSVParser(&cfg, &errorReader{}, config.ReadBlockSize, s.ioWorkers, false) + parser := mydump.NewCSVParser(&cfg, &errorReader{}, int64(config.ReadBlockSize), s.ioWorkers, false) c.Assert(parser.ReadRow(), ErrorMatches, "fake read error") } diff --git a/lightning/mydump/parquet_parser.go b/lightning/mydump/parquet_parser.go index 64470e54c..b1808fc9c 100644 --- a/lightning/mydump/parquet_parser.go +++ b/lightning/mydump/parquet_parser.go @@ -2,18 +2,19 @@ package mydump import ( "context" + "fmt" "io" "reflect" - - "go.uber.org/zap" + "time" "github.com/pingcap/br/pkg/storage" - "github.com/pingcap/errors" "github.com/pingcap/tidb-lightning/lightning/log" "github.com/pingcap/tidb/types" + "github.com/xitongsys/parquet-go/parquet" preader "github.com/xitongsys/parquet-go/reader" "github.com/xitongsys/parquet-go/source" + "go.uber.org/zap" ) const ( @@ -21,14 +22,15 @@ const ( ) type ParquetParser struct { - Reader *preader.ParquetReader - columns []string - rows []interface{} - readRows int64 - curStart int64 - curIndex int - lastRow Row - logger log.Logger + Reader *preader.ParquetReader + columns []string + columnMetas []*parquet.SchemaElement + rows []interface{} + readRows int64 + curStart int64 + curIndex int + lastRow Row + logger log.Logger } // readerWrapper is a used for implement `source.ParquetFile` @@ -82,18 +84,21 @@ func NewParquetParser( return nil, errors.Trace(err) } - 
columns := make([]string, 0, len(reader.Footer.Schema)) - for i, c := range reader.Footer.Schema { + columns := make([]string, 0, len(reader.Footer.Schema)-1) + columnMetas := make([]*parquet.SchemaElement, 0, len(reader.Footer.Schema)-1) + for i, c := range reader.SchemaHandler.SchemaElements { if c.GetNumChildren() == 0 { // the SchemaElement.Name is capitalized, we should use the original name columns = append(columns, reader.SchemaHandler.Infos[i].ExName) + columnMetas = append(columnMetas, c) } } return &ParquetParser{ - Reader: reader, - columns: columns, - logger: log.L(), + Reader: reader, + columns: columns, + columnMetas: columnMetas, + logger: log.L(), }, nil } @@ -166,17 +171,22 @@ func (pp *ParquetParser) ReadRow() error { pp.lastRow.Row = pp.lastRow.Row[:length] } for i := 0; i < length; i++ { - setDatumValue(&pp.lastRow.Row[i], v.Field(i)) + setDatumValue(&pp.lastRow.Row[i], v.Field(i), pp.columnMetas[i]) } return nil } -func setDatumValue(d *types.Datum, v reflect.Value) { +// convert a parquet value to Datum +// +// See: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md +func setDatumValue(d *types.Datum, v reflect.Value, meta *parquet.SchemaElement) { switch v.Kind() { case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: d.SetUint64(v.Uint()) - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + case reflect.Int8, reflect.Int16: d.SetInt64(v.Int()) + case reflect.Int32, reflect.Int64: + setDatumByInt(d, v.Int(), meta) case reflect.String: d.SetString(v.String(), "") case reflect.Float32, reflect.Float64: @@ -185,7 +195,7 @@ func setDatumValue(d *types.Datum, v reflect.Value) { if v.IsNil() { d.SetNull() } else { - setDatumValue(d, v.Elem()) + setDatumValue(d, v.Elem(), meta) } default: log.L().Fatal("unknown value", zap.Stringer("kind", v.Kind()), @@ -193,6 +203,49 @@ func setDatumValue(d *types.Datum, v reflect.Value) { } } +// when the value type is int32/int64, convert to value to target logical type in tidb +func setDatumByInt(d *types.Datum, v int64, meta *parquet.SchemaElement) { + if meta.ConvertedType == nil { + d.SetInt64(v) + return + } + switch *meta.ConvertedType { + // decimal + case parquet.ConvertedType_DECIMAL: + minLen := *meta.Scale + 1 + if v < 0 { + minLen++ + } + val := fmt.Sprintf("%0*d", minLen, v) + dotIndex := len(val) - int(*meta.Scale) + d.SetString(val[:dotIndex]+"."+val[dotIndex:], "") + case parquet.ConvertedType_DATE: + dateStr := time.Unix(v*86400, 0).Format("2006-01-02") + d.SetString(dateStr, "") + // convert all timestamp types (datetime/timestamp) to string + case parquet.ConvertedType_TIMESTAMP_MICROS: + dateStr := time.Unix(v/1e6, (v%1e6)*1e3).Format("2006-01-02 15:04:05.999") + d.SetString(dateStr, "") + case parquet.ConvertedType_TIMESTAMP_MILLIS: + dateStr := time.Unix(v/1e3, (v%1e3)*1e6).Format("2006-01-02 15:04:05.999") + d.SetString(dateStr, "") + // covert time types to string + case parquet.ConvertedType_TIME_MILLIS, parquet.ConvertedType_TIME_MICROS: + if *meta.ConvertedType == parquet.ConvertedType_TIME_MICROS { + v /= 1e3 + } + millis := v % 1e3 + v /= 1e3 + sec := v % 60 + v /= 60 + min := v % 60 + v /= 60 + d.SetString(fmt.Sprintf("%d:%d:%d.%3d", v, min, sec, millis), "") + default: + d.SetInt64(v) + } +} + func (pp *ParquetParser) LastRow() Row { return pp.lastRow } diff --git a/lightning/mydump/parquet_parser_test.go b/lightning/mydump/parquet_parser_test.go index 58bca7367..de117d044 100644 --- a/lightning/mydump/parquet_parser_test.go +++ 
b/lightning/mydump/parquet_parser_test.go @@ -7,10 +7,8 @@ import ( "strconv" "github.com/pingcap/br/pkg/storage" - - "github.com/pingcap/tidb/types" - . "github.com/pingcap/check" + "github.com/pingcap/tidb/types" "github.com/xitongsys/parquet-go-source/local" writer2 "github.com/xitongsys/parquet-go/writer" ) @@ -81,3 +79,128 @@ func (s testParquetParserSuite) TestParquetParser(c *C) { c.Assert(reader.ReadRow(), Equals, io.EOF) } + +func (s testParquetParserSuite) TestParquetVariousTypes(c *C) { + type Test struct { + Date int32 `parquet:"name=date, type=DATE"` + TimeMillis int32 `parquet:"name=timemillis, type=TIME_MILLIS"` + TimeMicros int64 `parquet:"name=timemicros, type=TIME_MICROS"` + TimestampMillis int64 `parquet:"name=timestampmillis, type=TIMESTAMP_MILLIS"` + TimestampMicros int64 `parquet:"name=timestampmicros, type=TIMESTAMP_MICROS"` + + Decimal1 int32 `parquet:"name=decimal1, type=DECIMAL, scale=2, precision=9, basetype=INT32"` + Decimal2 int32 `parquet:"name=decimal2, type=DECIMAL, scale=4, precision=4, basetype=INT32"` + Decimal3 int64 `parquet:"name=decimal3, type=DECIMAL, scale=2, precision=18, basetype=INT64"` + Decimal4 string `parquet:"name=decimal4, type=DECIMAL, scale=2, precision=10, basetype=FIXED_LEN_BYTE_ARRAY, length=12"` + Decimal5 string `parquet:"name=decimal5, type=DECIMAL, scale=2, precision=20, basetype=BYTE_ARRAY"` + Decimal6 int32 `parquet:"name=decimal6, type=DECIMAL, scale=4, precision=4, basetype=INT32"` + } + + dir := c.MkDir() + // prepare data + name := "test123.parquet" + testPath := filepath.Join(dir, name) + pf, err := local.NewLocalFileWriter(testPath) + c.Assert(err, IsNil) + test := &Test{} + writer, err := writer2.NewParquetWriter(pf, test, 2) + c.Assert(err, IsNil) + + v := &Test{ + Date: 18564, //2020-10-29 + TimeMillis: 62775123, // 17:26:15.123 + TimeMicros: 62775123000, // 17:26:15.123 + TimestampMillis: 1603963672356, // 2020-10-29T17:27:52.356 + TimestampMicros: 1603963672356956, //2020-10-29T17:27:52.356956 + Decimal1: -12345678, // -123456.78 + Decimal2: 456, // 0.0456 + Decimal3: 123456789012345678, //1234567890123456.78 + Decimal4: "-12345678.09", + Decimal5: "-1234567890123456.78", + Decimal6: -1, // -0.0001 + } + c.Assert(writer.Write(v), IsNil) + c.Assert(writer.WriteStop(), IsNil) + c.Assert(pf.Close(), IsNil) + + store, err := storage.NewLocalStorage(dir) + c.Assert(err, IsNil) + r, err := store.Open(context.TODO(), name) + c.Assert(err, IsNil) + reader, err := NewParquetParser(context.TODO(), store, r, name) + c.Assert(err, IsNil) + defer reader.Close() + + c.Assert(len(reader.columns), Equals, 11) + + c.Assert(reader.ReadRow(), IsNil) + c.Assert(reader.lastRow.Row, DeepEquals, []types.Datum{ + types.NewCollationStringDatum("2020-10-29", "", 0), + types.NewCollationStringDatum("17:26:15.123", "", 0), + types.NewCollationStringDatum("17:26:15.123", "", 0), + types.NewCollationStringDatum("2020-10-29 17:27:52.356", "", 0), + types.NewCollationStringDatum("2020-10-29 17:27:52.356", "", 0), + types.NewCollationStringDatum("-123456.78", "", 0), + types.NewCollationStringDatum("0.0456", "", 0), + types.NewCollationStringDatum("1234567890123456.78", "", 0), + types.NewCollationStringDatum("-12345678.09", "", 0), + types.NewCollationStringDatum("-1234567890123456.78", "", 0), + types.NewCollationStringDatum("-0.0001", "", 0), + }) + + type TestDecimal struct { + Decimal1 int32 `parquet:"name=decimal1, type=DECIMAL, scale=3, precision=5, basetype=INT32"` + DecimalRef *int32 `parquet:"name=decimal2, type=DECIMAL, scale=3, 
precision=5, basetype=INT32"` + } + + cases := [][]interface{}{ + {int32(0), "0.000"}, + {int32(1000), "1.000"}, + {int32(-1000), "-1.000"}, + {int32(999), "0.999"}, + {int32(-999), "-0.999"}, + {int32(1), "0.001"}, + {int32(-1), "-0.001"}, + } + + fileName := "test.02.parquet" + testPath = filepath.Join(dir, fileName) + pf, err = local.NewLocalFileWriter(testPath) + td := &TestDecimal{} + c.Assert(err, IsNil) + writer, err = writer2.NewParquetWriter(pf, td, 2) + c.Assert(err, IsNil) + for i, testCase := range cases { + val := testCase[0].(int32) + td.Decimal1 = val + if i%2 == 0 { + td.DecimalRef = &val + } else { + td.DecimalRef = nil + } + c.Assert(writer.Write(td), IsNil) + } + c.Assert(writer.WriteStop(), IsNil) + c.Assert(pf.Close(), IsNil) + + r, err = store.Open(context.TODO(), fileName) + c.Assert(err, IsNil) + reader, err = NewParquetParser(context.TODO(), store, r, fileName) + c.Assert(err, IsNil) + defer reader.Close() + + for i, testCase := range cases { + c.Assert(reader.ReadRow(), IsNil) + vals := []types.Datum{types.NewCollationStringDatum(testCase[1].(string), "", 0)} + if i%2 == 0 { + vals = append(vals, vals[0]) + } else { + vals = append(vals, types.Datum{}) + } + // because we always reuse the datums in reader.lastRow.Row, so we can't directly + // compare will `DeepEqual` here + eq, err := types.EqualDatums(nil, reader.lastRow.Row, vals) + c.Assert(err, IsNil) + c.Assert(eq, IsTrue) + } +} diff --git a/lightning/mydump/parser_test.go b/lightning/mydump/parser_test.go index 63f36db15..a0b285a92 100644 --- a/lightning/mydump/parser_test.go +++ b/lightning/mydump/parser_test.go @@ -66,7 +66,7 @@ func (s *testMydumpParserSuite) TestReadRow(c *C) { "insert another_table values (10,11e1,12, '(13)', '(', 14, ')');", ) - parser := mydump.NewChunkParser(mysql.ModeNone, reader, config.ReadBlockSize, s.ioWorkers) + parser := mydump.NewChunkParser(mysql.ModeNone, reader, int64(config.ReadBlockSize), s.ioWorkers) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.LastRow(), DeepEquals, mydump.Row{ @@ -136,7 +136,7 @@ func (s *testMydumpParserSuite) TestReadChunks(c *C) { INSERT foo VALUES (29,30,31,32),(33,34,35,36); `) - parser := mydump.NewChunkParser(mysql.ModeNone, reader, config.ReadBlockSize, s.ioWorkers) + parser := mydump.NewChunkParser(mysql.ModeNone, reader, int64(config.ReadBlockSize), s.ioWorkers) chunks, err := mydump.ReadChunks(parser, 32) c.Assert(err, IsNil) @@ -182,7 +182,7 @@ func (s *testMydumpParserSuite) TestNestedRow(c *C) { ("789",CONVERT("[]" USING UTF8MB4)); `) - parser := mydump.NewChunkParser(mysql.ModeNone, reader, config.ReadBlockSize, s.ioWorkers) + parser := mydump.NewChunkParser(mysql.ModeNone, reader, int64(config.ReadBlockSize), s.ioWorkers) chunks, err := mydump.ReadChunks(parser, 96) c.Assert(err, IsNil) @@ -349,7 +349,7 @@ func (s *testMydumpParserSuite) TestVariousSyntax(c *C) { }, } - s.runTestCases(c, mysql.ModeNone, config.ReadBlockSize, testCases) + s.runTestCases(c, mysql.ModeNone, int64(config.ReadBlockSize), testCases) } func (s *testMydumpParserSuite) TestContinuation(c *C) { @@ -415,7 +415,7 @@ func (s *testMydumpParserSuite) TestPseudoKeywords(c *C) { ) VALUES (); `) - parser := mydump.NewChunkParser(mysql.ModeNone, reader, config.ReadBlockSize, s.ioWorkers) + parser := mydump.NewChunkParser(mysql.ModeNone, reader, int64(config.ReadBlockSize), s.ioWorkers) c.Assert(parser.ReadRow(), IsNil) c.Assert(parser.Columns(), DeepEquals, []string{ "c", "c", @@ -482,7 +482,7 @@ func (s *testMydumpParserSuite) TestSyntaxError(c *C) { "/* ...", } 
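The DECIMAL branch of setDatumByInt above, exercised by TestParquetVariousTypes, turns a raw integer into a decimal string by zero-padding it to at least scale+1 digits and inserting the dot scale digits from the right. A standalone sketch of that formatting (decimalString is a local helper written for illustration, not an exported parser function):

package main

import "fmt"

func decimalString(v int64, scale int) string {
	minLen := scale + 1
	if v < 0 {
		minLen++ // leave room for the sign
	}
	val := fmt.Sprintf("%0*d", minLen, v)
	dot := len(val) - scale
	return val[:dot] + "." + val[dot:]
}

func main() {
	fmt.Println(decimalString(-12345678, 2)) // -123456.78
	fmt.Println(decimalString(456, 4))       // 0.0456
	fmt.Println(decimalString(-1, 4))        // -0.0001
}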
- s.runFailingTestCases(c, mysql.ModeNone, config.ReadBlockSize, inputs) + s.runFailingTestCases(c, mysql.ModeNone, int64(config.ReadBlockSize), inputs) } // Various syntax error cases collected via fuzzing. diff --git a/lightning/mydump/region.go b/lightning/mydump/region.go index 54736f3e0..d40c213b0 100644 --- a/lightning/mydump/region.go +++ b/lightning/mydump/region.go @@ -160,7 +160,7 @@ func MakeTableRegions( // If a csv file is overlarge, we need to split it into multiple regions. // Note: We can only split a csv file whose format is strict. - if isCsvFile && dataFileSize > cfg.Mydumper.MaxRegionSize && cfg.Mydumper.StrictFormat { + if isCsvFile && dataFileSize > int64(cfg.Mydumper.MaxRegionSize) && cfg.Mydumper.StrictFormat { var ( regions []*TableRegion subFileSizes []float64 @@ -197,7 +197,7 @@ func MakeTableRegions( } log.L().Debug("in makeTableRegions", - zap.Int64("maxRegionSize", cfg.Mydumper.MaxRegionSize), + zap.Int64("maxRegionSize", int64(cfg.Mydumper.MaxRegionSize)), zap.Int("len fileRegions", len(filesRegions))) AllocateEngineIDs(filesRegions, dataFileSizes, float64(cfg.Mydumper.BatchSize), cfg.Mydumper.BatchImportRatio, float64(cfg.App.TableConcurrency)) @@ -257,7 +257,7 @@ func SplitLargeFile( ioWorker *worker.Pool, store storage.ExternalStorage, ) (prevRowIdMax int64, regions []*TableRegion, dataFileSizes []float64, err error) { - maxRegionSize := cfg.Mydumper.MaxRegionSize + maxRegionSize := int64(cfg.Mydumper.MaxRegionSize) dataFileSizes = make([]float64, 0, dataFile.Size/maxRegionSize+1) startOffset, endOffset := int64(0), maxRegionSize var columns []string @@ -266,7 +266,7 @@ func SplitLargeFile( if err != nil { return 0, nil, nil, err } - parser := NewCSVParser(&cfg.Mydumper.CSV, r, cfg.Mydumper.ReadBlockSize, ioWorker, true) + parser := NewCSVParser(&cfg.Mydumper.CSV, r, int64(cfg.Mydumper.ReadBlockSize), ioWorker, true) if err = parser.ReadColumns(); err != nil { return 0, nil, nil, err } @@ -282,7 +282,7 @@ func SplitLargeFile( if err != nil { return 0, nil, nil, err } - parser := NewCSVParser(&cfg.Mydumper.CSV, r, cfg.Mydumper.ReadBlockSize, ioWorker, false) + parser := NewCSVParser(&cfg.Mydumper.CSV, r, int64(cfg.Mydumper.ReadBlockSize), ioWorker, false) if err = parser.SetPos(endOffset, prevRowIdMax); err != nil { return 0, nil, nil, err } diff --git a/lightning/mydump/region_test.go b/lightning/mydump/region_test.go index bfb41beaa..4abe0ba1c 100644 --- a/lightning/mydump/region_test.go +++ b/lightning/mydump/region_test.go @@ -216,7 +216,7 @@ func (s *testMydumpRegionSuite) TestSplitLargeFile(c *C) { colCnt := int64(3) columns := []string{"a", "b", "c"} for _, tc := range []struct { - maxRegionSize int64 + maxRegionSize config.ByteSize chkCnt int offsets [][]int64 }{ diff --git a/lightning/mydump/router.go b/lightning/mydump/router.go index 93e5b9240..74c687bfa 100644 --- a/lightning/mydump/router.go +++ b/lightning/mydump/router.go @@ -176,6 +176,9 @@ type regexRouterParser struct{} func (p regexRouterParser) Parse(r *config.FileRouteRule) (*RegexRouter, error) { rule := &RegexRouter{} + if r.Path == "" && r.Pattern == "" { + return nil, errors.New("`path` and `pattern` must not be both empty in [[mydumper.files]]") + } if r.Path != "" && r.Pattern != "" { return nil, errors.New("can't set both `path` and `pattern` field in [[mydumper.files]]") } diff --git a/lightning/mydump/router_test.go b/lightning/mydump/router_test.go index 3be8e6e66..9b06e2fb0 100644 --- a/lightning/mydump/router_test.go +++ b/lightning/mydump/router_test.go @@ -40,6 +40,37 @@ 
func (t *testFileRouterSuite) TestRouteParser(c *C) { } } +func (t *testFileRouterSuite) TestInvalidRouteRule(c *C) { + rule := &config.FileRouteRule{} + rules := []*config.FileRouteRule{rule} + _, err := NewFileRouter(rules) + c.Assert(err, ErrorMatches, "`path` and `pattern` must not be both empty in \\[\\[mydumper.files\\]\\]") + + rule.Pattern = `^(?:[^/]*/)*([^/.]+)\.(?P[^./]+)(?:\.(?P[0-9]+))?\.(?Pcsv|sql)(?:\.(?P[A-Za-z0-9]+))?$` + _, err = NewFileRouter(rules) + c.Assert(err, ErrorMatches, "field 'type' match pattern can't be empty") + + rule.Type = "$type" + _, err = NewFileRouter(rules) + c.Assert(err, ErrorMatches, "field 'schema' match pattern can't be empty") + + rule.Schema = "$schema" + _, err = NewFileRouter(rules) + c.Assert(err, ErrorMatches, "invalid named capture '\\$schema'") + + rule.Schema = "$1" + _, err = NewFileRouter(rules) + c.Assert(err, ErrorMatches, "field 'table' match pattern can't be empty") + + rule.Table = "$table" + _, err = NewFileRouter(rules) + c.Assert(err, IsNil) + + rule.Path = "/tmp/1.sql" + _, err = NewFileRouter(rules) + c.Assert(err, ErrorMatches, "can't set both `path` and `pattern` field in \\[\\[mydumper.files\\]\\]") +} + func (t *testFileRouterSuite) TestSingleRouteRule(c *C) { rules := []*config.FileRouteRule{ {Pattern: `^(?:[^/]*/)*([^/.]+)\.(?P
[^./]+)(?:\.(?P[0-9]+))?\.(?Pcsv|sql)(?:\.(?P[A-Za-z0-9]+))?$`, Schema: "$1", Table: "$table", Type: "$type", Key: "$key", Compression: "$cp"}, diff --git a/lightning/restore/checksum.go b/lightning/restore/checksum.go index ce07032bc..6ba0a781d 100644 --- a/lightning/restore/checksum.go +++ b/lightning/restore/checksum.go @@ -6,8 +6,11 @@ import ( "database/sql" "fmt" "sync" + "sync/atomic" "time" + "github.com/google/uuid" + "github.com/pingcap/br/pkg/checksum" "github.com/pingcap/errors" "github.com/pingcap/failpoint" @@ -27,7 +30,10 @@ import ( const ( preUpdateServiceSafePointFactor = 3 - serviceSafePointTTL = 10 * 60 // 10 min in seconds +) + +var ( + serviceSafePointTTL int64 = 10 * 60 // 10 min in seconds ) // RemoteChecksum represents a checksum result got from tidb. @@ -78,7 +84,11 @@ func newChecksumManager(rc *RestoreController) (ChecksumManager, error) { manager = newTiKVChecksumManager(store.(tikv.Storage).GetClient(), pdCli) } else { - manager = newTiDBChecksumExecutor(rc.tidbMgr.db) + db, err := rc.tidbGlue.GetDB() + if err != nil { + return nil, errors.Trace(err) + } + manager = newTiDBChecksumExecutor(db) } return manager, nil @@ -132,7 +142,7 @@ func (e *tidbChecksumExecutor) Checksum(ctx context.Context, tableInfo *TidbTabl // DoChecksum do checksum for tables. // table should be in .
, format. e.g. foo.bar -func DoChecksum(ctx context.Context, db *sql.DB, table *TidbTableInfo) (*RemoteChecksum, error) { +func DoChecksum(ctx context.Context, table *TidbTableInfo) (*RemoteChecksum, error) { var err error manager, ok := ctx.Value(&checksumManagerKey).(ChecksumManager) if !ok { @@ -242,10 +252,8 @@ type tikvChecksumManager struct { // newTiKVChecksumManager return a new tikv checksum manager func newTiKVChecksumManager(client kv.Client, pdClient pd.Client) *tikvChecksumManager { return &tikvChecksumManager{ - client: client, - manager: gcTTLManager{ - pdClient: pdClient, - }, + client: client, + manager: newGCTTLManager(pdClient), } } @@ -316,9 +324,22 @@ type gcTTLManager struct { tableGCSafeTS []*tableChecksumTS currentTs uint64 serviceID string + // 0 for not start, otherwise started + started uint32 +} + +func newGCTTLManager(pdClient pd.Client) gcTTLManager { + return gcTTLManager{ + pdClient: pdClient, + serviceID: fmt.Sprintf("lightning-%s", uuid.New()), + } } func (m *gcTTLManager) addOneJob(ctx context.Context, table string, ts uint64) error { + // start gc ttl loop if not started yet. + if atomic.CompareAndSwapUint32(&m.started, 0, 1) { + m.start(ctx) + } m.lock.Lock() defer m.lock.Unlock() var curTs uint64 diff --git a/lightning/restore/checksum_test.go b/lightning/restore/checksum_test.go index ba1ae234d..241080dcb 100644 --- a/lightning/restore/checksum_test.go +++ b/lightning/restore/checksum_test.go @@ -4,9 +4,21 @@ import ( "context" "database/sql" "fmt" + "sort" + "strings" "sync" + "sync/atomic" "time" + "github.com/pingcap/parser" + "github.com/pingcap/parser/ast" + "github.com/pingcap/tidb/ddl" + "github.com/pingcap/tidb/store/tikv/oracle" + tmock "github.com/pingcap/tidb/util/mock" + + "github.com/pingcap/tidb/kv" + "github.com/pingcap/tipb/go-tipb" + pd "github.com/tikv/pd/client" "github.com/DATA-DOG/go-sqlmock" @@ -46,7 +58,7 @@ func (s *checksumSuite) TestDoChecksum(c *C) { mock.ExpectClose() ctx := MockDoChecksumCtx(db) - checksum, err := DoChecksum(ctx, db, &TidbTableInfo{DB: "test", Name: "t"}) + checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) c.Assert(err, IsNil) c.Assert(*checksum, DeepEquals, RemoteChecksum{ Schema: "test", @@ -91,7 +103,7 @@ func (s *checksumSuite) TestDoChecksumParallel(c *C) { for i := 0; i < 5; i++ { go func() { defer wg.Done() - checksum, err := DoChecksum(ctx, db, &TidbTableInfo{DB: "test", Name: "t"}) + checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) c.Assert(err, IsNil) c.Assert(*checksum, DeepEquals, RemoteChecksum{ Schema: "test", @@ -130,7 +142,7 @@ func (s *checksumSuite) TestIncreaseGCLifeTimeFail(c *C) { wg.Add(5) for i := 0; i < 5; i++ { go func() { - _, err = DoChecksum(ctx, db, &TidbTableInfo{DB: "test", Name: "t"}) + _, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) c.Assert(err, ErrorMatches, "update GC lifetime failed: update gc error: context canceled") wg.Done() }() @@ -144,6 +156,33 @@ func (s *checksumSuite) TestIncreaseGCLifeTimeFail(c *C) { c.Assert(mock.ExpectationsWereMet(), IsNil) } +func (s *checksumSuite) TestDoChecksumWithTikv(c *C) { + // set up mock tikv checksum manager + pdClient := &testPDClient{} + resp := tipb.ChecksumResponse{Checksum: 123, TotalKvs: 10, TotalBytes: 1000} + kvClient := &mockChecksumKVClient{checksum: resp, respDur: time.Second * 5} + checksumExec := &tikvChecksumManager{manager: newGCTTLManager(pdClient), client: kvClient} + + // mock a table info + p := parser.New() + se := tmock.NewContext() + node, err := 
p.ParseOneStmt("CREATE TABLE `t1` (`c1` varchar(5) NOT NULL)", "utf8mb4", "utf8mb4_bin") + c.Assert(err, IsNil) + tableInfo, err := ddl.MockTableInfo(se, node.(*ast.CreateTableStmt), 999) + c.Assert(err, IsNil) + + startTs := oracle.ComposeTS(time.Now().Unix()*1000, 0) + ctx := context.WithValue(context.Background(), &checksumManagerKey, checksumExec) + _, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t", Core: tableInfo}) + c.Assert(err, IsNil) + + // after checksum, safepint should be small than start ts + ts := pdClient.currentSafePoint() + // 1ms for the schedule deviation + c.Assert(ts <= startTs+1, IsTrue) + c.Assert(atomic.LoadUint32(&checksumExec.manager.started) > 0, IsTrue) +} + func (s *checksumSuite) TestDoChecksumWithErrorAndLongOriginalLifetime(c *C) { db, mock, err := sqlmock.New() c.Assert(err, IsNil) @@ -158,23 +197,96 @@ func (s *checksumSuite) TestDoChecksumWithErrorAndLongOriginalLifetime(c *C) { mock.ExpectClose() ctx := MockDoChecksumCtx(db) - _, err = DoChecksum(ctx, db, &TidbTableInfo{DB: "test", Name: "t"}) + _, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) c.Assert(err, ErrorMatches, "compute remote checksum failed: mock syntax error.*") c.Assert(db.Close(), IsNil) c.Assert(mock.ExpectationsWereMet(), IsNil) } +type safePointTTL struct { + safePoint uint64 + // ttl is the last timestamp this safe point is valid + ttl int64 +} + type testPDClient struct { + sync.Mutex pd.Client + count int32 + gcSafePoint []safePointTTL +} + +func (c *testPDClient) currentSafePoint() uint64 { + ts := time.Now().Unix() + c.Lock() + defer c.Unlock() + for _, s := range c.gcSafePoint { + if s.ttl > ts { + return s.safePoint + } + } + return 0 } func (c *testPDClient) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) { - return 0, nil + if !strings.HasPrefix(serviceID, "lightning") { + panic("service ID must start with 'lightning'") + } + atomic.AddInt32(&c.count, 1) + c.Lock() + idx := sort.Search(len(c.gcSafePoint), func(i int) bool { + return c.gcSafePoint[i].safePoint >= safePoint + }) + sp := c.gcSafePoint + ttlEnd := time.Now().Unix() + ttl + spTTL := safePointTTL{safePoint: safePoint, ttl: ttlEnd} + switch { + case idx >= len(sp): + c.gcSafePoint = append(c.gcSafePoint, spTTL) + case sp[idx].safePoint == safePoint: + if ttlEnd > sp[idx].ttl { + sp[idx].ttl = ttlEnd + } + default: + c.gcSafePoint = append(append(sp[:idx], spTTL), sp[idx:]...) + } + c.Unlock() + return c.currentSafePoint(), nil +} + +func (s *checksumSuite) TestGcTTLManagerSingle(c *C) { + pdClient := &testPDClient{} + manager := newGCTTLManager(pdClient) + c.Assert(manager.serviceID, Not(Equals), "") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + oldTTL := serviceSafePointTTL + // set serviceSafePointTTL to 3 second, so lightning will update it in each 1 seconds. 
+ serviceSafePointTTL = 3 + defer func() { + serviceSafePointTTL = oldTTL + }() + + err := manager.addOneJob(ctx, "test", uint64(time.Now().Unix())) + c.Assert(err, IsNil) + + time.Sleep(6*time.Second + 10*time.Millisecond) + + // after 6 seconds, must at least update 5 times + val := atomic.LoadInt32(&pdClient.count) + c.Assert(val, GreaterEqual, int32(5)) + + // after remove the job, there are no job remain, gc ttl needn't to be updated + manager.removeOneJob("test") + time.Sleep(10 * time.Millisecond) + val = atomic.LoadInt32(&pdClient.count) + time.Sleep(3*time.Second + 10*time.Millisecond) + c.Assert(atomic.LoadInt32(&pdClient.count), Equals, val) } -func (s *checksumSuite) TestGcTTLManager(c *C) { - manager := gcTTLManager{pdClient: &testPDClient{}} +func (s *checksumSuite) TestGcTTLManagerMulti(c *C) { + manager := newGCTTLManager(&testPDClient{}) ctx := context.Background() for i := uint64(1); i <= 5; i++ { @@ -198,3 +310,60 @@ func (s *checksumSuite) TestGcTTLManager(c *C) { manager.removeOneJob("test5") c.Assert(manager.currentTs, Equals, uint64(0)) } + +func (s *checksumSuite) TestPdServiceID(c *C) { + pdCli := &testPDClient{} + gcTTLManager1 := newGCTTLManager(pdCli) + c.Assert(gcTTLManager1.serviceID, Matches, "lightning-.*") + gcTTLManager2 := newGCTTLManager(pdCli) + c.Assert(gcTTLManager2.serviceID, Matches, "lightning-.*") + + c.Assert(gcTTLManager1.serviceID != gcTTLManager2.serviceID, IsTrue) +} + +type mockResponse struct { + finished bool + data []byte +} + +func (r *mockResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) { + if r.finished { + return nil, nil + } + r.finished = true + return &mockResultSubset{data: r.data}, nil +} +func (r *mockResponse) Close() error { + return nil +} + +type mockResultSubset struct { + data []byte +} + +func (r *mockResultSubset) GetData() []byte { + return r.data +} + +func (r *mockResultSubset) GetStartKey() kv.Key { + return []byte{} +} +func (r *mockResultSubset) MemSize() int64 { + return 0 +} +func (r *mockResultSubset) RespTime() time.Duration { + return time.Millisecond +} + +type mockChecksumKVClient struct { + kv.Client + checksum tipb.ChecksumResponse + respDur time.Duration +} + +// a mock client for checksum request +func (c *mockChecksumKVClient) Send(ctx context.Context, req *kv.Request, vars *kv.Variables) kv.Response { + data, _ := c.checksum.Marshal() + time.Sleep(c.respDur) + return &mockResponse{data: data} +} diff --git a/lightning/restore/restore.go b/lightning/restore/restore.go index 7df375da8..011d7c4b7 100644 --- a/lightning/restore/restore.go +++ b/lightning/restore/restore.go @@ -15,7 +15,6 @@ package restore import ( "context" - "database/sql" "fmt" "io" "math" @@ -31,6 +30,7 @@ import ( "github.com/pingcap/failpoint" sstpb "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/parser/model" + "github.com/pingcap/tidb-lightning/lightning/glue" tidbcfg "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/meta/autoid" "github.com/pingcap/tidb/table" @@ -138,7 +138,7 @@ type RestoreController struct { ioWorkers *worker.Pool pauser *common.Pauser backend kv.Backend - tidbMgr *TiDBManager + tidbGlue glue.Glue postProcessLock sync.Mutex // a simple way to ensure post-processing is not concurrent without using complicated goroutines alterTableLock sync.Mutex compactState int32 @@ -156,8 +156,14 @@ type RestoreController struct { checksumManager ChecksumManager } -func NewRestoreController(ctx context.Context, dbMetas []*mydump.MDDatabaseMeta, cfg *config.Config, s 
storage.ExternalStorage) (*RestoreController, error) { - return NewRestoreControllerWithPauser(ctx, dbMetas, cfg, s, DeliverPauser) +func NewRestoreController( + ctx context.Context, + dbMetas []*mydump.MDDatabaseMeta, + cfg *config.Config, + s storage.ExternalStorage, + g glue.Glue, +) (*RestoreController, error) { + return NewRestoreControllerWithPauser(ctx, dbMetas, cfg, s, DeliverPauser, g) } func NewRestoreControllerWithPauser( @@ -166,16 +172,14 @@ func NewRestoreControllerWithPauser( cfg *config.Config, s storage.ExternalStorage, pauser *common.Pauser, + g glue.Glue, ) (*RestoreController, error) { tls, err := cfg.ToTLS() if err != nil { return nil, err } - if err = cfg.TiDB.Security.RegisterMySQL(); err != nil { - return nil, err - } - cpdb, err := OpenCheckpointsDB(ctx, cfg) + cpdb, err := g.OpenCheckpointsDB(ctx, cfg) if err != nil { return nil, errors.Trace(err) } @@ -188,11 +192,6 @@ func NewRestoreControllerWithPauser( return nil, errors.Trace(err) } - tidbMgr, err := NewTiDBManager(cfg.TiDB, tls) - if err != nil { - return nil, errors.Trace(err) - } - var backend kv.Backend switch cfg.TikvImporter.Backend { case config.BackendImporter: @@ -202,11 +201,15 @@ func NewRestoreControllerWithPauser( return nil, err } case config.BackendTiDB: - backend = kv.NewTiDBBackend(tidbMgr.db, cfg.TikvImporter.OnDuplicate) + db, err := DBFromConfig(cfg.TiDB) + if err != nil { + return nil, errors.Trace(err) + } + backend = kv.NewTiDBBackend(db, cfg.TikvImporter.OnDuplicate) case config.BackendLocal: - backend, err = kv.NewLocalBackend(ctx, tls, cfg.TiDB.PdAddr, cfg.TikvImporter.RegionSplitSize, + backend, err = kv.NewLocalBackend(ctx, tls, cfg.TiDB.PdAddr, int64(cfg.TikvImporter.RegionSplitSize), cfg.TikvImporter.SortedKVDir, cfg.TikvImporter.RangeConcurrency, cfg.TikvImporter.SendKVPairs, - cfg.Checkpoint.Enable) + cfg.Checkpoint.Enable, g) if err != nil { return nil, err } @@ -223,7 +226,7 @@ func NewRestoreControllerWithPauser( ioWorkers: worker.NewPool(ctx, cfg.App.IOConcurrency, "io"), pauser: pauser, backend: backend, - tidbMgr: tidbMgr, + tidbGlue: g, rowFormatVer: "1", tls: tls, @@ -238,35 +241,9 @@ func NewRestoreControllerWithPauser( return rc, nil } -func OpenCheckpointsDB(ctx context.Context, cfg *config.Config) (CheckpointsDB, error) { - if !cfg.Checkpoint.Enable { - return NewNullCheckpointsDB(), nil - } - - switch cfg.Checkpoint.Driver { - case config.CheckpointDriverMySQL: - db, err := sql.Open("mysql", cfg.Checkpoint.DSN) - if err != nil { - return nil, errors.Trace(err) - } - cpdb, err := NewMySQLCheckpointsDB(ctx, db, cfg.Checkpoint.Schema, cfg.TaskID) - if err != nil { - db.Close() - return nil, errors.Trace(err) - } - return cpdb, nil - - case config.CheckpointDriverFile: - return NewFileCheckpointsDB(cfg.Checkpoint.DSN), nil - - default: - return nil, errors.Errorf("Unknown checkpoint driver %s", cfg.Checkpoint.Driver) - } -} - func (rc *RestoreController) Close() { rc.backend.Close() - rc.tidbMgr.Close() + rc.tidbGlue.GetSQLExecutor().Close() } func (rc *RestoreController) Run(ctx context.Context) error { @@ -317,14 +294,15 @@ outside: } func (rc *RestoreController) restoreSchema(ctx context.Context) error { - tidbMgr, err := NewTiDBManager(rc.cfg.TiDB, rc.tls) - if err != nil { - return errors.Trace(err) - } - defer tidbMgr.Close() - if !rc.cfg.Mydumper.NoSchema { - tidbMgr.db.ExecContext(ctx, "SET SQL_MODE = ?", rc.cfg.TiDB.StrSQLMode) + if rc.tidbGlue.OwnsSQLExecutor() { + db, err := DBFromConfig(rc.cfg.TiDB) + if err != nil { + return errors.Trace(err) + } + defer 
db.Close() + db.ExecContext(ctx, "SET SQL_MODE = ?", rc.cfg.TiDB.StrSQLMode) + } for _, dbMeta := range rc.dbMetas { task := log.With(zap.String("db", dbMeta.Name)).Begin(zap.InfoLevel, "restore table schema") @@ -333,7 +311,7 @@ func (rc *RestoreController) restoreSchema(ctx context.Context) error { for _, tblMeta := range dbMeta.Tables { tablesSchema[tblMeta.Name] = tblMeta.GetSchema(ctx, rc.store) } - err = tidbMgr.InitSchema(ctx, dbMeta.Name, tablesSchema) + err := InitSchema(ctx, rc.tidbGlue, dbMeta.Name, tablesSchema) task.End(zap.ErrorLevel, err) if err != nil { @@ -341,7 +319,11 @@ func (rc *RestoreController) restoreSchema(ctx context.Context) error { } } } - dbInfos, err := tidbMgr.LoadSchemaInfo(ctx, rc.dbMetas, rc.backend.FetchRemoteTableModels) + getTableFunc := rc.backend.FetchRemoteTableModels + if !rc.tidbGlue.OwnsSQLExecutor() { + getTableFunc = rc.tidbGlue.GetTables + } + dbInfos, err := LoadSchemaInfo(ctx, rc.dbMetas, getTableFunc) if err != nil { return errors.Trace(err) } @@ -359,7 +341,7 @@ func (rc *RestoreController) restoreSchema(ctx context.Context) error { go rc.listenCheckpointUpdates() - rc.rowFormatVer = ObtainRowFormatVersion(ctx, tidbMgr.db) + rc.rowFormatVer = ObtainRowFormatVersion(ctx, rc.tidbGlue.GetSQLExecutor()) // Estimate the number of chunks for progress reporting err = rc.estimateChunkCountIntoMetrics(ctx) @@ -451,7 +433,7 @@ func (rc *RestoreController) estimateChunkCountIntoMetrics(ctx context.Context) } if fileMeta.FileMeta.Type == mydump.SourceTypeCSV { cfg := rc.cfg.Mydumper - if fileMeta.Size > cfg.MaxRegionSize && cfg.StrictFormat && !cfg.CSV.Header { + if fileMeta.Size > int64(cfg.MaxRegionSize) && cfg.StrictFormat && !cfg.CSV.Header { estimatedChunkCount += math.Round(float64(fileMeta.Size) / float64(cfg.MaxRegionSize)) } else { estimatedChunkCount += 1 @@ -463,6 +445,7 @@ func (rc *RestoreController) estimateChunkCountIntoMetrics(ctx context.Context) } } metric.ChunkCounter.WithLabelValues(metric.ChunkStateEstimated).Add(estimatedChunkCount) + rc.tidbGlue.Record(glue.RecordEstimatedChunk, uint64(estimatedChunkCount)) return nil } @@ -571,6 +554,9 @@ func (rc *RestoreController) runPeriodicActions(ctx context.Context, stop <-chan logProgressTicker := time.NewTicker(rc.cfg.Cron.LogProgress.Duration) defer logProgressTicker.Stop() + glueProgressTicker := time.NewTicker(3 * time.Second) + defer glueProgressTicker.Stop() + var switchModeChan <-chan time.Time // tide backend don't need to switch tikv to import mode if rc.cfg.TikvImporter.Backend != config.BackendTiDB { @@ -630,6 +616,9 @@ func (rc *RestoreController) runPeriodicActions(ctx context.Context, stop <-chan zap.String("state", state), remaining, ) + case <-glueProgressTicker.C: + finished := metric.ReadCounter(metric.ChunkCounter.WithLabelValues(metric.ChunkStateFinished)) + rc.tidbGlue.Record(glue.RecordFinishedChunk, uint64(finished)) } } } @@ -1167,10 +1156,10 @@ func (t *TableRestore) postProcess(ctx context.Context, rc *RestoreController, c tblInfo := t.tableInfo.Core var err error if tblInfo.PKIsHandle && tblInfo.ContainsAutoRandomBits() { - err = AlterAutoRandom(ctx, rc.tidbMgr.db, t.tableName, t.alloc.Get(autoid.AutoRandomType).Base()+1) + err = AlterAutoRandom(ctx, rc.tidbGlue.GetSQLExecutor(), t.tableName, t.alloc.Get(autoid.AutoRandomType).Base()+1) } else if common.TableHasAutoRowID(tblInfo) || tblInfo.GetAutoIncrementColInfo() != nil { // only alter auto increment id iff table contains auto-increment column or generated handle - err = AlterAutoIncrement(ctx, 
rc.tidbMgr.db, t.tableName, t.alloc.Get(autoid.RowIDAllocType).Base()+1) + err = AlterAutoIncrement(ctx, rc.tidbGlue.GetSQLExecutor(), t.tableName, t.alloc.Get(autoid.RowIDAllocType).Base()+1) } rc.alterTableLock.Unlock() rc.saveStatusCheckpoint(t.tableName, WholeTableEngineID, err, CheckpointStatusAlteredAutoInc) @@ -1200,7 +1189,7 @@ func (t *TableRestore) postProcess(ctx context.Context, rc *RestoreController, c t.logger.Info("skip checksum") rc.saveStatusCheckpoint(t.tableName, WholeTableEngineID, nil, CheckpointStatusChecksumSkipped) } else { - err := t.compareChecksum(ctx, rc.tidbMgr.db, localChecksum) + err := t.compareChecksum(ctx, localChecksum) // witch post restore level 'optional', we will skip checksum error if rc.cfg.PostRestore.Checksum == config.OpLevelOptional { if err != nil { @@ -1221,7 +1210,7 @@ func (t *TableRestore) postProcess(ctx context.Context, rc *RestoreController, c t.logger.Info("skip analyze") rc.saveStatusCheckpoint(t.tableName, WholeTableEngineID, nil, CheckpointStatusAnalyzeSkipped) } else { - err := t.analyzeTable(ctx, rc.tidbMgr.db) + err := t.analyzeTable(ctx, rc.tidbGlue.GetSQLExecutor()) // witch post restore level 'optional', we will skip analyze error if rc.cfg.PostRestore.Analyze == config.OpLevelOptional { if err != nil { @@ -1311,7 +1300,7 @@ func (rc *RestoreController) checkRequirements(_ context.Context) error { func (rc *RestoreController) setGlobalVariables(ctx context.Context) error { // set new collation flag base on tidb config - enabled := ObtainNewCollationEnabled(ctx, rc.tidbMgr.db) + enabled := ObtainNewCollationEnabled(ctx, rc.tidbGlue.GetSQLExecutor()) // we should enable/disable new collation here since in server mode, tidb config // may be different in different tasks collate.SetNewCollationEnabledForTest(enabled) @@ -1366,7 +1355,7 @@ func newChunkRestore( store storage.ExternalStorage, tableInfo *TidbTableInfo, ) (*chunkRestore, error) { - blockBufSize := cfg.Mydumper.ReadBlockSize + blockBufSize := int64(cfg.Mydumper.ReadBlockSize) reader, err := store.Open(ctx, chunk.Key.Path) if err != nil { @@ -1620,8 +1609,8 @@ func (tr *TableRestore) importKV(ctx context.Context, closedEngine *kv.ClosedEng } // do checksum for each table. -func (tr *TableRestore) compareChecksum(ctx context.Context, db *sql.DB, localChecksum verify.KVChecksum) error { - remoteChecksum, err := DoChecksum(ctx, db, tr.tableInfo) +func (tr *TableRestore) compareChecksum(ctx context.Context, localChecksum verify.KVChecksum) error { + remoteChecksum, err := DoChecksum(ctx, tr.tableInfo) if err != nil { return errors.Trace(err) } @@ -1640,10 +1629,9 @@ func (tr *TableRestore) compareChecksum(ctx context.Context, db *sql.DB, localCh return nil } -func (tr *TableRestore) analyzeTable(ctx context.Context, db *sql.DB) error { +func (tr *TableRestore) analyzeTable(ctx context.Context, g glue.SQLExecutor) error { task := tr.logger.Begin(zap.InfoLevel, "analyze") - err := common.SQLWithRetry{DB: db, Logger: tr.logger}. - Exec(ctx, "analyze table", "ANALYZE TABLE "+tr.tableName) + err := g.ExecuteWithLog(ctx, "ANALYZE TABLE "+tr.tableName, "analyze table", tr.logger) task.End(zap.ErrorLevel, err) return err } @@ -1904,6 +1892,8 @@ func (cr *chunkRestore) restore( SQLMode: rc.cfg.TiDB.SQLMode, Timestamp: cr.chunk.Timestamp, RowFormatVersion: rc.rowFormatVer, + // use chunk.PrevRowIDMax as the auto random seed, so it can stay the same value after recover from checkpoint. 
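The comment above states the key invariant of this change: because the session option's auto-random seed is taken from chunk.PrevRowIDMax, replaying a chunk after a checkpoint restart regenerates the same row IDs instead of drifting. A tiny sketch of why a fixed seed makes the assignment repeatable, where nextAutoRandom is only an illustrative stand-in for the backend's row-ID allocation:

package main

import "fmt"

// nextAutoRandom hands out a deterministic sequence from a fixed starting
// point, mimicking allocation seeded by the chunk's PrevRowIDMax.
func nextAutoRandom(prevRowIDMax int64, n int) []int64 {
	ids := make([]int64, 0, n)
	for i := int64(1); i <= int64(n); i++ {
		ids = append(ids, prevRowIDMax+i)
	}
	return ids
}

func main() {
	const prevRowIDMax = 1000 // persisted in the chunk checkpoint
	firstRun := nextAutoRandom(prevRowIDMax, 3)
	resumedRun := nextAutoRandom(prevRowIDMax, 3) // after restart: same seed, same IDs
	fmt.Println(firstRun, resumedRun)             // [1001 1002 1003] [1001 1002 1003]
}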
+ AutoRandomSeed: cr.chunk.Chunk.PrevRowIDMax, }) kvsCh := make(chan []deliveredKVs, maxKVQueueSize) deliverCompleteCh := make(chan deliverResult) diff --git a/lightning/restore/restore_test.go b/lightning/restore/restore_test.go index e5afbd676..e94613f01 100644 --- a/lightning/restore/restore_test.go +++ b/lightning/restore/restore_test.go @@ -22,6 +22,7 @@ import ( "github.com/DATA-DOG/go-sqlmock" "github.com/golang/mock/gomock" + "github.com/google/uuid" "github.com/pingcap/br/pkg/storage" . "github.com/pingcap/check" "github.com/pingcap/errors" @@ -31,10 +32,10 @@ import ( "github.com/pingcap/parser/ast" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" + "github.com/pingcap/tidb-lightning/lightning/glue" filter "github.com/pingcap/tidb-tools/pkg/table-filter" "github.com/pingcap/tidb/ddl" tmock "github.com/pingcap/tidb/util/mock" - uuid "github.com/satori/go.uuid" kv "github.com/pingcap/tidb-lightning/lightning/backend" "github.com/pingcap/tidb-lightning/lightning/checkpoints" @@ -660,7 +661,7 @@ func (s *tableRestoreSuite) TestCompareChecksumSuccess(c *C) { mock.ExpectClose() ctx := MockDoChecksumCtx(db) - err = s.tr.compareChecksum(ctx, db, verification.MakeKVChecksum(1234567, 12345, 1234567890)) + err = s.tr.compareChecksum(ctx, verification.MakeKVChecksum(1234567, 12345, 1234567890)) c.Assert(err, IsNil) c.Assert(db.Close(), IsNil) @@ -688,7 +689,7 @@ func (s *tableRestoreSuite) TestCompareChecksumFailure(c *C) { mock.ExpectClose() ctx := MockDoChecksumCtx(db) - err = s.tr.compareChecksum(ctx, db, verification.MakeKVChecksum(9876543, 54321, 1357924680)) + err = s.tr.compareChecksum(ctx, verification.MakeKVChecksum(9876543, 54321, 1357924680)) c.Assert(err, ErrorMatches, "checksum mismatched.*") c.Assert(db.Close(), IsNil) @@ -704,7 +705,10 @@ func (s *tableRestoreSuite) TestAnalyzeTable(c *C) { mock.ExpectClose() ctx := context.Background() - err = s.tr.analyzeTable(ctx, db) + defaultSQLMode, err := mysql.GetSQLMode(mysql.DefaultSQLMode) + c.Assert(err, IsNil) + g := glue.NewExternalTiDBGlue(db, defaultSQLMode) + err = s.tr.analyzeTable(ctx, g) c.Assert(err, IsNil) c.Assert(db.Close(), IsNil) @@ -718,7 +722,7 @@ func (s *tableRestoreSuite) TestImportKVSuccess(c *C) { importer := kv.MakeBackend(mockBackend) ctx := context.Background() - engineUUID := uuid.NewV4() + engineUUID := uuid.New() mockBackend.EXPECT(). CloseEngine(ctx, engineUUID). @@ -743,7 +747,7 @@ func (s *tableRestoreSuite) TestImportKVFailure(c *C) { importer := kv.MakeBackend(mockBackend) ctx := context.Background() - engineUUID := uuid.NewV4() + engineUUID := uuid.New() mockBackend.EXPECT(). CloseEngine(ctx, engineUUID). diff --git a/lightning/restore/tidb.go b/lightning/restore/tidb.go index 9f1493436..4d3b09166 100644 --- a/lightning/restore/tidb.go +++ b/lightning/restore/tidb.go @@ -28,6 +28,7 @@ import ( "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/parser/terror" + "github.com/pingcap/tidb-lightning/lightning/glue" . 
"github.com/pingcap/tidb-lightning/lightning/checkpoints" "github.com/pingcap/tidb-lightning/lightning/common" @@ -62,7 +63,7 @@ func isUnknownSystemVariableErr(err error) bool { return code == mysql.ErrUnknownSystemVariable } -func NewTiDBManager(dsn config.DBStore, tls *common.TLS) (*TiDBManager, error) { +func DBFromConfig(dsn config.DBStore) (*sql.DB, error) { param := common.MySQLConnectParam{ Host: dsn.Host, Port: dsn.Port, @@ -95,6 +96,14 @@ func NewTiDBManager(dsn config.DBStore, tls *common.TLS) (*TiDBManager, error) { return nil, errors.Trace(err) } } + return db, nil +} + +func NewTiDBManager(dsn config.DBStore, tls *common.TLS) (*TiDBManager, error) { + db, err := DBFromConfig(dsn) + if err != nil { + return nil, errors.Trace(err) + } return NewTiDBManagerWithDB(db, dsn.SQLMode), nil } @@ -115,41 +124,40 @@ func (timgr *TiDBManager) Close() { timgr.db.Close() } -func (timgr *TiDBManager) InitSchema(ctx context.Context, database string, tablesSchema map[string]string) error { - sql := common.SQLWithRetry{ - DB: timgr.db, - Logger: log.With(zap.String("db", database)), - } +func InitSchema(ctx context.Context, g glue.Glue, database string, tablesSchema map[string]string) error { + logger := log.With(zap.String("db", database)) + sqlExecutor := g.GetSQLExecutor() var createDatabase strings.Builder createDatabase.WriteString("CREATE DATABASE IF NOT EXISTS ") common.WriteMySQLIdentifier(&createDatabase, database) - err := sql.Exec(ctx, "create database", createDatabase.String()) + err := sqlExecutor.ExecuteWithLog(ctx, createDatabase.String(), "create database", logger) if err != nil { return errors.Trace(err) } var useDB strings.Builder useDB.WriteString("USE ") common.WriteMySQLIdentifier(&useDB, database) - err = sql.Exec(ctx, "use database", useDB.String()) + err = sqlExecutor.ExecuteWithLog(ctx, useDB.String(), "use database", logger) if err != nil { return errors.Trace(err) } - task := sql.Logger.Begin(zap.InfoLevel, "create tables") + task := logger.Begin(zap.InfoLevel, "create tables") for tbl, sqlCreateTable := range tablesSchema { task.Debug("create table", zap.String("schema", sqlCreateTable)) - sqlCreateTable, err = timgr.createTableIfNotExistsStmt(sqlCreateTable, tbl) + sqlCreateTable, err = createTableIfNotExistsStmt(g.GetParser(), sqlCreateTable, tbl) if err != nil { break } - sql2 := common.SQLWithRetry{ - DB: timgr.db, - Logger: sql.Logger.With(zap.String("table", common.UniqueTable(database, tbl))), - HideQueryLog: true, - } - err = sql2.Exec(ctx, "create table", sqlCreateTable) + + err = sqlExecutor.ExecuteWithLog( + ctx, + sqlCreateTable, + "create table", + logger.With(zap.String("table", common.UniqueTable(database, tbl))), + ) if err != nil { break } @@ -159,8 +167,8 @@ func (timgr *TiDBManager) InitSchema(ctx context.Context, database string, table return errors.Trace(err) } -func (timgr *TiDBManager) createTableIfNotExistsStmt(createTable, tblName string) (string, error) { - stmts, _, err := timgr.parser.Parse(createTable, "", "") +func createTableIfNotExistsStmt(p *parser.Parser, createTable, tblName string) (string, error) { + stmts, _, err := p.Parse(createTable, "", "") if err != nil { return "", err } @@ -192,7 +200,7 @@ func (timgr *TiDBManager) DropTable(ctx context.Context, tableName string) error return sql.Exec(ctx, "drop table", "DROP TABLE "+tableName) } -func (timgr *TiDBManager) LoadSchemaInfo( +func LoadSchemaInfo( ctx context.Context, schemas []*mydump.MDDatabaseMeta, getTables func(context.Context, string) ([]*model.TableInfo, error), @@ 
-236,7 +244,9 @@ func (timgr *TiDBManager) LoadSchemaInfo( func ObtainGCLifeTime(ctx context.Context, db *sql.DB) (string, error) { var gcLifeTime string - err := common.SQLWithRetry{DB: db, Logger: log.L()}.QueryRow(ctx, "obtain GC lifetime", + err := common.SQLWithRetry{DB: db, Logger: log.L()}.QueryRow( + ctx, + "obtain GC lifetime", "SELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'", &gcLifeTime, ) @@ -254,24 +264,27 @@ func UpdateGCLifeTime(ctx context.Context, db *sql.DB, gcLifeTime string) error ) } -func ObtainRowFormatVersion(ctx context.Context, db *sql.DB) (rowFormatVersion string) { - err := common.SQLWithRetry{DB: db, Logger: log.L()}.QueryRow(ctx, "obtain row format version", +func ObtainRowFormatVersion(ctx context.Context, g glue.SQLExecutor) string { + rowFormatVersion, err := g.ObtainStringWithLog( + ctx, "SELECT @@tidb_row_format_version", - &rowFormatVersion, + "obtain row format version", + log.L(), ) if err != nil { rowFormatVersion = "1" } - return + return rowFormatVersion } -func ObtainNewCollationEnabled(ctx context.Context, db *sql.DB) bool { - var newCollationVal string - err := common.SQLWithRetry{DB: db, Logger: log.L()}.QueryRow(ctx, "obtain new collation enabled", +func ObtainNewCollationEnabled(ctx context.Context, g glue.SQLExecutor) bool { + newCollationEnabled := false + newCollationVal, err := g.ObtainStringWithLog( + ctx, "SELECT variable_value FROM mysql.tidb WHERE variable_name = 'new_collation_enabled'", - &newCollationVal, + "obtain new collation enabled", + log.L(), ) - newCollationEnabled := false if err == nil && newCollationVal == "True" { newCollationEnabled = true } @@ -284,14 +297,11 @@ func ObtainNewCollationEnabled(ctx context.Context, db *sql.DB) bool { // NOTE: since tidb can make sure the auto id is always be rebase even if the `incr` value is smaller // the the auto incremanet base in tidb side, we needn't fetch currently auto increment value here. 
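Because TiDB rebases the allocator itself whenever the supplied base is smaller than its own, the two helpers below only need to push the base past the largest row ID Lightning allocated. A hedged sketch of the statements they build, assuming the table name is already backtick-quoted (as common.UniqueTable produces) and with execution left out:

package main

import "fmt"

// alterAutoIncrementSQL and alterAutoRandomBaseSQL mirror the fmt.Sprintf
// queries issued by AlterAutoIncrement and AlterAutoRandom.
func alterAutoIncrementSQL(table string, incr int64) string {
	return fmt.Sprintf("ALTER TABLE %s AUTO_INCREMENT=%d", table, incr)
}

func alterAutoRandomBaseSQL(table string, base int64) string {
	return fmt.Sprintf("ALTER TABLE %s AUTO_RANDOM_BASE=%d", table, base)
}

func main() {
	// after restoring `db`.`tbl`, push both bases to allocator base + 1
	fmt.Println(alterAutoIncrementSQL("`db`.`tbl`", 12346))
	fmt.Println(alterAutoRandomBaseSQL("`db`.`tbl`", 12346))
}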
// See: https://github.com/pingcap/tidb/blob/64698ef9a3358bfd0fdc323996bb7928a56cadca/ddl/ddl_api.go#L2528-L2533 -func AlterAutoIncrement(ctx context.Context, db *sql.DB, tableName string, incr int64) error { - sql := common.SQLWithRetry{ - DB: db, - Logger: log.With(zap.String("table", tableName), zap.Int64("auto_increment", incr)), - } +func AlterAutoIncrement(ctx context.Context, g glue.SQLExecutor, tableName string, incr int64) error { + logger := log.With(zap.String("table", tableName), zap.Int64("auto_increment", incr)) query := fmt.Sprintf("ALTER TABLE %s AUTO_INCREMENT=%d", tableName, incr) - task := sql.Logger.Begin(zap.InfoLevel, "alter table auto_increment") - err := sql.Exec(ctx, "alter table auto_increment", query) + task := logger.Begin(zap.InfoLevel, "alter table auto_increment") + err := g.ExecuteWithLog(ctx, query, "alter table auto_increment", logger) task.End(zap.ErrorLevel, err) if err != nil { task.Error( @@ -302,14 +312,11 @@ func AlterAutoIncrement(ctx context.Context, db *sql.DB, tableName string, incr return errors.Annotatef(err, "%s", query) } -func AlterAutoRandom(ctx context.Context, db *sql.DB, tableName string, randomBase int64) error { - sql := common.SQLWithRetry{ - DB: db, - Logger: log.With(zap.String("table", tableName), zap.Int64("auto_random", randomBase)), - } +func AlterAutoRandom(ctx context.Context, g glue.SQLExecutor, tableName string, randomBase int64) error { + logger := log.With(zap.String("table", tableName), zap.Int64("auto_random", randomBase)) query := fmt.Sprintf("ALTER TABLE %s AUTO_RANDOM_BASE=%d", tableName, randomBase) - task := sql.Logger.Begin(zap.InfoLevel, "alter table auto_random") - err := sql.Exec(ctx, "alter table auto_random_base", query) + task := logger.Begin(zap.InfoLevel, "alter table auto_random") + err := g.ExecuteWithLog(ctx, query, "alter table auto_random_base", logger) task.End(zap.ErrorLevel, err) if err != nil { task.Error( diff --git a/lightning/restore/tidb_test.go b/lightning/restore/tidb_test.go index b1c8a84ef..3397ad110 100644 --- a/lightning/restore/tidb_test.go +++ b/lightning/restore/tidb_test.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/parser/ast" "github.com/pingcap/parser/model" tmysql "github.com/pingcap/parser/mysql" + "github.com/pingcap/tidb-lightning/lightning/glue" "github.com/pingcap/tidb/ddl" "github.com/pingcap/tidb/util/mock" @@ -38,6 +39,7 @@ type tidbSuite struct { mockDB sqlmock.Sqlmock handler http.Handler timgr *TiDBManager + tiGlue glue.Glue } func TestTiDB(t *testing.T) { @@ -53,6 +55,7 @@ func (s *tidbSuite) SetUpTest(c *C) { c.Assert(err, IsNil) s.timgr = NewTiDBManagerWithDB(db, defaultSQLMode) + s.tiGlue = glue.NewExternalTiDBGlue(db, defaultSQLMode) } func (s *tidbSuite) TearDownTest(c *C) { @@ -62,7 +65,7 @@ func (s *tidbSuite) TearDownTest(c *C) { func (s *tidbSuite) TestCreateTableIfNotExistsStmt(c *C) { createTableIfNotExistsStmt := func(createTable, tableName string) string { - res, err := s.timgr.createTableIfNotExistsStmt(createTable, tableName) + res, err := createTableIfNotExistsStmt(s.tiGlue.GetParser(), createTable, tableName) c.Assert(err, IsNil) return res } @@ -158,7 +161,7 @@ func (s *tidbSuite) TestInitSchema(c *C) { ExpectClose() s.mockDB.MatchExpectationsInOrder(false) // maps are unordered. 
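The rewritten tests below keep the existing go-sqlmock pattern: each expected statement is registered as a regular expression (the \Q…\E wrapper makes the match literal) against a mocked *sql.DB, which is then handed to the code under test. A self-contained sketch of that pattern outside the suite, using a query invented purely for illustration:

package main

import (
	"context"
	"fmt"

	"github.com/DATA-DOG/go-sqlmock"
)

func main() {
	db, mock, err := sqlmock.New()
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// expectation is a regex; \Q...\E makes it match the literal statement
	mock.ExpectExec("\\QCREATE DATABASE IF NOT EXISTS `db`\\E").
		WillReturnResult(sqlmock.NewResult(1, 1))

	if _, err := db.ExecContext(context.Background(), "CREATE DATABASE IF NOT EXISTS `db`"); err != nil {
		panic(err)
	}
	fmt.Println("expectations met:", mock.ExpectationsWereMet() == nil)
}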
- err := s.timgr.InitSchema(ctx, "db", map[string]string{ + err := InitSchema(ctx, s.tiGlue, "db", map[string]string{ "t1": "create table t1 (a int primary key, b varchar(200));", "t2": "/*!40014 SET FOREIGN_KEY_CHECKS=0*/;CREATE TABLE `db`.`t2` (xx TEXT) AUTO_INCREMENT=11203;", }) @@ -178,7 +181,7 @@ func (s *tidbSuite) TestInitSchemaSyntaxError(c *C) { s.mockDB. ExpectClose() - err := s.timgr.InitSchema(ctx, "db", map[string]string{ + err := InitSchema(ctx, s.tiGlue, "db", map[string]string{ "t1": "create table `t1` with invalid syntax;", }) c.Assert(err, NotNil) @@ -202,7 +205,7 @@ func (s *tidbSuite) TestInitSchemaUnsupportedSchemaError(c *C) { s.mockDB. ExpectClose() - err := s.timgr.InitSchema(ctx, "db", map[string]string{ + err := InitSchema(ctx, s.tiGlue, "db", map[string]string{ "t1": "create table `t1` (a VARCHAR(999999999));", }) c.Assert(err, ErrorMatches, ".*Column length too big.*") @@ -240,7 +243,7 @@ func (s *tidbSuite) TestLoadSchemaInfo(c *C) { tableInfos = append(tableInfos, info) } - loaded, err := s.timgr.LoadSchemaInfo(ctx, []*mydump.MDDatabaseMeta{{Name: "db"}}, func(ctx context.Context, schema string) ([]*model.TableInfo, error) { + loaded, err := LoadSchemaInfo(ctx, []*mydump.MDDatabaseMeta{{Name: "db"}}, func(ctx context.Context, schema string) ([]*model.TableInfo, error) { c.Assert(schema, Equals, "db") return tableInfos, nil }) @@ -269,7 +272,7 @@ func (s *tidbSuite) TestLoadSchemaInfo(c *C) { func (s *tidbSuite) TestLoadSchemaInfoMissing(c *C) { ctx := context.Background() - _, err := s.timgr.LoadSchemaInfo(ctx, []*mydump.MDDatabaseMeta{{Name: "asdjalsjdlas"}}, func(ctx context.Context, schema string) ([]*model.TableInfo, error) { + _, err := LoadSchemaInfo(ctx, []*mydump.MDDatabaseMeta{{Name: "asdjalsjdlas"}}, func(ctx context.Context, schema string) ([]*model.TableInfo, error) { return nil, errors.Errorf("[schema:1049]Unknown database '%s'", schema) }) c.Assert(err, ErrorMatches, ".*Unknown database.*") @@ -312,7 +315,7 @@ func (s *tidbSuite) TestAlterAutoInc(c *C) { s.mockDB. ExpectClose() - err := AlterAutoIncrement(ctx, s.timgr.db, "`db`.`table`", 12345) + err := AlterAutoIncrement(ctx, s.tiGlue.GetSQLExecutor(), "`db`.`table`", 12345) c.Assert(err, IsNil) } @@ -325,7 +328,7 @@ func (s *tidbSuite) TestAlterAutoRandom(c *C) { s.mockDB. ExpectClose() - err := AlterAutoRandom(ctx, s.timgr.db, "`db`.`table`", 12345) + err := AlterAutoRandom(ctx, s.tiGlue.GetSQLExecutor(), "`db`.`table`", 12345) c.Assert(err, IsNil) } @@ -338,7 +341,7 @@ func (s *tidbSuite) TestObtainRowFormatVersionSucceed(c *C) { s.mockDB. ExpectClose() - version := ObtainRowFormatVersion(ctx, s.timgr.db) + version := ObtainRowFormatVersion(ctx, s.tiGlue.GetSQLExecutor()) c.Assert(version, Equals, "2") } @@ -351,7 +354,7 @@ func (s *tidbSuite) TestObtainRowFormatVersionFailure(c *C) { s.mockDB. ExpectClose() - version := ObtainRowFormatVersion(ctx, s.timgr.db) + version := ObtainRowFormatVersion(ctx, s.tiGlue.GetSQLExecutor()) c.Assert(version, Equals, "1") } @@ -360,7 +363,7 @@ func (s *tidbSuite) TestObtainNewCollationEnabled(c *C) { s.mockDB. 
ExpectQuery("\\QSELECT variable_value FROM mysql.tidb WHERE variable_name = 'new_collation_enabled'\\E") - version := ObtainNewCollationEnabled(ctx, s.timgr.db) + version := ObtainNewCollationEnabled(ctx, s.tiGlue.GetSQLExecutor()) c.Assert(version, Equals, false) kvMap := map[string]bool{ @@ -372,7 +375,7 @@ func (s *tidbSuite) TestObtainNewCollationEnabled(c *C) { ExpectQuery("\\QSELECT variable_value FROM mysql.tidb WHERE variable_name = 'new_collation_enabled'\\E"). WillReturnRows(sqlmock.NewRows([]string{"variable_value"}).AddRow(k)) - version := ObtainNewCollationEnabled(ctx, s.timgr.db) + version := ObtainNewCollationEnabled(ctx, s.tiGlue.GetSQLExecutor()) c.Assert(version, Equals, v) } s.mockDB. diff --git a/mock/backend.go b/mock/backend.go index 273cf3210..cebc7d476 100644 --- a/mock/backend.go +++ b/mock/backend.go @@ -12,10 +12,10 @@ import ( time "time" gomock "github.com/golang/mock/gomock" + uuid "github.com/google/uuid" model "github.com/pingcap/parser/model" table "github.com/pingcap/tidb/table" types "github.com/pingcap/tidb/types" - uuid "github.com/satori/go.uuid" backend "github.com/pingcap/tidb-lightning/lightning/backend" log "github.com/pingcap/tidb-lightning/lightning/log" diff --git a/tests/alter_random/run.sh b/tests/alter_random/run.sh index 08967bc87..79fd2735c 100644 --- a/tests/alter_random/run.sh +++ b/tests/alter_random/run.sh @@ -26,6 +26,9 @@ for backend in tidb importer local; do run_sql 'DROP DATABASE IF EXISTS alter_random;' run_lightning --backend $backend + run_sql "SELECT count(*) from alter_random.t" + check_contains "count(*): 3" + run_sql "SELECT id & b'000001111111111111111111111111111111111111111111111111111111111' as inc FROM alter_random.t" check_contains 'inc: 1' check_contains 'inc: 2' diff --git a/tests/auto_random_default/config.toml b/tests/auto_random_default/config.toml new file mode 100644 index 000000000..56616c750 --- /dev/null +++ b/tests/auto_random_default/config.toml @@ -0,0 +1,2 @@ +[mydumper] +max-region-size = 200 \ No newline at end of file diff --git a/tests/auto_random_default/data/auto_random-schema-create.sql b/tests/auto_random_default/data/auto_random-schema-create.sql new file mode 100644 index 000000000..122f86301 --- /dev/null +++ b/tests/auto_random_default/data/auto_random-schema-create.sql @@ -0,0 +1 @@ +CREATE DATABASE `auto_random` /*!40100 DEFAULT CHARACTER SET utf8mb4 */; diff --git a/tests/auto_random_default/data/auto_random.t-schema.sql b/tests/auto_random_default/data/auto_random.t-schema.sql new file mode 100644 index 000000000..95054b9fa --- /dev/null +++ b/tests/auto_random_default/data/auto_random.t-schema.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +CREATE TABLE `t` ( + `id` bigint unsigned primary key auto_random, + `s` varchar(32) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin; diff --git a/tests/auto_random_default/data/auto_random.t.0.sql b/tests/auto_random_default/data/auto_random.t.0.sql new file mode 100644 index 000000000..58a3bf22b --- /dev/null +++ b/tests/auto_random_default/data/auto_random.t.0.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `t` (`s`) VALUES +("test1"), +("test2"), +("test3"); diff --git a/tests/auto_random_default/data/auto_random.t.1.sql b/tests/auto_random_default/data/auto_random.t.1.sql new file mode 100644 index 000000000..6b9409b1b --- /dev/null +++ b/tests/auto_random_default/data/auto_random.t.1.sql @@ -0,0 +1,5 @@ +/*!40103 SET TIME_ZONE='+00:00' */; +INSERT INTO `t` (`s`) VALUES +(""), +(""), +(""); diff --git 
a/tests/auto_random_default/run.sh b/tests/auto_random_default/run.sh new file mode 100644 index 000000000..859c55efa --- /dev/null +++ b/tests/auto_random_default/run.sh @@ -0,0 +1,58 @@ +#!/bin/sh +# +# Copyright 2020 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu + +# FIXME: auto-random is only stable on master currently. +check_cluster_version 4 0 0 AUTO_RANDOM || exit 0 + +for backend in tidb importer local; do + if [ "$backend" = 'local' ]; then + check_cluster_version 4 0 0 'local backend' || continue + fi + + run_sql 'DROP DATABASE IF EXISTS auto_random;' + run_lightning --backend $backend + + run_sql "SELECT count(*) from auto_random.t" + check_contains "count(*): 6" + + run_sql "SELECT id & b'000001111111111111111111111111111111111111111111111111111111111' as inc FROM auto_random.t" + check_contains 'inc: 1' + check_contains 'inc: 2' + check_contains 'inc: 3' + if [ "$backend" = 'tidb' ]; then + check_contains 'inc: 4' + check_contains 'inc: 5' + check_contains 'inc: 6' + else + check_contains 'inc: 25' + check_contains 'inc: 26' + check_contains 'inc: 27' + fi + + + run_sql "select count(distinct id >> 58) as count from auto_random.t" + check_contains "count: 2" + + # auto random base is 4 + run_sql "INSERT INTO auto_random.t VALUES ();" + run_sql "SELECT id & b'000001111111111111111111111111111111111111111111111111111111111' as inc FROM auto_random.t" + if [ "$backend" = 'tidb' ]; then + check_contains 'inc: 2000001' + else + check_contains 'inc: 28' + fi +done diff --git a/tests/parquet/run.sh b/tests/parquet/run.sh index 6809612e9..3d2fd56cc 100755 --- a/tests/parquet/run.sh +++ b/tests/parquet/run.sh @@ -45,4 +45,8 @@ for BACKEND in local importer tidb; do run_sql 'select w_name from test.warehouse;' check_contains "w_name: eLNEDIW" + + run_sql 'select c_since, c_discount from test.customer where c_id = 20;' + check_contains "c_since: 2020-09-10 20:17:16" + check_contains "c_discount: 0.0585" done diff --git a/tests/s3/run.sh b/tests/s3/run.sh index 07bb7452d..99fffc920 100755 --- a/tests/s3/run.sh +++ b/tests/s3/run.sh @@ -28,7 +28,7 @@ DBPATH="$TEST_DIR/s3.mydump" export MINIO_ACCESS_KEY=s3accesskey export MINIO_SECRET_KEY=s3secretkey export MINIO_BROWSER=off -export S3_ENDPOINT=127.0.0.1:9000 +export S3_ENDPOINT=127.0.0.1:9900 rm -rf "$TEST_DIR/$DB" mkdir -p "$TEST_DIR/$DB" bin/minio server --address $S3_ENDPOINT "$DBPATH" & diff --git a/tests/various_types/data/vt.double-schema.sql b/tests/various_types/data/vt.double-schema.sql index 77762b03d..1735efc7e 100644 --- a/tests/various_types/data/vt.double-schema.sql +++ b/tests/various_types/data/vt.double-schema.sql @@ -1,4 +1,4 @@ CREATE TABLE `double` ( `ref` INT NOT NULL, - `pk` DOUBLE(16) NOT NULL PRIMARY KEY + `pk` DOUBLE NOT NULL PRIMARY KEY ); \ No newline at end of file diff --git a/tidb-lightning.toml b/tidb-lightning.toml index d7d5d1abb..12067df66 100644 --- a/tidb-lightning.toml +++ b/tidb-lightning.toml @@ -83,7 +83,7 @@ addr = "127.0.0.1:8287" #on-duplicate = "replace" # Maximum KV size of SST files produced in the 'local' backend. 
This should be the same as # the TiKV region size to avoid further region splitting. The default value is 96 MiB. -#region-split-size = 100_663_296 +#region-split-size = '96MiB' # write key-values pairs to tikv batch size #send-kv-pairs = 32768 # local storage directory used in "local" backend. @@ -95,10 +95,10 @@ addr = "127.0.0.1:8287" [mydumper] # block size of file reading -read-block-size = 65536 # Byte (default = 64 KB) +read-block-size = '64KiB' # minimum size (in terms of source data file) of each batch of import. # Lightning will split a large table into multiple engine files according to this size. -#batch-size = 107_374_182_400 # Byte (default = 100 GiB) +#batch-size = '100GiB' # Engine file needs to be imported sequentially. Due to table-concurrency, multiple engines will be # imported nearly the same time, and this will create a queue and this wastes resources. Therefore, @@ -131,7 +131,7 @@ case-sensitive = false strict-format = false # if strict-format is true, large CSV files will be split to multiple chunks, which Lightning # will restore in parallel. The size of each chunk is `max-region-size`, where the default is 256 MiB. -#max-region-size = 268_435_456 +#max-region-size = '256MiB' # enable file router to use the default rules. By default, it will be set to true if no `mydumper.files` # rule is provided, else false. You can explicitly set it to `true` to enable the default rules, they will @@ -144,7 +144,7 @@ strict-format = false #default-file-rules = false # only import tables if the wildcard rules are matched. See documention for details. -filter = ['*.*'] +filter = ['*.*', '!mysql.*', '!sys.*', '!INFORMATION_SCHEMA.*', '!PERFORMANCE_SCHEMA.*', '!METRICS_SCHEMA.*', '!INSPECTION_SCHEMA.*'] # CSV files are imported according to MySQL's LOAD DATA INFILE rules. [mydumper.csv] diff --git a/tools/go.sum b/tools/go.sum index de9bc29c2..5946c576c 100644 --- a/tools/go.sum +++ b/tools/go.sum @@ -203,6 +203,7 @@ github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8 h1:USx2/E1bX46VG32FI github.com/pingcap/check v0.0.0-20190102082844-67f458068fc8/go.mod h1:B1+S9LNcuMyLH/4HMTViQOJevkGiik3wW2AN9zb2fNQ= github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMtVcOkjUcuQKh+YrluSo7+7YMCQSzy30= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= From 9664ad9f52eb900267ff7c0a39500335c625e5a6 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Thu, 19 Nov 2020 19:51:42 +0800 Subject: [PATCH 2/4] address comment --- lightning/checkpoints/checkpoints.go | 10 ++++------ lightning/checkpoints/glue_checkpoint.go | 6 +++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/lightning/checkpoints/checkpoints.go b/lightning/checkpoints/checkpoints.go index 42eeb3ceb..f75314fdd 100644 --- a/lightning/checkpoints/checkpoints.go +++ b/lightning/checkpoints/checkpoints.go @@ -171,9 +171,7 @@ const ( UPDATE %s.%s SET status = ? WHERE table_name = ?;` UpdateEngineTemplate = ` UPDATE %s.%s SET status = ? 
WHERE (table_name, engine_id) = (?, ?);` - DeleteChunkTemplate = "DELETE FROM %s.%s WHERE table_name = ?;" - DeleteEngineTemplate = "DELETE FROM %s.%s WHERE table_name = ?;" - DeleteTableTemplate = "DELETE FROM %s.%s WHERE table_name = ?;" + DeleteCheckpointRecordTemplate = "DELETE FROM %s.%s WHERE table_name = ?;" ) func IsCheckpointTable(name string) bool { @@ -1157,9 +1155,9 @@ func (cpdb *MySQLCheckpointsDB) RemoveCheckpoint(ctx context.Context, tableName return s.Exec(ctx, "remove all checkpoints", "DROP SCHEMA "+cpdb.schema) } - deleteChunkQuery := fmt.Sprintf(DeleteChunkTemplate, cpdb.schema, CheckpointTableNameChunk) - deleteEngineQuery := fmt.Sprintf(DeleteEngineTemplate, cpdb.schema, CheckpointTableNameEngine) - deleteTableQuery := fmt.Sprintf(DeleteTableTemplate, cpdb.schema, CheckpointTableNameTable) + deleteChunkQuery := fmt.Sprintf(DeleteCheckpointRecordTemplate, cpdb.schema, CheckpointTableNameChunk) + deleteEngineQuery := fmt.Sprintf(DeleteCheckpointRecordTemplate, cpdb.schema, CheckpointTableNameEngine) + deleteTableQuery := fmt.Sprintf(DeleteCheckpointRecordTemplate, cpdb.schema, CheckpointTableNameTable) return s.Transact(ctx, "remove checkpoints", func(c context.Context, tx *sql.Tx) error { if _, e := tx.ExecContext(c, deleteChunkQuery, tableName); e != nil { diff --git a/lightning/checkpoints/glue_checkpoint.go b/lightning/checkpoints/glue_checkpoint.go index 0ea0fb33e..54b0c8964 100644 --- a/lightning/checkpoints/glue_checkpoint.go +++ b/lightning/checkpoints/glue_checkpoint.go @@ -512,11 +512,11 @@ func (g GlueCheckpointsDB) RemoveCheckpoint(ctx context.Context, tableName strin var tableNameBuilder strings.Builder common.EscapeMySQLSingleQuote(&tableNameBuilder, tableName) tableName = tableNameBuilder.String() - deleteChunkQuery := fmt.Sprintf(DeleteChunkTemplate, g.schema, CheckpointTableNameChunk) + deleteChunkQuery := fmt.Sprintf(DeleteCheckpointRecordTemplate, g.schema, CheckpointTableNameChunk) deleteChunkQuery = strings.ReplaceAll(deleteChunkQuery, "?", tableName) - deleteEngineQuery := fmt.Sprintf(DeleteEngineTemplate, g.schema, CheckpointTableNameEngine) + deleteEngineQuery := fmt.Sprintf(DeleteCheckpointRecordTemplate, g.schema, CheckpointTableNameEngine) deleteEngineQuery = strings.ReplaceAll(deleteEngineQuery, "?", tableName) - deleteTableQuery := fmt.Sprintf(DeleteTableTemplate, g.schema, CheckpointTableNameTable) + deleteTableQuery := fmt.Sprintf(DeleteCheckpointRecordTemplate, g.schema, CheckpointTableNameTable) deleteTableQuery = strings.ReplaceAll(deleteTableQuery, "?", tableName) return errors.Trace(Transact(ctx, "remove checkpoints", se, logger, func(c context.Context, s Session) error { From 766d6a204963779dc4497ce1b4e60f29dcbdceb7 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 20 Nov 2020 18:26:52 +0800 Subject: [PATCH 3/4] add some comments --- lightning/checkpoints/checkpoints.go | 8 +++----- lightning/checkpoints/checkpoints_sql_test.go | 2 +- lightning/checkpoints/glue_checkpoint.go | 7 +++---- lightning/glue/glue.go | 1 + 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lightning/checkpoints/checkpoints.go b/lightning/checkpoints/checkpoints.go index f75314fdd..4b8f1d896 100644 --- a/lightning/checkpoints/checkpoints.go +++ b/lightning/checkpoints/checkpoints.go @@ -489,7 +489,7 @@ func OpenCheckpointsDB(ctx context.Context, cfg *config.Config) (CheckpointsDB, if err != nil { return nil, errors.Trace(err) } - cpdb, err := NewMySQLCheckpointsDB(ctx, db, cfg.Checkpoint.Schema, cfg.TaskID) + cpdb, err := 
NewMySQLCheckpointsDB(ctx, db, cfg.Checkpoint.Schema) if err != nil { db.Close() return nil, errors.Trace(err) @@ -539,10 +539,9 @@ func (*NullCheckpointsDB) Update(map[string]*TableCheckpointDiff) {} type MySQLCheckpointsDB struct { db *sql.DB schema string - taskID int64 } -func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string, taskID int64) (*MySQLCheckpointsDB, error) { +func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string) (*MySQLCheckpointsDB, error) { var escapedSchemaName strings.Builder common.WriteMySQLIdentifier(&escapedSchemaName, schemaName) schema := escapedSchemaName.String() @@ -580,7 +579,6 @@ func NewMySQLCheckpointsDB(ctx context.Context, db *sql.DB, schemaName string, t return &MySQLCheckpointsDB{ db: db, schema: schema, - taskID: taskID, }, nil } @@ -616,7 +614,7 @@ func (cpdb *MySQLCheckpointsDB) Initialize(ctx context.Context, cfg *config.Conf for _, db := range dbInfo { for _, table := range db.Tables { tableName := common.UniqueTable(db.Name, table.Name) - _, err = stmt.ExecContext(c, cpdb.taskID, tableName, 0, table.ID) + _, err = stmt.ExecContext(c, cfg.TaskID, tableName, 0, table.ID) if err != nil { return errors.Trace(err) } diff --git a/lightning/checkpoints/checkpoints_sql_test.go b/lightning/checkpoints/checkpoints_sql_test.go index a7b15cc5d..0a9777dbd 100644 --- a/lightning/checkpoints/checkpoints_sql_test.go +++ b/lightning/checkpoints/checkpoints_sql_test.go @@ -46,7 +46,7 @@ func (s *cpSQLSuite) SetUpTest(c *C) { ExpectExec("CREATE TABLE IF NOT EXISTS `mock-schema`\\.chunk_v\\d+ .+"). WillReturnResult(sqlmock.NewResult(5, 1)) - cpdb, err := checkpoints.NewMySQLCheckpointsDB(context.Background(), s.db, "mock-schema", 1234) + cpdb, err := checkpoints.NewMySQLCheckpointsDB(context.Background(), s.db, "mock-schema") c.Assert(err, IsNil) c.Assert(s.mock.ExpectationsWereMet(), IsNil) s.cpdb = cpdb diff --git a/lightning/checkpoints/glue_checkpoint.go b/lightning/checkpoints/glue_checkpoint.go index 54b0c8964..e154ea00d 100644 --- a/lightning/checkpoints/glue_checkpoint.go +++ b/lightning/checkpoints/glue_checkpoint.go @@ -48,12 +48,12 @@ type Session interface { // TODO: Encapsulate Begin/Commit/Rollback txn, form SQL with args and query/iter/scan TiDB's RecordSet into a interface // to reuse MySQLCheckpointsDB. 
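GlueCheckpointsDB therefore holds a session factory rather than a long-lived connection: each checkpoint operation asks getSessionFunc for a fresh session from the host TiDB and closes it when done. A rough sketch of that shape under stated assumptions: Session is trimmed down from the interface defined above, and the table name and DELETE statement are simplified placeholders for the escaped name and DeleteCheckpointRecordTemplate used by the real code.

package main

import (
	"context"
	"fmt"
)

// Session is a trimmed stand-in for the interface in glue_checkpoint.go.
type Session interface {
	Execute(ctx context.Context, sql string) error
	Close()
}

type fakeSession struct{ id int }

func (s *fakeSession) Execute(ctx context.Context, sql string) error {
	fmt.Printf("session %d executes: %s\n", s.id, sql)
	return nil
}

func (s *fakeSession) Close() {}

type glueCheckpointsDB struct {
	getSessionFunc func() (Session, error) // obtains a new session from the host TiDB
	schema         string
}

func (g *glueCheckpointsDB) removeCheckpoint(ctx context.Context, quotedTable string) error {
	se, err := g.getSessionFunc() // one fresh session per operation
	if err != nil {
		return err
	}
	defer se.Close()
	// the real code formats the shared delete template for the chunk, engine
	// and table checkpoint tables; one statement is enough to show the shape.
	return se.Execute(ctx, fmt.Sprintf("DELETE FROM %s.chunk WHERE table_name = %s", g.schema, quotedTable))
}

func main() {
	next := 0
	db := &glueCheckpointsDB{
		schema: "`lightning_checkpoints`",
		getSessionFunc: func() (Session, error) {
			next++
			return &fakeSession{id: next}, nil
		},
	}
	_ = db.removeCheckpoint(context.Background(), "'`db1`.`t1`'")
	_ = db.removeCheckpoint(context.Background(), "'`db1`.`t2`'")
}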
type GlueCheckpointsDB struct { + // getSessionFunc will get a new session from TiDB getSessionFunc func() (Session, error) schema string - taskID int64 // TODO(lance6716): rename or make it clear that taskID is different from cfg.TaskID } -func NewGlueCheckpointsDB(ctx context.Context, se Session, f func() (Session, error), schemaName string, taskID int64) (*GlueCheckpointsDB, error) { +func NewGlueCheckpointsDB(ctx context.Context, se Session, f func() (Session, error), schemaName string) (*GlueCheckpointsDB, error) { var escapedSchemaName strings.Builder common.WriteMySQLIdentifier(&escapedSchemaName, schemaName) schema := escapedSchemaName.String() @@ -107,7 +107,6 @@ func NewGlueCheckpointsDB(ctx context.Context, se Session, f func() (Session, er return &GlueCheckpointsDB{ getSessionFunc: f, schema: schema, - taskID: taskID, }, nil } @@ -150,7 +149,7 @@ func (g GlueCheckpointsDB) Initialize(ctx context.Context, cfg *config.Config, d for _, table := range db.Tables { tableName := common.UniqueTable(db.Name, table.Name) _, err = s.ExecutePreparedStmt(c, stmtID2, []types.Datum{ - types.NewIntDatum(g.taskID), + types.NewIntDatum(cfg.TaskID), types.NewStringDatum(tableName), types.NewIntDatum(0), types.NewIntDatum(table.ID), diff --git a/lightning/glue/glue.go b/lightning/glue/glue.go index c23b43951..17c4ad51f 100644 --- a/lightning/glue/glue.go +++ b/lightning/glue/glue.go @@ -40,6 +40,7 @@ type Glue interface { } type SQLExecutor interface { + // ExecuteWithLog and ObtainStringWithLog should support concurrently call ExecuteWithLog(ctx context.Context, query string, purpose string, logger log.Logger) error ObtainStringWithLog(ctx context.Context, query string, purpose string, logger log.Logger) (string, error) Close() From 021e51b2862aff7671d8379a193caa9be5d2ae03 Mon Sep 17 00:00:00 2001 From: lance6716 Date: Fri, 20 Nov 2020 19:46:49 +0800 Subject: [PATCH 4/4] fix CI and change CREATE TABLE --- lightning/checkpoints/checkpoints_sql_test.go | 6 +-- lightning/glue/glue.go | 3 +- lightning/restore/tidb.go | 13 ++----- lightning/restore/tidb_test.go | 38 ++++++++----------- 4 files changed, 23 insertions(+), 37 deletions(-) diff --git a/lightning/checkpoints/checkpoints_sql_test.go b/lightning/checkpoints/checkpoints_sql_test.go index 0a9777dbd..1595f1feb 100644 --- a/lightning/checkpoints/checkpoints_sql_test.go +++ b/lightning/checkpoints/checkpoints_sql_test.go @@ -73,13 +73,13 @@ func (s *cpSQLSuite) TestNormalOperations(c *C) { initializeStmt = s.mock. ExpectPrepare("INSERT INTO `mock-schema`\\.table_v\\d+") initializeStmt.ExpectExec(). - WithArgs(1234, "`db1`.`t1`", sqlmock.AnyArg(), int64(1)). + WithArgs(123, "`db1`.`t1`", sqlmock.AnyArg(), int64(1)). WillReturnResult(sqlmock.NewResult(7, 1)) initializeStmt.ExpectExec(). - WithArgs(1234, "`db1`.`t2`", sqlmock.AnyArg(), int64(2)). + WithArgs(123, "`db1`.`t2`", sqlmock.AnyArg(), int64(2)). WillReturnResult(sqlmock.NewResult(8, 1)) initializeStmt.ExpectExec(). - WithArgs(1234, "`db2`.`t3`", sqlmock.AnyArg(), int64(3)). + WithArgs(123, "`db2`.`t3`", sqlmock.AnyArg(), int64(3)). 
WillReturnResult(sqlmock.NewResult(9, 1)) s.mock.ExpectCommit() diff --git a/lightning/glue/glue.go b/lightning/glue/glue.go index 17c4ad51f..e4e045844 100644 --- a/lightning/glue/glue.go +++ b/lightning/glue/glue.go @@ -40,7 +40,8 @@ type Glue interface { } type SQLExecutor interface { - // ExecuteWithLog and ObtainStringWithLog should support concurrently call + // ExecuteWithLog and ObtainStringWithLog should support concurrently call and can't assure different calls goes to + // same underlying connection ExecuteWithLog(ctx context.Context, query string, purpose string, logger log.Logger) error ObtainStringWithLog(ctx context.Context, query string, purpose string, logger log.Logger) (string, error) Close() diff --git a/lightning/restore/tidb.go b/lightning/restore/tidb.go index 4d3b09166..4d7517863 100644 --- a/lightning/restore/tidb.go +++ b/lightning/restore/tidb.go @@ -135,19 +135,12 @@ func InitSchema(ctx context.Context, g glue.Glue, database string, tablesSchema if err != nil { return errors.Trace(err) } - var useDB strings.Builder - useDB.WriteString("USE ") - common.WriteMySQLIdentifier(&useDB, database) - err = sqlExecutor.ExecuteWithLog(ctx, useDB.String(), "use database", logger) - if err != nil { - return errors.Trace(err) - } task := logger.Begin(zap.InfoLevel, "create tables") for tbl, sqlCreateTable := range tablesSchema { task.Debug("create table", zap.String("schema", sqlCreateTable)) - sqlCreateTable, err = createTableIfNotExistsStmt(g.GetParser(), sqlCreateTable, tbl) + sqlCreateTable, err = createTableIfNotExistsStmt(g.GetParser(), sqlCreateTable, database, tbl) if err != nil { break } @@ -167,7 +160,7 @@ func InitSchema(ctx context.Context, g glue.Glue, database string, tablesSchema return errors.Trace(err) } -func createTableIfNotExistsStmt(p *parser.Parser, createTable, tblName string) (string, error) { +func createTableIfNotExistsStmt(p *parser.Parser, createTable, dbName, tblName string) (string, error) { stmts, _, err := p.Parse(createTable, "", "") if err != nil { return "", err @@ -179,7 +172,7 @@ func createTableIfNotExistsStmt(p *parser.Parser, createTable, tblName string) ( for _, stmt := range stmts { if createTableNode, ok := stmt.(*ast.CreateTableStmt); ok { - createTableNode.Table.Schema = model.NewCIStr("") + createTableNode.Table.Schema = model.NewCIStr(dbName) createTableNode.Table.Name = model.NewCIStr(tblName) createTableNode.IfNotExists = true } diff --git a/lightning/restore/tidb_test.go b/lightning/restore/tidb_test.go index 3397ad110..bfe3b74b9 100644 --- a/lightning/restore/tidb_test.go +++ b/lightning/restore/tidb_test.go @@ -64,8 +64,9 @@ func (s *tidbSuite) TearDownTest(c *C) { } func (s *tidbSuite) TestCreateTableIfNotExistsStmt(c *C) { + dbName := "testdb" createTableIfNotExistsStmt := func(createTable, tableName string) string { - res, err := createTableIfNotExistsStmt(s.tiGlue.GetParser(), createTable, tableName) + res, err := createTableIfNotExistsStmt(s.tiGlue.GetParser(), createTable, dbName, tableName) c.Assert(err, IsNil) return res } @@ -73,61 +74,61 @@ func (s *tidbSuite) TestCreateTableIfNotExistsStmt(c *C) { c.Assert( createTableIfNotExistsStmt("CREATE TABLE `foo`(`bar` TINYINT(1));", "foo"), Equals, - "CREATE TABLE IF NOT EXISTS `foo` (`bar` TINYINT(1));", + "CREATE TABLE IF NOT EXISTS `testdb`.`foo` (`bar` TINYINT(1));", ) c.Assert( createTableIfNotExistsStmt("CREATE TABLE IF NOT EXISTS `foo`(`bar` TINYINT(1));", "foo"), Equals, - "CREATE TABLE IF NOT EXISTS `foo` (`bar` TINYINT(1));", + "CREATE TABLE IF NOT EXISTS 
`testdb`.`foo` (`bar` TINYINT(1));", ) // case insensitive c.Assert( createTableIfNotExistsStmt("/* cOmmEnt */ creAte tablE `fOo`(`bar` TinyinT(1));", "fOo"), Equals, - "CREATE TABLE IF NOT EXISTS `fOo` (`bar` TINYINT(1));", + "CREATE TABLE IF NOT EXISTS `testdb`.`fOo` (`bar` TINYINT(1));", ) c.Assert( createTableIfNotExistsStmt("/* coMMenT */ crEatE tAble If not EXISts `FoO`(`bAR` tiNyInT(1));", "FoO"), Equals, - "CREATE TABLE IF NOT EXISTS `FoO` (`bAR` TINYINT(1));", + "CREATE TABLE IF NOT EXISTS `testdb`.`FoO` (`bAR` TINYINT(1));", ) // only one "CREATE TABLE" is replaced c.Assert( createTableIfNotExistsStmt("CREATE TABLE `foo`(`bar` INT(1) COMMENT 'CREATE TABLE');", "foo"), Equals, - "CREATE TABLE IF NOT EXISTS `foo` (`bar` INT(1) COMMENT 'CREATE TABLE');", + "CREATE TABLE IF NOT EXISTS `testdb`.`foo` (`bar` INT(1) COMMENT 'CREATE TABLE');", ) // upper case becomes shorter c.Assert( createTableIfNotExistsStmt("CREATE TABLE `ſ`(`ı` TINYINT(1));", "ſ"), Equals, - "CREATE TABLE IF NOT EXISTS `ſ` (`ı` TINYINT(1));", + "CREATE TABLE IF NOT EXISTS `testdb`.`ſ` (`ı` TINYINT(1));", ) // upper case becomes longer c.Assert( createTableIfNotExistsStmt("CREATE TABLE `ɑ`(`ȿ` TINYINT(1));", "ɑ"), Equals, - "CREATE TABLE IF NOT EXISTS `ɑ` (`ȿ` TINYINT(1));", + "CREATE TABLE IF NOT EXISTS `testdb`.`ɑ` (`ȿ` TINYINT(1));", ) // non-utf-8 c.Assert( createTableIfNotExistsStmt("CREATE TABLE `\xcc\xcc\xcc`(`\xdd\xdd\xdd` TINYINT(1));", "\xcc\xcc\xcc"), Equals, - "CREATE TABLE IF NOT EXISTS `\xcc\xcc\xcc` (`ÝÝÝ` TINYINT(1));", + "CREATE TABLE IF NOT EXISTS `testdb`.`\xcc\xcc\xcc` (`ÝÝÝ` TINYINT(1));", ) // renaming a table c.Assert( createTableIfNotExistsStmt("create table foo(x int);", "ba`r"), Equals, - "CREATE TABLE IF NOT EXISTS `ba``r` (`x` INT);", + "CREATE TABLE IF NOT EXISTS `testdb`.`ba``r` (`x` INT);", ) // conditional comments @@ -138,7 +139,7 @@ func (s *tidbSuite) TestCreateTableIfNotExistsStmt(c *C) { CREATE TABLE x.y (z double) ENGINE=InnoDB AUTO_INCREMENT=8343230 DEFAULT CHARSET=utf8; `, "m"), Equals, - "SET NAMES 'binary';SET @@SESSION.`FOREIGN_KEY_CHECKS`=0;CREATE TABLE IF NOT EXISTS `m` (`z` DOUBLE) ENGINE = InnoDB AUTO_INCREMENT = 8343230 DEFAULT CHARACTER SET = UTF8;", + "SET NAMES 'binary';SET @@SESSION.`FOREIGN_KEY_CHECKS`=0;CREATE TABLE IF NOT EXISTS `testdb`.`m` (`z` DOUBLE) ENGINE = InnoDB AUTO_INCREMENT = 8343230 DEFAULT CHARACTER SET = UTF8;", ) } @@ -149,13 +150,10 @@ func (s *tidbSuite) TestInitSchema(c *C) { ExpectExec("CREATE DATABASE IF NOT EXISTS `db`"). WillReturnResult(sqlmock.NewResult(1, 1)) s.mockDB. - ExpectExec("USE `db`"). - WillReturnResult(sqlmock.NewResult(0, 0)) - s.mockDB. - ExpectExec("\\QCREATE TABLE IF NOT EXISTS `t1` (`a` INT PRIMARY KEY,`b` VARCHAR(200));\\E"). + ExpectExec("\\QCREATE TABLE IF NOT EXISTS `db`.`t1` (`a` INT PRIMARY KEY,`b` VARCHAR(200));\\E"). WillReturnResult(sqlmock.NewResult(2, 1)) s.mockDB. - ExpectExec("\\QSET @@SESSION.`FOREIGN_KEY_CHECKS`=0;CREATE TABLE IF NOT EXISTS `t2` (`xx` TEXT) AUTO_INCREMENT = 11203;\\E"). + ExpectExec("\\QSET @@SESSION.`FOREIGN_KEY_CHECKS`=0;CREATE TABLE IF NOT EXISTS `db`.`t2` (`xx` TEXT) AUTO_INCREMENT = 11203;\\E"). WillReturnResult(sqlmock.NewResult(2, 1)) s.mockDB. ExpectClose() @@ -175,9 +173,6 @@ func (s *tidbSuite) TestInitSchemaSyntaxError(c *C) { s.mockDB. ExpectExec("CREATE DATABASE IF NOT EXISTS `db`"). WillReturnResult(sqlmock.NewResult(1, 1)) - s.mockDB. - ExpectExec("USE `db`"). - WillReturnResult(sqlmock.NewResult(0, 0)) s.mockDB. 
ExpectClose() @@ -194,10 +189,7 @@ func (s *tidbSuite) TestInitSchemaUnsupportedSchemaError(c *C) { ExpectExec("CREATE DATABASE IF NOT EXISTS `db`"). WillReturnResult(sqlmock.NewResult(1, 1)) s.mockDB. - ExpectExec("USE `db`"). - WillReturnResult(sqlmock.NewResult(0, 0)) - s.mockDB. - ExpectExec("CREATE TABLE IF NOT EXISTS `t1`.*"). + ExpectExec("CREATE TABLE IF NOT EXISTS `db`.`t1`.*"). WillReturnError(&mysql.MySQLError{ Number: tmysql.ErrTooBigFieldlength, Message: "Column length too big",