From f69a63255d7d968dbe08bd08d80390b1b5270559 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 00:10:01 +0200 Subject: [PATCH 01/17] `slack-vitess-r15.0.5`: close topoServer after use in `vttest` Signed-off-by: Tim Vaillancourt --- go/vt/vttest/topoctl.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/vt/vttest/topoctl.go b/go/vt/vttest/topoctl.go index 2b63900d6d8..1fd4cb6e101 100644 --- a/go/vt/vttest/topoctl.go +++ b/go/vt/vttest/topoctl.go @@ -31,6 +31,7 @@ func (ctl *Topoctl) Setup() error { if err != nil { return err } + defer topoServer.Close() log.Infof("Creating cells if they don't exist in the provided topo server %s %s %s", ctl.TopoImplementation, ctl.TopoGlobalServerAddress, ctl.TopoGlobalRoot) // Create cells if it doesn't exist to be idempotent. Should work when we share the same topo server across multiple local clusters. From 4cfa58f4b69bb24cece62a0bcf7a4c78bb379b09 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 00:34:37 +0200 Subject: [PATCH 02/17] go test -v Signed-off-by: Tim Vaillancourt --- tools/unit_test_race.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/unit_test_race.sh b/tools/unit_test_race.sh index 4cec1f365a9..2a588042888 100755 --- a/tools/unit_test_race.sh +++ b/tools/unit_test_race.sh @@ -36,7 +36,7 @@ all_except_flaky_tests=$(echo "$packages_with_tests" | grep -vE ".+ .+_flaky_tes flaky_tests=$(echo "$packages_with_tests" | grep -E ".+ .+_flaky_test\.go" | cut -d" " -f1) # Run non-flaky tests. -echo "$all_except_flaky_tests" | xargs go test $VT_GO_PARALLEL -race -count=1 +echo "$all_except_flaky_tests" | xargs go test -v $VT_GO_PARALLEL -race -count=1 if [ $? -ne 0 ]; then echo "ERROR: Go unit tests failed. See above for errors." echo @@ -52,7 +52,7 @@ for pkg in $flaky_tests; do max_attempts=3 attempt=1 # Set a timeout because some tests may deadlock when they flake. - until go test -timeout 2m $VT_GO_PARALLEL $pkg -race -count=1; do + until go test -v -timeout 2m $VT_GO_PARALLEL $pkg -race -count=1; do echo "FAILED (try $attempt/$max_attempts) in $pkg (return code $?). See above for errors." if [ $((++attempt)) -gt $max_attempts ]; then echo "ERROR: Flaky Go unit tests in package $pkg failed too often (after $max_attempts retries). Please reduce the flakiness." From a7e7300293502f2ad7b384f6b5877e3f02cf306c Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 00:42:49 +0200 Subject: [PATCH 03/17] Close idle consul connections Signed-off-by: Tim Vaillancourt --- go/vt/topo/consultopo/server.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/vt/topo/consultopo/server.go b/go/vt/topo/consultopo/server.go index 0eeb24728fa..dcd7c68b620 100644 --- a/go/vt/topo/consultopo/server.go +++ b/go/vt/topo/consultopo/server.go @@ -173,6 +173,9 @@ func parseConsulLockSessionChecks(s string) []string { // It will nil out the global and cells fields, so any attempt to // re-use this server will panic. func (s *Server) Close() { + if consulConfig.Transport != nil { + consulConfig.Transport.CloseIdleConnections() + } s.client = nil s.kv = nil s.mu.Lock() From 20640b45f55be4ff326673b6271cd1b7c0a885a1 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 02:15:24 +0200 Subject: [PATCH 04/17] cleanup consul conn overrides Signed-off-by: Tim Vaillancourt --- go/vt/topo/consultopo/server.go | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/go/vt/topo/consultopo/server.go b/go/vt/topo/consultopo/server.go index dcd7c68b620..240fd0ec0b9 100644 --- a/go/vt/topo/consultopo/server.go +++ b/go/vt/topo/consultopo/server.go @@ -28,6 +28,7 @@ import ( "time" "github.com/hashicorp/consul/api" + "github.com/hashicorp/go-cleanhttp" "github.com/spf13/pflag" "vitess.io/vitess/go/vt/log" @@ -42,7 +43,10 @@ var ( // serfHealth is the default check from consul consulLockSessionChecks = "serfHealth" consulLockSessionTTL string - consulLockDelay = 15 * time.Second + consulLockDelay = 15 * time.Second + consulMaxConnsPerHost int = 250 // do not use client default of 0/unlimited + consulMaxIdleConns int + consulIdleConnTimeout time.Duration ) func init() { @@ -50,13 +54,18 @@ func init() { } func registerServerFlags(fs *pflag.FlagSet) { + // cleanhttp.DefaultPooledTransport() is used by the consul api client + // as an *http.Transport. We call it here just to get the default + // values the consul api client will inherit from it later. + defaultConsulPooledTransport := cleanhttp.DefaultPooledTransport() + fs.StringVar(&consulAuthClientStaticFile, "consul_auth_static_file", consulAuthClientStaticFile, "JSON File to read the topos/tokens from.") fs.StringVar(&consulLockSessionChecks, "topo_consul_lock_session_checks", consulLockSessionChecks, "List of checks for consul session.") fs.StringVar(&consulLockSessionTTL, "topo_consul_lock_session_ttl", consulLockSessionTTL, "TTL for consul session.") fs.DurationVar(&consulLockDelay, "topo_consul_lock_delay", consulLockDelay, "LockDelay for consul session.") - fs.IntVar(&consulConfig.Transport.MaxConnsPerHost, "topo_consul_max_conns_per_host", consulConfig.Transport.MaxConnsPerHost, "Maximum number of consul connections per host.") - fs.IntVar(&consulConfig.Transport.MaxIdleConns, "topo_consul_max_idle_conns", consulConfig.Transport.MaxIdleConns, "Maximum number of idle consul connections.") - fs.DurationVar(&consulConfig.Transport.IdleConnTimeout, "topo_consul_idle_conn_timeout", consulConfig.Transport.IdleConnTimeout, "Maximum amount of time to pool idle connections.") + fs.IntVar(&consulMaxConnsPerHost, "topo_consul_max_conns_per_host", consulMaxConnsPerHost, "Maximum number of consul connections per host.") + fs.IntVar(&consulMaxIdleConns, "topo_consul_max_idle_conns", defaultConsulPooledTransport.MaxIdleConns, "Maximum number of idle consul connections.") + fs.DurationVar(&consulIdleConnTimeout, "topo_consul_idle_conn_timeout", defaultConsulPooledTransport.IdleConnTimeout, "Maximum amount of time to pool idle connections.") } // ClientAuthCred credential to use for consul clusters @@ -135,7 +144,10 @@ func NewServer(cell, serverAddr, root string) (*Server, error) { if err != nil { return nil, err } - cfg := consulConfig + cfg := api.DefaultConfig() + cfg.Transport.MaxConnsPerHost = consulMaxConnsPerHost + cfg.Transport.MaxIdleConns = consulMaxIdleConns + cfg.Transport.IdleConnTimeout = consulIdleConnTimeout cfg.Address = serverAddr if creds != nil { if creds[cell] != nil { @@ -173,9 +185,6 @@ func parseConsulLockSessionChecks(s string) []string { // It will nil out the global and cells fields, so any attempt to // re-use this server will panic. func (s *Server) Close() { - if consulConfig.Transport != nil { - consulConfig.Transport.CloseIdleConnections() - } s.client = nil s.kv = nil s.mu.Lock() From 1527d5f1f27aa5f86e73ed1f1fe4268672d1252a Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 02:20:32 +0200 Subject: [PATCH 05/17] rm unused var Signed-off-by: Tim Vaillancourt --- go/vt/topo/consultopo/server.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/vt/topo/consultopo/server.go b/go/vt/topo/consultopo/server.go index 240fd0ec0b9..a21633aadd8 100644 --- a/go/vt/topo/consultopo/server.go +++ b/go/vt/topo/consultopo/server.go @@ -39,7 +39,6 @@ import ( var ( consulAuthClientStaticFile string - consulConfig = api.DefaultConfig() // serfHealth is the default check from consul consulLockSessionChecks = "serfHealth" consulLockSessionTTL string From 76be4080baa52f533c7b50565d54e86f5f6158fa Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 02:27:30 +0200 Subject: [PATCH 06/17] go mod tidy Signed-off-by: Tim Vaillancourt --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 63317711cbd..944bccf8ec6 100644 --- a/go.mod +++ b/go.mod @@ -117,6 +117,7 @@ require ( require ( github.com/bndr/gotabulate v1.1.2 + github.com/hashicorp/go-cleanhttp v0.5.1 github.com/hashicorp/go-version v1.6.0 github.com/planetscale/log v0.0.0-20221118170849-fb599bc35c50 github.com/slok/noglog v0.2.0 @@ -147,7 +148,6 @@ require ( github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/google/gofuzz v1.1.0 // indirect github.com/googleapis/gax-go/v2 v2.0.5 // indirect - github.com/hashicorp/go-cleanhttp v0.5.1 // indirect github.com/hashicorp/go-hclog v0.12.0 // indirect github.com/hashicorp/go-rootcerts v1.0.2 // indirect github.com/hashicorp/golang-lru v0.5.1 // indirect From bdc8460755746988126c4ad9090919b448c4de55 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 02:29:04 +0200 Subject: [PATCH 07/17] update flags e2e tests Signed-off-by: Tim Vaillancourt --- go/flags/endtoend/vtctld.txt | 2 +- go/flags/endtoend/vtgate.txt | 2 +- go/flags/endtoend/vttablet.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/flags/endtoend/vtctld.txt b/go/flags/endtoend/vtctld.txt index 628015216ca..ae213a80119 100644 --- a/go/flags/endtoend/vtctld.txt +++ b/go/flags/endtoend/vtctld.txt @@ -124,7 +124,7 @@ Usage of vtctld: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vtgate.txt b/go/flags/endtoend/vtgate.txt index 8a8b55b3a6f..f29885644c4 100644 --- a/go/flags/endtoend/vtgate.txt +++ b/go/flags/endtoend/vtgate.txt @@ -171,7 +171,7 @@ Usage of vtgate: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vttablet.txt b/go/flags/endtoend/vttablet.txt index 5ef27dc0ac6..5cd22bacf55 100644 --- a/go/flags/endtoend/vttablet.txt +++ b/go/flags/endtoend/vttablet.txt @@ -308,7 +308,7 @@ Usage of vttablet: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) From e935013acb55d6c2682460f72e0b40ec9157f527 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 02:55:14 +0200 Subject: [PATCH 08/17] update e2e flags tests Signed-off-by: Tim Vaillancourt --- go/flags/endtoend/vtbackup.txt | 2 +- go/flags/endtoend/vtgr.txt | 2 +- go/flags/endtoend/vtorc.txt | 2 +- go/flags/endtoend/vttestserver.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go/flags/endtoend/vtbackup.txt b/go/flags/endtoend/vtbackup.txt index efdff99b732..29029fc1501 100644 --- a/go/flags/endtoend/vtbackup.txt +++ b/go/flags/endtoend/vtbackup.txt @@ -155,7 +155,7 @@ Usage of vtbackup: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vtgr.txt b/go/flags/endtoend/vtgr.txt index 9460616b6cf..67d85bae4b1 100644 --- a/go/flags/endtoend/vtgr.txt +++ b/go/flags/endtoend/vtgr.txt @@ -54,7 +54,7 @@ Usage of vtgr: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vtorc.txt b/go/flags/endtoend/vtorc.txt index b3196e929a2..e9c1a5358ed 100644 --- a/go/flags/endtoend/vtorc.txt +++ b/go/flags/endtoend/vtorc.txt @@ -64,7 +64,7 @@ Usage of vtorc: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vttestserver.txt b/go/flags/endtoend/vttestserver.txt index e6befce3122..42cdb5488a3 100644 --- a/go/flags/endtoend/vttestserver.txt +++ b/go/flags/endtoend/vttestserver.txt @@ -112,7 +112,7 @@ Usage of vttestserver: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_zk_auth_file string auth to use when connecting to the zk topo server, file contents should be :, e.g., digest:user:pass From a303aad978c087b13d6485e9114e9ec1393a84b8 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 03:40:33 +0200 Subject: [PATCH 09/17] revert go test -v Signed-off-by: Tim Vaillancourt --- tools/unit_test_race.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/unit_test_race.sh b/tools/unit_test_race.sh index 2a588042888..4cec1f365a9 100755 --- a/tools/unit_test_race.sh +++ b/tools/unit_test_race.sh @@ -36,7 +36,7 @@ all_except_flaky_tests=$(echo "$packages_with_tests" | grep -vE ".+ .+_flaky_tes flaky_tests=$(echo "$packages_with_tests" | grep -E ".+ .+_flaky_test\.go" | cut -d" " -f1) # Run non-flaky tests. -echo "$all_except_flaky_tests" | xargs go test -v $VT_GO_PARALLEL -race -count=1 +echo "$all_except_flaky_tests" | xargs go test $VT_GO_PARALLEL -race -count=1 if [ $? -ne 0 ]; then echo "ERROR: Go unit tests failed. See above for errors." echo @@ -52,7 +52,7 @@ for pkg in $flaky_tests; do max_attempts=3 attempt=1 # Set a timeout because some tests may deadlock when they flake. - until go test -v -timeout 2m $VT_GO_PARALLEL $pkg -race -count=1; do + until go test -timeout 2m $VT_GO_PARALLEL $pkg -race -count=1; do echo "FAILED (try $attempt/$max_attempts) in $pkg (return code $?). See above for errors." if [ $((++attempt)) -gt $max_attempts ]; then echo "ERROR: Flaky Go unit tests in package $pkg failed too often (after $max_attempts retries). Please reduce the flakiness." From 622268e78b0871786a22c3b80a4c722e58ecb492 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 03:43:03 +0200 Subject: [PATCH 10/17] reorder Signed-off-by: Tim Vaillancourt --- go/vt/topo/consultopo/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/topo/consultopo/server.go b/go/vt/topo/consultopo/server.go index a21633aadd8..95269732916 100644 --- a/go/vt/topo/consultopo/server.go +++ b/go/vt/topo/consultopo/server.go @@ -144,10 +144,10 @@ func NewServer(cell, serverAddr, root string) (*Server, error) { return nil, err } cfg := api.DefaultConfig() + cfg.Address = serverAddr cfg.Transport.MaxConnsPerHost = consulMaxConnsPerHost cfg.Transport.MaxIdleConns = consulMaxIdleConns cfg.Transport.IdleConnTimeout = consulIdleConnTimeout - cfg.Address = serverAddr if creds != nil { if creds[cell] != nil { cfg.Token = creds[cell].ACLToken From 551a353546decbf7f136207eacf9f9db47fa0c98 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sun, 19 May 2024 22:51:17 +0200 Subject: [PATCH 11/17] `slack-vitess-r15.0.5`: fix race in `consultopo` patch Signed-off-by: Tim Vaillancourt From f12125534e3181f0ea4d520995b708a08ff621b1 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Mon, 20 May 2024 00:01:15 +0200 Subject: [PATCH 12/17] fix race in .HandleFunc() from servenv Signed-off-by: Tim Vaillancourt --- go/vt/servenv/exporter.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/vt/servenv/exporter.go b/go/vt/servenv/exporter.go index 397be415581..d8eb4ef428d 100644 --- a/go/vt/servenv/exporter.go +++ b/go/vt/servenv/exporter.go @@ -102,6 +102,7 @@ type Exporter struct { name, label string handleFuncs map[string]*handleFunc sp *statusPage + mu sync.Mutex } // NewExporter creates a new Exporter with name as namespace. @@ -154,6 +155,8 @@ func (e *Exporter) URLPrefix() string { // url remapped from /path to /name/path. If name is empty, the request // is passed through to http.HandleFunc. func (e *Exporter) HandleFunc(url string, f func(w http.ResponseWriter, r *http.Request)) { + e.mu.Lock() + defer e.mu.Unlock() if e.name == "" { http.HandleFunc(url, f) return From de170de3857a7ab589493d5b1ce5f4c4f0c3e803 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Mon, 20 May 2024 00:34:17 +0200 Subject: [PATCH 13/17] add lock to 'IsFlagProvided(...)' Signed-off-by: Tim Vaillancourt --- go/internal/flag/flag.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/go/internal/flag/flag.go b/go/internal/flag/flag.go index e126f3c45f5..7f052ced2b2 100644 --- a/go/internal/flag/flag.go +++ b/go/internal/flag/flag.go @@ -29,12 +29,15 @@ import ( "os" "reflect" "strings" + "sync" flag "github.com/spf13/pflag" "vitess.io/vitess/go/vt/log" ) +var flagsMu sync.Mutex + // Parse wraps the standard library's flag.Parse to perform some sanity checking // and issue deprecation warnings in advance of our move to pflag. // @@ -72,6 +75,8 @@ func Parse(fs *flag.FlagSet) { // IsFlagProvided returns if the given flag has been provided by the user explicitly or not func IsFlagProvided(name string) bool { + flagsMu.Lock() + defer flagsMu.Unlock() found := false flag.Visit(func(f *flag.Flag) { if f.Name == name { From 8911a6d633573e899ee3df322e059a00767de028 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Mon, 20 May 2024 05:39:53 +0200 Subject: [PATCH 14/17] skip TestCrossCellDurability on 'old'/v14 tests Signed-off-by: Tim Vaillancourt --- .../upgrade_downgrade_test_reparent_old_vtctl.yml | 1 + .../upgrade_downgrade_test_reparent_old_vttablet.yml | 1 + go/internal/flag/flag.go | 8 ++++++++ .../endtoend/reparent/plannedreparent/reparent_test.go | 4 ++++ 4 files changed, 14 insertions(+) diff --git a/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml b/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml index 853974c96af..629919397df 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml @@ -191,4 +191,5 @@ jobs: mkdir -p /tmp/vtdataroot source build.env + export SKIPTESTCROSSCELLDURABILITY=1 # skip on v14 eatmydata -- go run test.go -skip-build -keep-data=false -docker=false -print-log -follow -tag upgrade_downgrade_reparent diff --git a/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml b/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml index 0f45494080c..2c38f343989 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml @@ -188,4 +188,5 @@ jobs: mkdir -p /tmp/vtdataroot source build.env + export SKIPTESTCROSSCELLDURABILITY=1 # skip on v14 eatmydata -- go run test.go -skip-build -keep-data=false -docker=false -print-log -follow -tag upgrade_downgrade_reparent diff --git a/go/internal/flag/flag.go b/go/internal/flag/flag.go index 7f052ced2b2..5e36941e480 100644 --- a/go/internal/flag/flag.go +++ b/go/internal/flag/flag.go @@ -47,6 +47,8 @@ var flagsMu sync.Mutex // // See VEP-4, phase 1 for details: https://github.com/vitessio/enhancements/blob/c766ea905e55409cddeb666d6073cd2ac4c9783e/veps/vep-4.md#phase-1-preparation func Parse(fs *flag.FlagSet) { + flagsMu.Lock() + defer flagsMu.Unlock() preventGlogVFlagFromClobberingVersionFlagShorthand(fs) fs.AddGoFlagSet(goflag.CommandLine) @@ -176,6 +178,8 @@ func filterTestFlags() ([]string, []string) { // handle `go test` flags correctly. We need to separately parse the test flags using goflags. Additionally flags // like test.Short() require that goflag.Parse() is called first. func ParseFlagsForTest() { + flagsMu.Lock() + defer flagsMu.Unlock() // We need to split up the test flags and the regular app pflags. // Then hand them off the std flags and pflags parsers respectively. args, testFlags := filterTestFlags() @@ -207,6 +211,8 @@ func Parsed() bool { // standard library `flag` CommandLine. If found in the latter, it is converted // to a pflag.Flag first. If found in neither, this function returns nil. func Lookup(name string) *flag.Flag { + flagsMu.Lock() + defer flagsMu.Unlock() if f := flag.Lookup(name); f != nil { return f } @@ -222,6 +228,8 @@ func Lookup(name string) *flag.Flag { // removed. If no double-dash was specified on the command-line, this is // equivalent to flag.Args() from the standard library flag package. func Args() (args []string) { + flagsMu.Lock() + defer flagsMu.Unlock() doubleDashIdx := -1 for i, arg := range flag.Args() { if arg == "--" { diff --git a/go/test/endtoend/reparent/plannedreparent/reparent_test.go b/go/test/endtoend/reparent/plannedreparent/reparent_test.go index c2f2b948d71..fce456d4266 100644 --- a/go/test/endtoend/reparent/plannedreparent/reparent_test.go +++ b/go/test/endtoend/reparent/plannedreparent/reparent_test.go @@ -19,6 +19,7 @@ package plannedreparent import ( "context" "fmt" + "os" "strconv" "testing" "time" @@ -392,6 +393,9 @@ func TestReparentDoesntHangIfPrimaryFails(t *testing.T) { // 1. When PRS is run with the cross_cell durability policy setup, then the semi-sync settings on all the tablets are as expected // 2. Bringing up a new vttablet should have its replication and semi-sync setup correctly without any manual intervention func TestCrossCellDurability(t *testing.T) { + if os.Getenv("SKIPTESTCROSSCELLDURABILITY") == "1" { + return + } defer cluster.PanicHandler(t) clusterInstance := utils.SetupReparentCluster(t, "cross_cell") defer utils.TeardownCluster(clusterInstance) From 9f54ac2b8531309c0bbf9b85e4d6ed469d23c9cf Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Mon, 20 May 2024 05:45:32 +0200 Subject: [PATCH 15/17] Add log Signed-off-by: Tim Vaillancourt --- go/test/endtoend/reparent/plannedreparent/reparent_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/test/endtoend/reparent/plannedreparent/reparent_test.go b/go/test/endtoend/reparent/plannedreparent/reparent_test.go index fce456d4266..3b7c0d759b5 100644 --- a/go/test/endtoend/reparent/plannedreparent/reparent_test.go +++ b/go/test/endtoend/reparent/plannedreparent/reparent_test.go @@ -394,6 +394,7 @@ func TestReparentDoesntHangIfPrimaryFails(t *testing.T) { // 2. Bringing up a new vttablet should have its replication and semi-sync setup correctly without any manual intervention func TestCrossCellDurability(t *testing.T) { if os.Getenv("SKIPTESTCROSSCELLDURABILITY") == "1" { + t.Log("skipping due to SKIPTESTCROSSCELLDURABILITY=1") return } defer cluster.PanicHandler(t) From a3b2660f5f83b64c6821605042a003eea4bf1c6a Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Mon, 20 May 2024 05:54:15 +0200 Subject: [PATCH 16/17] improve comment Signed-off-by: Tim Vaillancourt --- .../workflows/upgrade_downgrade_test_reparent_old_vtctl.yml | 5 ++++- .../upgrade_downgrade_test_reparent_old_vttablet.yml | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml b/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml index 629919397df..e9e63024858 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml @@ -191,5 +191,8 @@ jobs: mkdir -p /tmp/vtdataroot source build.env - export SKIPTESTCROSSCELLDURABILITY=1 # skip on v14 + + # skip TestCrossCellDurability test on v14 (as previous). It doesn't setup semi-sync the way this test (from v16) expects + export SKIPTESTCROSSCELLDURABILITY=1 + eatmydata -- go run test.go -skip-build -keep-data=false -docker=false -print-log -follow -tag upgrade_downgrade_reparent diff --git a/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml b/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml index 2c38f343989..c0b08f1acd7 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml @@ -188,5 +188,8 @@ jobs: mkdir -p /tmp/vtdataroot source build.env - export SKIPTESTCROSSCELLDURABILITY=1 # skip on v14 + + # skip TestCrossCellDurability test on v14 (as previous). It doesn't setup semi-sync the way this test (from v16) expects + export SKIPTESTCROSSCELLDURABILITY=1 + eatmydata -- go run test.go -skip-build -keep-data=false -docker=false -print-log -follow -tag upgrade_downgrade_reparent From e5b8e924e47866686fe5893c70994b26dca26d45 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Mon, 20 May 2024 17:29:12 +0200 Subject: [PATCH 17/17] print offending primaryStatusString Signed-off-by: Tim Vaillancourt --- go/test/endtoend/reparent/plannedreparent/reparent_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/go/test/endtoend/reparent/plannedreparent/reparent_test.go b/go/test/endtoend/reparent/plannedreparent/reparent_test.go index 3b7c0d759b5..1e47c6d3c64 100644 --- a/go/test/endtoend/reparent/plannedreparent/reparent_test.go +++ b/go/test/endtoend/reparent/plannedreparent/reparent_test.go @@ -448,6 +448,9 @@ func TestFullStatus(t *testing.T) { require.NoError(t, err) primaryStatus := &replicationdatapb.FullStatus{} err = protojson.Unmarshal([]byte(primaryStatusString), primaryStatus) + if err != nil { + t.Logf("TestFullStatus got primaryStatusString: %s", string(primaryStatusString)) + } require.NoError(t, err) assert.NotEmpty(t, primaryStatus.ServerUuid) assert.NotEmpty(t, primaryStatus.ServerId)