diff --git a/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml b/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml index 853974c96af..e9e63024858 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml @@ -191,4 +191,8 @@ jobs: mkdir -p /tmp/vtdataroot source build.env + + # skip TestCrossCellDurability test on v14 (as previous). It doesn't setup semi-sync the way this test (from v16) expects + export SKIPTESTCROSSCELLDURABILITY=1 + eatmydata -- go run test.go -skip-build -keep-data=false -docker=false -print-log -follow -tag upgrade_downgrade_reparent diff --git a/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml b/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml index 0f45494080c..c0b08f1acd7 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml @@ -188,4 +188,8 @@ jobs: mkdir -p /tmp/vtdataroot source build.env + + # skip TestCrossCellDurability test on v14 (as previous). It doesn't setup semi-sync the way this test (from v16) expects + export SKIPTESTCROSSCELLDURABILITY=1 + eatmydata -- go run test.go -skip-build -keep-data=false -docker=false -print-log -follow -tag upgrade_downgrade_reparent diff --git a/go.mod b/go.mod index 63317711cbd..944bccf8ec6 100644 --- a/go.mod +++ b/go.mod @@ -117,6 +117,7 @@ require ( require ( github.com/bndr/gotabulate v1.1.2 + github.com/hashicorp/go-cleanhttp v0.5.1 github.com/hashicorp/go-version v1.6.0 github.com/planetscale/log v0.0.0-20221118170849-fb599bc35c50 github.com/slok/noglog v0.2.0 @@ -147,7 +148,6 @@ require ( github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/google/gofuzz v1.1.0 // indirect github.com/googleapis/gax-go/v2 v2.0.5 // indirect - github.com/hashicorp/go-cleanhttp v0.5.1 // indirect github.com/hashicorp/go-hclog v0.12.0 // indirect github.com/hashicorp/go-rootcerts v1.0.2 // indirect github.com/hashicorp/golang-lru v0.5.1 // indirect diff --git a/go/flags/endtoend/vtbackup.txt b/go/flags/endtoend/vtbackup.txt index efdff99b732..29029fc1501 100644 --- a/go/flags/endtoend/vtbackup.txt +++ b/go/flags/endtoend/vtbackup.txt @@ -155,7 +155,7 @@ Usage of vtbackup: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vtctld.txt b/go/flags/endtoend/vtctld.txt index 628015216ca..ae213a80119 100644 --- a/go/flags/endtoend/vtctld.txt +++ b/go/flags/endtoend/vtctld.txt @@ -124,7 +124,7 @@ Usage of vtctld: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vtgate.txt b/go/flags/endtoend/vtgate.txt index 8a8b55b3a6f..f29885644c4 100644 --- a/go/flags/endtoend/vtgate.txt +++ b/go/flags/endtoend/vtgate.txt @@ -171,7 +171,7 @@ Usage of vtgate: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vtgr.txt b/go/flags/endtoend/vtgr.txt index 9460616b6cf..67d85bae4b1 100644 --- a/go/flags/endtoend/vtgr.txt +++ b/go/flags/endtoend/vtgr.txt @@ -54,7 +54,7 @@ Usage of vtgr: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vtorc.txt b/go/flags/endtoend/vtorc.txt index b3196e929a2..e9c1a5358ed 100644 --- a/go/flags/endtoend/vtorc.txt +++ b/go/flags/endtoend/vtorc.txt @@ -64,7 +64,7 @@ Usage of vtorc: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vttablet.txt b/go/flags/endtoend/vttablet.txt index 5ef27dc0ac6..5cd22bacf55 100644 --- a/go/flags/endtoend/vttablet.txt +++ b/go/flags/endtoend/vttablet.txt @@ -308,7 +308,7 @@ Usage of vttablet: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_etcd_lease_ttl int Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going. (default 30) diff --git a/go/flags/endtoend/vttestserver.txt b/go/flags/endtoend/vttestserver.txt index e6befce3122..42cdb5488a3 100644 --- a/go/flags/endtoend/vttestserver.txt +++ b/go/flags/endtoend/vttestserver.txt @@ -112,7 +112,7 @@ Usage of vttestserver: --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) --topo_consul_lock_session_checks string List of checks for consul session. (default "serfHealth") --topo_consul_lock_session_ttl string TTL for consul session. - --topo_consul_max_conns_per_host int Maximum number of consul connections per host. + --topo_consul_max_conns_per_host int Maximum number of consul connections per host. (default 250) --topo_consul_max_idle_conns int Maximum number of idle consul connections. (default 100) --topo_consul_watch_poll_duration duration time of the long poll for watch queries. (default 30s) --topo_zk_auth_file string auth to use when connecting to the zk topo server, file contents should be :, e.g., digest:user:pass diff --git a/go/internal/flag/flag.go b/go/internal/flag/flag.go index e126f3c45f5..5e36941e480 100644 --- a/go/internal/flag/flag.go +++ b/go/internal/flag/flag.go @@ -29,12 +29,15 @@ import ( "os" "reflect" "strings" + "sync" flag "github.com/spf13/pflag" "vitess.io/vitess/go/vt/log" ) +var flagsMu sync.Mutex + // Parse wraps the standard library's flag.Parse to perform some sanity checking // and issue deprecation warnings in advance of our move to pflag. // @@ -44,6 +47,8 @@ import ( // // See VEP-4, phase 1 for details: https://github.com/vitessio/enhancements/blob/c766ea905e55409cddeb666d6073cd2ac4c9783e/veps/vep-4.md#phase-1-preparation func Parse(fs *flag.FlagSet) { + flagsMu.Lock() + defer flagsMu.Unlock() preventGlogVFlagFromClobberingVersionFlagShorthand(fs) fs.AddGoFlagSet(goflag.CommandLine) @@ -72,6 +77,8 @@ func Parse(fs *flag.FlagSet) { // IsFlagProvided returns if the given flag has been provided by the user explicitly or not func IsFlagProvided(name string) bool { + flagsMu.Lock() + defer flagsMu.Unlock() found := false flag.Visit(func(f *flag.Flag) { if f.Name == name { @@ -171,6 +178,8 @@ func filterTestFlags() ([]string, []string) { // handle `go test` flags correctly. We need to separately parse the test flags using goflags. Additionally flags // like test.Short() require that goflag.Parse() is called first. func ParseFlagsForTest() { + flagsMu.Lock() + defer flagsMu.Unlock() // We need to split up the test flags and the regular app pflags. // Then hand them off the std flags and pflags parsers respectively. args, testFlags := filterTestFlags() @@ -202,6 +211,8 @@ func Parsed() bool { // standard library `flag` CommandLine. If found in the latter, it is converted // to a pflag.Flag first. If found in neither, this function returns nil. func Lookup(name string) *flag.Flag { + flagsMu.Lock() + defer flagsMu.Unlock() if f := flag.Lookup(name); f != nil { return f } @@ -217,6 +228,8 @@ func Lookup(name string) *flag.Flag { // removed. If no double-dash was specified on the command-line, this is // equivalent to flag.Args() from the standard library flag package. func Args() (args []string) { + flagsMu.Lock() + defer flagsMu.Unlock() doubleDashIdx := -1 for i, arg := range flag.Args() { if arg == "--" { diff --git a/go/test/endtoend/reparent/plannedreparent/reparent_test.go b/go/test/endtoend/reparent/plannedreparent/reparent_test.go index c2f2b948d71..1e47c6d3c64 100644 --- a/go/test/endtoend/reparent/plannedreparent/reparent_test.go +++ b/go/test/endtoend/reparent/plannedreparent/reparent_test.go @@ -19,6 +19,7 @@ package plannedreparent import ( "context" "fmt" + "os" "strconv" "testing" "time" @@ -392,6 +393,10 @@ func TestReparentDoesntHangIfPrimaryFails(t *testing.T) { // 1. When PRS is run with the cross_cell durability policy setup, then the semi-sync settings on all the tablets are as expected // 2. Bringing up a new vttablet should have its replication and semi-sync setup correctly without any manual intervention func TestCrossCellDurability(t *testing.T) { + if os.Getenv("SKIPTESTCROSSCELLDURABILITY") == "1" { + t.Log("skipping due to SKIPTESTCROSSCELLDURABILITY=1") + return + } defer cluster.PanicHandler(t) clusterInstance := utils.SetupReparentCluster(t, "cross_cell") defer utils.TeardownCluster(clusterInstance) @@ -443,6 +448,9 @@ func TestFullStatus(t *testing.T) { require.NoError(t, err) primaryStatus := &replicationdatapb.FullStatus{} err = protojson.Unmarshal([]byte(primaryStatusString), primaryStatus) + if err != nil { + t.Logf("TestFullStatus got primaryStatusString: %s", string(primaryStatusString)) + } require.NoError(t, err) assert.NotEmpty(t, primaryStatus.ServerUuid) assert.NotEmpty(t, primaryStatus.ServerId) diff --git a/go/vt/servenv/exporter.go b/go/vt/servenv/exporter.go index 397be415581..d8eb4ef428d 100644 --- a/go/vt/servenv/exporter.go +++ b/go/vt/servenv/exporter.go @@ -102,6 +102,7 @@ type Exporter struct { name, label string handleFuncs map[string]*handleFunc sp *statusPage + mu sync.Mutex } // NewExporter creates a new Exporter with name as namespace. @@ -154,6 +155,8 @@ func (e *Exporter) URLPrefix() string { // url remapped from /path to /name/path. If name is empty, the request // is passed through to http.HandleFunc. func (e *Exporter) HandleFunc(url string, f func(w http.ResponseWriter, r *http.Request)) { + e.mu.Lock() + defer e.mu.Unlock() if e.name == "" { http.HandleFunc(url, f) return diff --git a/go/vt/topo/consultopo/server.go b/go/vt/topo/consultopo/server.go index 0eeb24728fa..95269732916 100644 --- a/go/vt/topo/consultopo/server.go +++ b/go/vt/topo/consultopo/server.go @@ -28,6 +28,7 @@ import ( "time" "github.com/hashicorp/consul/api" + "github.com/hashicorp/go-cleanhttp" "github.com/spf13/pflag" "vitess.io/vitess/go/vt/log" @@ -38,11 +39,13 @@ import ( var ( consulAuthClientStaticFile string - consulConfig = api.DefaultConfig() // serfHealth is the default check from consul consulLockSessionChecks = "serfHealth" consulLockSessionTTL string - consulLockDelay = 15 * time.Second + consulLockDelay = 15 * time.Second + consulMaxConnsPerHost int = 250 // do not use client default of 0/unlimited + consulMaxIdleConns int + consulIdleConnTimeout time.Duration ) func init() { @@ -50,13 +53,18 @@ func init() { } func registerServerFlags(fs *pflag.FlagSet) { + // cleanhttp.DefaultPooledTransport() is used by the consul api client + // as an *http.Transport. We call it here just to get the default + // values the consul api client will inherit from it later. + defaultConsulPooledTransport := cleanhttp.DefaultPooledTransport() + fs.StringVar(&consulAuthClientStaticFile, "consul_auth_static_file", consulAuthClientStaticFile, "JSON File to read the topos/tokens from.") fs.StringVar(&consulLockSessionChecks, "topo_consul_lock_session_checks", consulLockSessionChecks, "List of checks for consul session.") fs.StringVar(&consulLockSessionTTL, "topo_consul_lock_session_ttl", consulLockSessionTTL, "TTL for consul session.") fs.DurationVar(&consulLockDelay, "topo_consul_lock_delay", consulLockDelay, "LockDelay for consul session.") - fs.IntVar(&consulConfig.Transport.MaxConnsPerHost, "topo_consul_max_conns_per_host", consulConfig.Transport.MaxConnsPerHost, "Maximum number of consul connections per host.") - fs.IntVar(&consulConfig.Transport.MaxIdleConns, "topo_consul_max_idle_conns", consulConfig.Transport.MaxIdleConns, "Maximum number of idle consul connections.") - fs.DurationVar(&consulConfig.Transport.IdleConnTimeout, "topo_consul_idle_conn_timeout", consulConfig.Transport.IdleConnTimeout, "Maximum amount of time to pool idle connections.") + fs.IntVar(&consulMaxConnsPerHost, "topo_consul_max_conns_per_host", consulMaxConnsPerHost, "Maximum number of consul connections per host.") + fs.IntVar(&consulMaxIdleConns, "topo_consul_max_idle_conns", defaultConsulPooledTransport.MaxIdleConns, "Maximum number of idle consul connections.") + fs.DurationVar(&consulIdleConnTimeout, "topo_consul_idle_conn_timeout", defaultConsulPooledTransport.IdleConnTimeout, "Maximum amount of time to pool idle connections.") } // ClientAuthCred credential to use for consul clusters @@ -135,8 +143,11 @@ func NewServer(cell, serverAddr, root string) (*Server, error) { if err != nil { return nil, err } - cfg := consulConfig + cfg := api.DefaultConfig() cfg.Address = serverAddr + cfg.Transport.MaxConnsPerHost = consulMaxConnsPerHost + cfg.Transport.MaxIdleConns = consulMaxIdleConns + cfg.Transport.IdleConnTimeout = consulIdleConnTimeout if creds != nil { if creds[cell] != nil { cfg.Token = creds[cell].ACLToken diff --git a/go/vt/vttest/topoctl.go b/go/vt/vttest/topoctl.go index 2b63900d6d8..1fd4cb6e101 100644 --- a/go/vt/vttest/topoctl.go +++ b/go/vt/vttest/topoctl.go @@ -31,6 +31,7 @@ func (ctl *Topoctl) Setup() error { if err != nil { return err } + defer topoServer.Close() log.Infof("Creating cells if they don't exist in the provided topo server %s %s %s", ctl.TopoImplementation, ctl.TopoGlobalServerAddress, ctl.TopoGlobalRoot) // Create cells if it doesn't exist to be idempotent. Should work when we share the same topo server across multiple local clusters.