diff --git a/doc/releasenotes/16_0_0_summary.md b/doc/releasenotes/16_0_0_summary.md index fd41913b4d4..9b44c6b8177 100644 --- a/doc/releasenotes/16_0_0_summary.md +++ b/doc/releasenotes/16_0_0_summary.md @@ -62,6 +62,14 @@ If you have code searching for error strings from Vitess, this is a breaking cha Many error strings have been tweaked. If your application is searching for specific errors, you might need to update your code. +#### `lock-timeout` and `remote_operation_timeout` Changes +Earlier, the shard and keyspace locks used to be capped by the `remote_operation_timeout`. This is no longer the case and instead a new flag called `lock-timeout` is introduced. +For backward compatibility, if `lock-timeout` is unspecified and `remote_operation_timeout` flag is provided, then its value will also be used for `lock-timeout` as well. +The default value for `remote_operation_timeout` has also changed from 30 seconds to 15 seconds. The default for the new flag `lock-timeout` is 45 seconds. + +During upgrades, if the users want to preserve the same behaviour as previous releases, then they should provide the `remote_operation_timeout` flag explicitly before upgrading. +After the upgrade, they should then alter their configuration to also specify `lock-timeout` explicitly. + ### New command line flags and behavior #### VTGate: Support query timeout --query-timeout @@ -232,4 +240,10 @@ VSchema Example "view2": "select * from t2", } } -``` \ No newline at end of file +``` + +### VTOrc + +#### Flag Deprecations + +The flag `lock-shard-timeout` has been deprecated. Please use the newly introduced `lock-timeout` instead. More detail [here](#lock-timeout-introduction). \ No newline at end of file diff --git a/go/flags/endtoend/mysqlctl.txt b/go/flags/endtoend/mysqlctl.txt index f5b810f0dd0..4fd044196d9 100644 --- a/go/flags/endtoend/mysqlctl.txt +++ b/go/flags/endtoend/mysqlctl.txt @@ -68,7 +68,7 @@ Global flags: --mysqlctl_client_protocol string the protocol to use to talk to the mysqlctl server (default "grpc") --mysqlctl_mycnf_template string template file to use for generating the my.cnf file during server init --mysqlctl_socket string socket file to use for remote mysqlctl actions (empty for local actions) - --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 1ns) + --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 10s) --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 10s) --pid_file string If set, the process will write its pid to the named file, and delete it on graceful shutdown. --pool_hostname_resolve_interval duration if set force an update to all hostnames and reconnect if changed, defaults to 0 (disabled) diff --git a/go/flags/endtoend/mysqlctld.txt b/go/flags/endtoend/mysqlctld.txt index 50ec38dab3e..e00ac7fac09 100644 --- a/go/flags/endtoend/mysqlctld.txt +++ b/go/flags/endtoend/mysqlctld.txt @@ -72,7 +72,7 @@ Usage of mysqlctld: --mysql_socket string Path to the mysqld socket file --mysqlctl_mycnf_template string template file to use for generating the my.cnf file during server init --mysqlctl_socket string socket file to use for remote mysqlctl actions (empty for local actions) - --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 1ns) + --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 10s) --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 10s) --pid_file string If set, the process will write its pid to the named file, and delete it on graceful shutdown. --pool_hostname_resolve_interval duration if set force an update to all hostnames and reconnect if changed, defaults to 0 (disabled) diff --git a/go/flags/endtoend/vtbackup.txt b/go/flags/endtoend/vtbackup.txt index 2207babd197..84b4b25a367 100644 --- a/go/flags/endtoend/vtbackup.txt +++ b/go/flags/endtoend/vtbackup.txt @@ -86,6 +86,7 @@ Usage of vtbackup: --keep-alive-timeout duration Wait until timeout elapses after a successful backup before shutting down. --keep_logs duration keep logs for this long (using ctime) (zero to keep forever) --keep_logs_by_mtime duration keep logs for this long (using mtime) (zero to keep forever) + --lock-timeout duration Maximum time for which a shard/keyspace lock can be acquired for (default 45s) --log_backtrace_at traceLocation when logging hits line file:N, emit a stack trace (default :0) --log_dir string If non-empty, write log files in this directory --log_err_stacks log stack traces for errors @@ -119,7 +120,7 @@ Usage of vtbackup: --port int port for the server --pprof strings enable profiling --purge_logs_interval duration how often try to remove old logs (default 1h0m0s) - --remote_operation_timeout duration time to wait for a remote operation (default 30s) + --remote_operation_timeout duration time to wait for a remote operation (default 15s) --restart_before_backup Perform a mysqld clean/full restart after applying binlogs, but before taking the backup. Only makes sense to work around xtrabackup bugs. --s3_backup_aws_endpoint string endpoint of the S3 backend (region must be provided). --s3_backup_aws_region string AWS region to use. (default "us-east-1") diff --git a/go/flags/endtoend/vtctld.txt b/go/flags/endtoend/vtctld.txt index 5cd676b1b0e..f62908aa215 100644 --- a/go/flags/endtoend/vtctld.txt +++ b/go/flags/endtoend/vtctld.txt @@ -1,5 +1,5 @@ Usage of vtctld: - --action_timeout duration time to wait for an action before resorting to force (default 2m0s) + --action_timeout duration time to wait for an action before resorting to force (default 1m0s) --alsologtostderr log to standard error as well as files --azblob_backup_account_key_file string Path to a file containing the Azure Storage account key; if this flag is unset, the environment variable VT_AZBLOB_ACCOUNT_KEY will be used as the key itself (NOT a file path). --azblob_backup_account_name string Azure Storage Account name for backups; if this flag is unset, the environment variable VT_AZBLOB_ACCOUNT_NAME will be used. @@ -60,13 +60,14 @@ Usage of vtctld: --keep_logs duration keep logs for this long (using ctime) (zero to keep forever) --keep_logs_by_mtime duration keep logs for this long (using mtime) (zero to keep forever) --lameduck-period duration keep running at least this long after SIGTERM before stopping (default 50ms) + --lock-timeout duration Maximum time for which a shard/keyspace lock can be acquired for (default 45s) --log_backtrace_at traceLocation when logging hits line file:N, emit a stack trace (default :0) --log_dir string If non-empty, write log files in this directory --log_err_stacks log stack traces for errors --log_rotate_max_size uint size in bytes at which logs are rotated (glog.MaxSize) (default 1887436800) --logtostderr log to standard error instead of files --max-stack-size int configure the maximum stack size in bytes (default 67108864) - --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 1ns) + --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 10s) --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 10s) --opentsdb_uri string URI of opentsdb /api/put method --pid_file string If set, the process will write its pid to the named file, and delete it on graceful shutdown. @@ -74,7 +75,7 @@ Usage of vtctld: --pprof strings enable profiling --proxy_tablets Setting this true will make vtctld proxy the tablet status instead of redirecting to them --purge_logs_interval duration how often try to remove old logs (default 1h0m0s) - --remote_operation_timeout duration time to wait for a remote operation (default 30s) + --remote_operation_timeout duration time to wait for a remote operation (default 15s) --s3_backup_aws_endpoint string endpoint of the S3 backend (region must be provided). --s3_backup_aws_region string AWS region to use. (default "us-east-1") --s3_backup_aws_retries int AWS request retries. (default -1) diff --git a/go/flags/endtoend/vtgate.txt b/go/flags/endtoend/vtgate.txt index e5e1cb3ed0f..339a70d1515 100644 --- a/go/flags/endtoend/vtgate.txt +++ b/go/flags/endtoend/vtgate.txt @@ -68,6 +68,7 @@ Usage of vtgate: --keyspaces_to_watch strings Specifies which keyspaces this vtgate should have access to while routing queries or accessing the vschema. --lameduck-period duration keep running at least this long after SIGTERM before stopping (default 50ms) --legacy_replication_lag_algorithm Use the legacy algorithm when selecting vttablets for serving. (default true) + --lock-timeout duration Maximum time for which a shard/keyspace lock can be acquired for (default 45s) --lock_heartbeat_time duration If there is lock function used. This will keep the lock connection active by using this heartbeat (default 5s) --log_backtrace_at traceLocation when logging hits line file:N, emit a stack trace (default :0) --log_dir string If non-empty, write log files in this directory @@ -119,7 +120,7 @@ Usage of vtgate: --mysql_tcp_version string Select tcp, tcp4, or tcp6 to control the socket type. (default "tcp") --no_scatter when set to true, the planner will fail instead of producing a plan that includes scatter queries --normalize_queries Rewrite queries with bind vars. Turn this off if the app itself sends normalized queries with bind vars. (default true) - --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 1ns) + --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 10s) --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 10s) --opentsdb_uri string URI of opentsdb /api/put method --pid_file string If set, the process will write its pid to the named file, and delete it on graceful shutdown. @@ -134,7 +135,7 @@ Usage of vtgate: --querylog-format string format for query logs ("text" or "json") (default "text") --querylog-row-threshold uint Number of rows a query has to return or affect before being logged; not useful for streaming queries. 0 means all queries will be logged. --redact-debug-ui-queries redact full queries and bind variables from debug UI - --remote_operation_timeout duration time to wait for a remote operation (default 30s) + --remote_operation_timeout duration time to wait for a remote operation (default 15s) --retry-count int retry count (default 2) --schema_change_signal Enable the schema tracker; requires queryserver-config-schema-change-signal to be enabled on the underlying vttablets for this to work (default true) --schema_change_signal_user string User to be used to send down query to vttablet to retrieve schema changes diff --git a/go/flags/endtoend/vtgr.txt b/go/flags/endtoend/vtgr.txt index 75e7b0a0fc4..9e0798f9fca 100644 --- a/go/flags/endtoend/vtgr.txt +++ b/go/flags/endtoend/vtgr.txt @@ -22,6 +22,7 @@ Usage of vtgr: -h, --help display usage and exit --keep_logs duration keep logs for this long (using ctime) (zero to keep forever) --keep_logs_by_mtime duration keep logs for this long (using mtime) (zero to keep forever) + --lock-timeout duration Maximum time for which a shard/keyspace lock can be acquired for (default 45s) --log_backtrace_at traceLocation when logging hits line file:N, emit a stack trace (default :0) --log_dir string If non-empty, write log files in this directory --log_err_stacks log stack traces for errors @@ -31,7 +32,7 @@ Usage of vtgr: --pprof strings enable profiling --purge_logs_interval duration how often try to remove old logs (default 1h0m0s) --refresh_interval duration Refresh interval to load tablets. (default 10s) - --remote_operation_timeout duration time to wait for a remote operation (default 30s) + --remote_operation_timeout duration time to wait for a remote operation (default 15s) --scan_interval duration Scan interval to diagnose and repair. (default 3s) --scan_repair_timeout duration Time to wait for a Diagnose and repair operation. (default 3s) --security_policy string the name of a registered security policy to use for controlling access to URLs - empty means allow all for anyone (built-in policies: deny-all, read-only) diff --git a/go/flags/endtoend/vtorc.txt b/go/flags/endtoend/vtorc.txt index 254b3fd8331..68ffda88f1e 100644 --- a/go/flags/endtoend/vtorc.txt +++ b/go/flags/endtoend/vtorc.txt @@ -23,14 +23,14 @@ Usage of vtorc: --keep_logs duration keep logs for this long (using ctime) (zero to keep forever) --keep_logs_by_mtime duration keep logs for this long (using mtime) (zero to keep forever) --lameduck-period duration keep running at least this long after SIGTERM before stopping (default 50ms) - --lock-shard-timeout duration Duration for which a shard lock is held when running a recovery (default 30s) + --lock-timeout duration Maximum time for which a shard/keyspace lock can be acquired for (default 45s) --log_backtrace_at traceLocation when logging hits line file:N, emit a stack trace (default :0) --log_dir string If non-empty, write log files in this directory --log_err_stacks log stack traces for errors --log_rotate_max_size uint size in bytes at which logs are rotated (glog.MaxSize) (default 1887436800) --logtostderr log to standard error instead of files --max-stack-size int configure the maximum stack size in bytes (default 67108864) - --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 1ns) + --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 10s) --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 10s) --pid_file string If set, the process will write its pid to the named file, and delete it on graceful shutdown. --port int port for the server @@ -40,7 +40,7 @@ Usage of vtorc: --reasonable-replication-lag duration Maximum replication lag on replicas which is deemed to be acceptable (default 10s) --recovery-period-block-duration duration Duration for which a new recovery is blocked on an instance after running a recovery (default 30s) --recovery-poll-duration duration Timer duration on which VTOrc polls its database to run a recovery (default 1s) - --remote_operation_timeout duration time to wait for a remote operation (default 30s) + --remote_operation_timeout duration time to wait for a remote operation (default 15s) --security_policy string the name of a registered security policy to use for controlling access to URLs - empty means allow all for anyone (built-in policies: deny-all, read-only) --shutdown_wait_time duration Maximum time to wait for VTOrc to release all the locks that it is holding before shutting down on SIGTERM (default 30s) --snapshot-topology-interval duration Timer duration on which VTOrc takes a snapshot of the current MySQL information it has in the database. Should be in multiple of hours diff --git a/go/flags/endtoend/vttablet.txt b/go/flags/endtoend/vttablet.txt index ff445a03ddc..ea529bd0bd2 100644 --- a/go/flags/endtoend/vttablet.txt +++ b/go/flags/endtoend/vttablet.txt @@ -160,6 +160,7 @@ Usage of vttablet: --keep_logs duration keep logs for this long (using ctime) (zero to keep forever) --keep_logs_by_mtime duration keep logs for this long (using mtime) (zero to keep forever) --lameduck-period duration keep running at least this long after SIGTERM before stopping (default 50ms) + --lock-timeout duration Maximum time for which a shard/keyspace lock can be acquired for (default 45s) --lock_tables_timeout duration How long to keep the table locked before timing out (default 1m0s) --log_backtrace_at traceLocation when logging hits line file:N, emit a stack trace (default :0) --log_dir string If non-empty, write log files in this directory @@ -192,7 +193,7 @@ Usage of vttablet: --mysql_server_version string MySQL server version to advertise. --mysqlctl_mycnf_template string template file to use for generating the my.cnf file during server init --mysqlctl_socket string socket file to use for remote mysqlctl actions (empty for local actions) - --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 1ns) + --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 10s) --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 10s) --opentsdb_uri string URI of opentsdb /api/put method --pid_file string If set, the process will write its pid to the named file, and delete it on graceful shutdown. @@ -242,7 +243,7 @@ Usage of vttablet: --redact-debug-ui-queries redact full queries and bind variables from debug UI --relay_log_max_items int Maximum number of rows for VReplication target buffering. (default 5000) --relay_log_max_size int Maximum buffer size (in bytes) for VReplication target buffering. If single rows are larger than this, a single row is buffered at a time. (default 250000) - --remote_operation_timeout duration time to wait for a remote operation (default 30s) + --remote_operation_timeout duration time to wait for a remote operation (default 15s) --replication_connect_retry duration how long to wait in between replica reconnect attempts. Only precise to the second. (default 10s) --restore_concurrency int (init restore parameter) how many concurrent files to restore at once (default 4) --restore_from_backup (init restore parameter) will check BackupStorage for a recent backup at startup and start there diff --git a/go/flags/endtoend/vttestserver.txt b/go/flags/endtoend/vttestserver.txt index fbe1c130f70..df27f46bcb8 100644 --- a/go/flags/endtoend/vttestserver.txt +++ b/go/flags/endtoend/vttestserver.txt @@ -76,7 +76,7 @@ Usage of vttestserver: --mysqlctl_socket string socket file to use for remote mysqlctl actions (empty for local actions) --null_probability float The probability to initialize a field with 'NULL' if --initialize_with_random_data is true. Only applies to fields that can contain NULL values. (default 0.1) --num_shards strings Comma separated shard count (one per keyspace) (default [2]) - --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 1ns) + --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 10s) --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 10s) --persistent_mode If this flag is set, the MySQL data directory is not cleaned up when LocalCluster.TearDown() is called. This is useful for running vttestserver as a database container in local developer environments. Note that db migration files (--schema_dir option) and seeding of random data (--initialize_with_random_data option) will only run during cluster startup if the data directory does not already exist. vschema migrations are run every time the cluster starts, since persistence for the topology server has not been implemented yet --pid_file string If set, the process will write its pid to the named file, and delete it on graceful shutdown. diff --git a/go/internal/flag/flag.go b/go/internal/flag/flag.go index 1f55da7b4ee..e126f3c45f5 100644 --- a/go/internal/flag/flag.go +++ b/go/internal/flag/flag.go @@ -70,6 +70,17 @@ func Parse(fs *flag.FlagSet) { flag.Parse() } +// IsFlagProvided returns if the given flag has been provided by the user explicitly or not +func IsFlagProvided(name string) bool { + found := false + flag.Visit(func(f *flag.Flag) { + if f.Name == name { + found = true + } + }) + return found +} + // TrickGlog tricks glog into understanding that flags have been parsed. // // N.B. Do not delete this function. `glog` is a persnickity package and wants diff --git a/go/test/endtoend/reparent/newfeaturetest/reparent_test.go b/go/test/endtoend/reparent/newfeaturetest/reparent_test.go index b450fb44420..fb4026703a4 100644 --- a/go/test/endtoend/reparent/newfeaturetest/reparent_test.go +++ b/go/test/endtoend/reparent/newfeaturetest/reparent_test.go @@ -17,155 +17,41 @@ limitations under the License. package newfeaturetest import ( - "strconv" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "google.golang.org/protobuf/encoding/protojson" - - "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/reparent/utils" - replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" ) -// TestCrossCellDurability tests 2 things - -// 1. When PRS is run with the cross_cell durability policy setup, then the semi-sync settings on all the tablets are as expected -// 2. Bringing up a new vttablet should have its replication and semi-sync setup correctly without any external interference -func TestCrossCellDurability(t *testing.T) { - defer cluster.PanicHandler(t) - clusterInstance := utils.SetupReparentCluster(t, "cross_cell") - defer utils.TeardownCluster(clusterInstance) - tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets - - utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) - - // When tablets[0] is the primary, the only tablet in a different cell is tablets[3]. - // So the other two should have semi-sync turned off - utils.CheckSemiSyncSetupCorrectly(t, tablets[0], "ON") - utils.CheckSemiSyncSetupCorrectly(t, tablets[3], "ON") - utils.CheckSemiSyncSetupCorrectly(t, tablets[1], "OFF") - utils.CheckSemiSyncSetupCorrectly(t, tablets[2], "OFF") - - // Run forced reparent operation, this should proceed unimpeded. - out, err := utils.Prs(t, clusterInstance, tablets[3]) - require.NoError(t, err, out) - - utils.ConfirmReplication(t, tablets[3], []*cluster.Vttablet{tablets[0], tablets[1], tablets[2]}) - - // All the tablets will have semi-sync setup since tablets[3] is in Cell2 and all - // others are in Cell1, so all of them are eligible to send semi-sync ACKs - for _, tablet := range tablets { - utils.CheckSemiSyncSetupCorrectly(t, tablet, "ON") - } - - for i, supportsBackup := range []bool{false, true} { - // Bring up a new replica tablet - // In this new tablet, we do not disable active reparents, otherwise replication will not be started. - newReplica := utils.StartNewVTTablet(t, clusterInstance, 300+i, supportsBackup) - // Add the tablet to the list of tablets in this shard - clusterInstance.Keyspaces[0].Shards[0].Vttablets = append(clusterInstance.Keyspaces[0].Shards[0].Vttablets, newReplica) - // Check that we can replicate to it and semi-sync is setup correctly on it - utils.ConfirmReplication(t, tablets[3], []*cluster.Vttablet{tablets[0], tablets[1], tablets[2], newReplica}) - utils.CheckSemiSyncSetupCorrectly(t, newReplica, "ON") - } -} - -// TestFullStatus tests that the RPC FullStatus works as intended. -func TestFullStatus(t *testing.T) { +// TestRecoverWithMultipleVttabletFailures tests that ERS succeeds with the default values +// even when there are multiple vttablet failures. In this test we use the semi_sync policy +// to allow multiple failures to happen and still be recoverable. +// The test takes down the vttablets of the primary and a rdonly tablet and runs ERS with the +// default values of remote_operation_timeout, lock-timeout flags and wait_replicas_timeout subflag. +func TestRecoverWithMultipleVttabletFailures(t *testing.T) { defer cluster.PanicHandler(t) clusterInstance := utils.SetupReparentCluster(t, "semi_sync") defer utils.TeardownCluster(clusterInstance) tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) - // Check that full status gives the correct result for a primary tablet - primaryStatusString, err := clusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("GetFullStatus", tablets[0].Alias) + // make tablets[1] a rdonly tablet. + err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", tablets[1].Alias, "rdonly") require.NoError(t, err) - primaryStatus := &replicationdatapb.FullStatus{} - err = protojson.Unmarshal([]byte(primaryStatusString), primaryStatus) - require.NoError(t, err) - assert.NotEmpty(t, primaryStatus.ServerUuid) - assert.NotEmpty(t, primaryStatus.ServerId) - // For a primary tablet there is no replication status - assert.Nil(t, primaryStatus.ReplicationStatus) - assert.Contains(t, primaryStatus.PrimaryStatus.String(), "vt-0000000101-bin") - assert.Equal(t, primaryStatus.GtidPurged, "MySQL56/") - assert.False(t, primaryStatus.ReadOnly) - assert.True(t, primaryStatus.SemiSyncPrimaryEnabled) - assert.True(t, primaryStatus.SemiSyncReplicaEnabled) - assert.True(t, primaryStatus.SemiSyncPrimaryStatus) - assert.False(t, primaryStatus.SemiSyncReplicaStatus) - assert.EqualValues(t, 3, primaryStatus.SemiSyncPrimaryClients) - assert.EqualValues(t, 1000000000000000000, primaryStatus.SemiSyncPrimaryTimeout) - assert.EqualValues(t, 1, primaryStatus.SemiSyncWaitForReplicaCount) - assert.Equal(t, "ROW", primaryStatus.BinlogFormat) - assert.Equal(t, "FULL", primaryStatus.BinlogRowImage) - assert.Equal(t, "ON", primaryStatus.GtidMode) - assert.True(t, primaryStatus.LogReplicaUpdates) - assert.True(t, primaryStatus.LogBinEnabled) - assert.Regexp(t, `[58]\.[07].*`, primaryStatus.Version) - assert.NotEmpty(t, primaryStatus.VersionComment) - // Check that full status gives the correct result for a replica tablet - replicaStatusString, err := clusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("GetFullStatus", tablets[1].Alias) - require.NoError(t, err) - replicaStatus := &replicationdatapb.FullStatus{} - err = protojson.Unmarshal([]byte(replicaStatusString), replicaStatus) - require.NoError(t, err) - assert.NotEmpty(t, replicaStatus.ServerUuid) - assert.NotEmpty(t, replicaStatus.ServerId) - assert.Contains(t, replicaStatus.ReplicationStatus.Position, "MySQL56/"+replicaStatus.ReplicationStatus.SourceUuid) - assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.IoState) - assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.SqlState) - assert.Equal(t, fileNameFromPosition(replicaStatus.ReplicationStatus.FilePosition), fileNameFromPosition(primaryStatus.PrimaryStatus.FilePosition)) - assert.LessOrEqual(t, rowNumberFromPosition(replicaStatus.ReplicationStatus.FilePosition), rowNumberFromPosition(primaryStatus.PrimaryStatus.FilePosition)) - assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogSourceBinlogEquivalentPosition, primaryStatus.PrimaryStatus.FilePosition) - assert.Contains(t, replicaStatus.ReplicationStatus.RelayLogFilePosition, "vt-0000000102-relay") - assert.Equal(t, replicaStatus.ReplicationStatus.Position, primaryStatus.PrimaryStatus.Position) - assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogPosition, primaryStatus.PrimaryStatus.Position) - assert.Empty(t, replicaStatus.ReplicationStatus.LastIoError) - assert.Empty(t, replicaStatus.ReplicationStatus.LastSqlError) - assert.Equal(t, replicaStatus.ReplicationStatus.SourceUuid, primaryStatus.ServerUuid) - assert.LessOrEqual(t, int(replicaStatus.ReplicationStatus.ReplicationLagSeconds), 1) - assert.False(t, replicaStatus.ReplicationStatus.ReplicationLagUnknown) - assert.EqualValues(t, 0, replicaStatus.ReplicationStatus.SqlDelay) - assert.False(t, replicaStatus.ReplicationStatus.SslAllowed) - assert.False(t, replicaStatus.ReplicationStatus.HasReplicationFilters) - assert.False(t, replicaStatus.ReplicationStatus.UsingGtid) - assert.True(t, replicaStatus.ReplicationStatus.AutoPosition) - assert.Equal(t, replicaStatus.ReplicationStatus.SourceHost, utils.Hostname) - assert.EqualValues(t, replicaStatus.ReplicationStatus.SourcePort, tablets[0].MySQLPort) - assert.Equal(t, replicaStatus.ReplicationStatus.SourceUser, "vt_repl") - assert.Contains(t, replicaStatus.PrimaryStatus.String(), "vt-0000000102-bin") - assert.Equal(t, replicaStatus.GtidPurged, "MySQL56/") - assert.True(t, replicaStatus.ReadOnly) - assert.False(t, replicaStatus.SemiSyncPrimaryEnabled) - assert.True(t, replicaStatus.SemiSyncReplicaEnabled) - assert.False(t, replicaStatus.SemiSyncPrimaryStatus) - assert.True(t, replicaStatus.SemiSyncReplicaStatus) - assert.EqualValues(t, 0, replicaStatus.SemiSyncPrimaryClients) - assert.EqualValues(t, 1000000000000000000, replicaStatus.SemiSyncPrimaryTimeout) - assert.EqualValues(t, 1, replicaStatus.SemiSyncWaitForReplicaCount) - assert.Equal(t, "ROW", replicaStatus.BinlogFormat) - assert.Equal(t, "FULL", replicaStatus.BinlogRowImage) - assert.Equal(t, "ON", replicaStatus.GtidMode) - assert.True(t, replicaStatus.LogReplicaUpdates) - assert.True(t, replicaStatus.LogBinEnabled) - assert.Regexp(t, `[58]\.[07].*`, replicaStatus.Version) - assert.NotEmpty(t, replicaStatus.VersionComment) -} + // Confirm that replication is still working as intended + utils.ConfirmReplication(t, tablets[0], tablets[1:]) -// fileNameFromPosition gets the file name from the position -func fileNameFromPosition(pos string) string { - return pos[0 : len(pos)-4] -} + // Make the rdonly and primary tablets and databases unavailable. + utils.StopTablet(t, tablets[1], true) + utils.StopTablet(t, tablets[0], true) + + // We expect this to succeed since we only have 1 primary eligible tablet which is down + out, err := utils.Ers(clusterInstance, nil, "", "") + require.NoError(t, err, out) -// rowNumberFromPosition gets the row number from the position -func rowNumberFromPosition(pos string) int { - rowNumStr := pos[len(pos)-4:] - rowNum, _ := strconv.Atoi(rowNumStr) - return rowNum + newPrimary := utils.GetNewPrimary(t, clusterInstance) + utils.ConfirmReplication(t, newPrimary, []*cluster.Vttablet{tablets[2], tablets[3]}) } diff --git a/go/test/endtoend/reparent/plannedreparent/reparent_test.go b/go/test/endtoend/reparent/plannedreparent/reparent_test.go index 66db2908380..72f560a9bc0 100644 --- a/go/test/endtoend/reparent/plannedreparent/reparent_test.go +++ b/go/test/endtoend/reparent/plannedreparent/reparent_test.go @@ -19,15 +19,20 @@ package plannedreparent import ( "context" "fmt" + "strconv" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "google.golang.org/protobuf/encoding/protojson" + + "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/reparent/utils" "vitess.io/vitess/go/vt/log" + replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" ) func TestPrimaryToSpareStateChangeImpossible(t *testing.T) { @@ -376,3 +381,142 @@ func TestReparentDoesntHangIfPrimaryFails(t *testing.T) { require.Error(t, err) assert.Contains(t, out, "primary failed to PopulateReparentJournal") } + +// TestCrossCellDurability tests 2 things - +// 1. When PRS is run with the cross_cell durability policy setup, then the semi-sync settings on all the tablets are as expected +// 2. Bringing up a new vttablet should have its replication and semi-sync setup correctly without any manual intervention +func TestCrossCellDurability(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := utils.SetupReparentCluster(t, "cross_cell") + defer utils.TeardownCluster(clusterInstance) + tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets + + utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) + + // When tablets[0] is the primary, the only tablet in a different cell is tablets[3]. + // So the other two should have semi-sync turned off + utils.CheckSemiSyncSetupCorrectly(t, tablets[0], "ON") + utils.CheckSemiSyncSetupCorrectly(t, tablets[3], "ON") + utils.CheckSemiSyncSetupCorrectly(t, tablets[1], "OFF") + utils.CheckSemiSyncSetupCorrectly(t, tablets[2], "OFF") + + // Run forced reparent operation, this should proceed unimpeded. + out, err := utils.Prs(t, clusterInstance, tablets[3]) + require.NoError(t, err, out) + + utils.ConfirmReplication(t, tablets[3], []*cluster.Vttablet{tablets[0], tablets[1], tablets[2]}) + + // All the tablets will have semi-sync setup since tablets[3] is in Cell2 and all + // others are in Cell1, so all of them are eligible to send semi-sync ACKs + for _, tablet := range tablets { + utils.CheckSemiSyncSetupCorrectly(t, tablet, "ON") + } + + for i, supportsBackup := range []bool{false, true} { + // Bring up a new replica tablet + // In this new tablet, we do not disable active reparents, otherwise replication will not be started. + newReplica := utils.StartNewVTTablet(t, clusterInstance, 300+i, supportsBackup) + // Add the tablet to the list of tablets in this shard + clusterInstance.Keyspaces[0].Shards[0].Vttablets = append(clusterInstance.Keyspaces[0].Shards[0].Vttablets, newReplica) + // Check that we can replicate to it and semi-sync is setup correctly on it + utils.ConfirmReplication(t, tablets[3], []*cluster.Vttablet{tablets[0], tablets[1], tablets[2], newReplica}) + utils.CheckSemiSyncSetupCorrectly(t, newReplica, "ON") + } +} + +// TestFullStatus tests that the RPC FullStatus works as intended. +func TestFullStatus(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := utils.SetupReparentCluster(t, "semi_sync") + defer utils.TeardownCluster(clusterInstance) + tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets + utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) + + // Check that full status gives the correct result for a primary tablet + primaryStatusString, err := clusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("GetFullStatus", tablets[0].Alias) + require.NoError(t, err) + primaryStatus := &replicationdatapb.FullStatus{} + err = protojson.Unmarshal([]byte(primaryStatusString), primaryStatus) + require.NoError(t, err) + assert.NotEmpty(t, primaryStatus.ServerUuid) + assert.NotEmpty(t, primaryStatus.ServerId) + // For a primary tablet there is no replication status + assert.Nil(t, primaryStatus.ReplicationStatus) + assert.Contains(t, primaryStatus.PrimaryStatus.String(), "vt-0000000101-bin") + assert.Equal(t, primaryStatus.GtidPurged, "MySQL56/") + assert.False(t, primaryStatus.ReadOnly) + assert.True(t, primaryStatus.SemiSyncPrimaryEnabled) + assert.True(t, primaryStatus.SemiSyncReplicaEnabled) + assert.True(t, primaryStatus.SemiSyncPrimaryStatus) + assert.False(t, primaryStatus.SemiSyncReplicaStatus) + assert.EqualValues(t, 3, primaryStatus.SemiSyncPrimaryClients) + assert.EqualValues(t, 1000000000000000000, primaryStatus.SemiSyncPrimaryTimeout) + assert.EqualValues(t, 1, primaryStatus.SemiSyncWaitForReplicaCount) + assert.Equal(t, "ROW", primaryStatus.BinlogFormat) + assert.Equal(t, "FULL", primaryStatus.BinlogRowImage) + assert.Equal(t, "ON", primaryStatus.GtidMode) + assert.True(t, primaryStatus.LogReplicaUpdates) + assert.True(t, primaryStatus.LogBinEnabled) + assert.Regexp(t, `[58]\.[07].*`, primaryStatus.Version) + assert.NotEmpty(t, primaryStatus.VersionComment) + + // Check that full status gives the correct result for a replica tablet + replicaStatusString, err := clusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("GetFullStatus", tablets[1].Alias) + require.NoError(t, err) + replicaStatus := &replicationdatapb.FullStatus{} + err = protojson.Unmarshal([]byte(replicaStatusString), replicaStatus) + require.NoError(t, err) + assert.NotEmpty(t, replicaStatus.ServerUuid) + assert.NotEmpty(t, replicaStatus.ServerId) + assert.Contains(t, replicaStatus.ReplicationStatus.Position, "MySQL56/"+replicaStatus.ReplicationStatus.SourceUuid) + assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.IoState) + assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.SqlState) + assert.Equal(t, fileNameFromPosition(replicaStatus.ReplicationStatus.FilePosition), fileNameFromPosition(primaryStatus.PrimaryStatus.FilePosition)) + assert.LessOrEqual(t, rowNumberFromPosition(replicaStatus.ReplicationStatus.FilePosition), rowNumberFromPosition(primaryStatus.PrimaryStatus.FilePosition)) + assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogSourceBinlogEquivalentPosition, primaryStatus.PrimaryStatus.FilePosition) + assert.Contains(t, replicaStatus.ReplicationStatus.RelayLogFilePosition, "vt-0000000102-relay") + assert.Equal(t, replicaStatus.ReplicationStatus.Position, primaryStatus.PrimaryStatus.Position) + assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogPosition, primaryStatus.PrimaryStatus.Position) + assert.Empty(t, replicaStatus.ReplicationStatus.LastIoError) + assert.Empty(t, replicaStatus.ReplicationStatus.LastSqlError) + assert.Equal(t, replicaStatus.ReplicationStatus.SourceUuid, primaryStatus.ServerUuid) + assert.LessOrEqual(t, int(replicaStatus.ReplicationStatus.ReplicationLagSeconds), 1) + assert.False(t, replicaStatus.ReplicationStatus.ReplicationLagUnknown) + assert.EqualValues(t, 0, replicaStatus.ReplicationStatus.SqlDelay) + assert.False(t, replicaStatus.ReplicationStatus.SslAllowed) + assert.False(t, replicaStatus.ReplicationStatus.HasReplicationFilters) + assert.False(t, replicaStatus.ReplicationStatus.UsingGtid) + assert.True(t, replicaStatus.ReplicationStatus.AutoPosition) + assert.Equal(t, replicaStatus.ReplicationStatus.SourceHost, utils.Hostname) + assert.EqualValues(t, replicaStatus.ReplicationStatus.SourcePort, tablets[0].MySQLPort) + assert.Equal(t, replicaStatus.ReplicationStatus.SourceUser, "vt_repl") + assert.Contains(t, replicaStatus.PrimaryStatus.String(), "vt-0000000102-bin") + assert.Equal(t, replicaStatus.GtidPurged, "MySQL56/") + assert.True(t, replicaStatus.ReadOnly) + assert.False(t, replicaStatus.SemiSyncPrimaryEnabled) + assert.True(t, replicaStatus.SemiSyncReplicaEnabled) + assert.False(t, replicaStatus.SemiSyncPrimaryStatus) + assert.True(t, replicaStatus.SemiSyncReplicaStatus) + assert.EqualValues(t, 0, replicaStatus.SemiSyncPrimaryClients) + assert.EqualValues(t, 1000000000000000000, replicaStatus.SemiSyncPrimaryTimeout) + assert.EqualValues(t, 1, replicaStatus.SemiSyncWaitForReplicaCount) + assert.Equal(t, "ROW", replicaStatus.BinlogFormat) + assert.Equal(t, "FULL", replicaStatus.BinlogRowImage) + assert.Equal(t, "ON", replicaStatus.GtidMode) + assert.True(t, replicaStatus.LogReplicaUpdates) + assert.True(t, replicaStatus.LogBinEnabled) + assert.Regexp(t, `[58]\.[07].*`, replicaStatus.Version) + assert.NotEmpty(t, replicaStatus.VersionComment) +} + +// fileNameFromPosition gets the file name from the position +func fileNameFromPosition(pos string) string { + return pos[0 : len(pos)-4] +} + +// rowNumberFromPosition gets the row number from the position +func rowNumberFromPosition(pos string) int { + rowNumStr := pos[len(pos)-4:] + rowNum, _ := strconv.Atoi(rowNumStr) + return rowNum +} diff --git a/go/test/endtoend/reparent/utils/utils.go b/go/test/endtoend/reparent/utils/utils.go index a3359d172d5..744a3c44c2b 100644 --- a/go/test/endtoend/reparent/utils/utils.go +++ b/go/test/endtoend/reparent/utils/utils.go @@ -129,7 +129,7 @@ func setupCluster(ctx context.Context, t *testing.T, shardName string, cells []s // the replication manager to silently fix the replication in case ERS or PRS mess up. All the // tests in this test suite should work irrespective of this flag. Each run of ERS, PRS should be // setting up the replication correctly. - "--disable_active_reparents") + "--disable-replication-manager") // Initialize Cluster err = clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard}) diff --git a/go/test/endtoend/tabletmanager/replication_manager/tablet_test.go b/go/test/endtoend/tabletmanager/replication_manager/tablet_test.go index 3284ab65d49..d9a24b3b444 100644 --- a/go/test/endtoend/tabletmanager/replication_manager/tablet_test.go +++ b/go/test/endtoend/tabletmanager/replication_manager/tablet_test.go @@ -159,8 +159,7 @@ func waitForSourcePort(ctx context.Context, t *testing.T, tablet cluster.Vttable for time.Now().Before(timeout) { // Check that initially replication is setup correctly on the replica tablet replicaStatus, err := tmcGetReplicationStatus(ctx, tablet.GrpcPort) - require.NoError(t, err) - if replicaStatus.SourcePort == expectedPort { + if err == nil && replicaStatus.SourcePort == expectedPort { return nil } time.Sleep(300 * time.Millisecond) diff --git a/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go b/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go index 4dcbe0a92ed..8e91028926c 100644 --- a/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go +++ b/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go @@ -33,7 +33,7 @@ import ( // Also tests that VTOrc can handle multiple failures, if the durability policies allow it func TestDownPrimary(t *testing.T) { defer cluster.PanicHandler(t) - utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{ + utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, []string{"--remote_operation_timeout=10s"}, cluster.VTOrcConfiguration{ PreventCrossDataCenterPrimaryFailover: true, }, 1, "semi_sync") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] @@ -64,14 +64,18 @@ func TestDownPrimary(t *testing.T) { // check that the replication is setup correctly before we failover utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly, replica, crossCellReplica}, 10*time.Second) - // Make the rdonly tablet unavailable - err := rdonly.MysqlctlProcess.Stop() + // Make the rdonly vttablet unavailable + err := rdonly.VttabletProcess.TearDown() + require.NoError(t, err) + err = rdonly.MysqlctlProcess.Stop() + require.NoError(t, err) + // Make the current primary vttablet unavailable. + err = curPrimary.VttabletProcess.TearDown() require.NoError(t, err) - // Make the current primary database unavailable. err = curPrimary.MysqlctlProcess.Stop() require.NoError(t, err) defer func() { - // we remove the tablet from our global list since its mysqlctl process has stopped and cannot be reused for other tests + // we remove the tablet from our global list utils.PermanentlyRemoveVttablet(clusterInfo, curPrimary) utils.PermanentlyRemoveVttablet(clusterInfo, rdonly) }() diff --git a/go/vt/servenv/servenv.go b/go/vt/servenv/servenv.go index a55c3241ca0..4e5d582a2ae 100644 --- a/go/vt/servenv/servenv.go +++ b/go/vt/servenv/servenv.go @@ -79,7 +79,7 @@ var ( var ( lameduckPeriod = 50 * time.Millisecond onTermTimeout = 10 * time.Second - onCloseTimeout = time.Nanosecond + onCloseTimeout = 10 * time.Second catchSigpipe bool maxStackSize = 64 * 1024 * 1024 ) diff --git a/go/vt/servenv/servenv_test.go b/go/vt/servenv/servenv_test.go index 3d835fcea1a..b7bd874989a 100644 --- a/go/vt/servenv/servenv_test.go +++ b/go/vt/servenv/servenv_test.go @@ -65,9 +65,7 @@ func TestFireOnCloseHooksTimeout(t *testing.T) { time.Sleep(1 * time.Second) }) - // we deliberatly test the flag to make sure it's not accidently set to a - // high value. - if finished, want := fireOnCloseHooks(onCloseTimeout), false; finished != want { + if finished, want := fireOnCloseHooks(1*time.Nanosecond), false; finished != want { t.Errorf("finished = %v, want %v", finished, want) } } diff --git a/go/vt/topo/locks.go b/go/vt/topo/locks.go index bebe195e3c5..036ce983078 100644 --- a/go/vt/topo/locks.go +++ b/go/vt/topo/locks.go @@ -27,6 +27,7 @@ import ( "github.com/spf13/pflag" + _flag "vitess.io/vitess/go/internal/flag" "vitess.io/vitess/go/trace" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/proto/vtrpc" @@ -38,15 +39,14 @@ import ( // keyspaces and shards. var ( - // DefaultLockTimeout is a good value to use as a default for - // locking a shard / keyspace. - // Now used only for unlock operations - defaultLockTimeout = 30 * time.Second + // LockTimeout is the maximum duration for which a + // shard / keyspace lock can be acquired for. + LockTimeout = 45 * time.Second // RemoteOperationTimeout is used for operations where we have to // call out to another process. // Used for RPC calls (including topo server calls) - RemoteOperationTimeout = 30 * time.Second + RemoteOperationTimeout = 15 * time.Second ) // Lock describes a long-running lock on a keyspace or a shard. @@ -70,6 +70,7 @@ func init() { func registerTopoLockFlags(fs *pflag.FlagSet) { fs.DurationVar(&RemoteOperationTimeout, "remote_operation_timeout", RemoteOperationTimeout, "time to wait for a remote operation") + fs.DurationVar(&LockTimeout, "lock-timeout", LockTimeout, "Maximum time for which a shard/keyspace lock can be acquired for") } // newLock creates a new Lock. @@ -244,7 +245,7 @@ func CheckKeyspaceLockedAndRenew(ctx context.Context, keyspace string) error { func (l *Lock) lockKeyspace(ctx context.Context, ts *Server, keyspace string) (LockDescriptor, error) { log.Infof("Locking keyspace %v for action %v", keyspace, l.Action) - ctx, cancel := context.WithTimeout(ctx, RemoteOperationTimeout) + ctx, cancel := context.WithTimeout(ctx, getLockTimeout()) defer cancel() span, ctx := trace.NewSpan(ctx, "TopoServer.LockKeyspaceForAction") @@ -265,10 +266,8 @@ func (l *Lock) unlockKeyspace(ctx context.Context, ts *Server, keyspace string, // Detach from the parent timeout, but copy the trace span. // We need to still release the lock even if the parent // context timed out. - // Note that we are not using the user provided RemoteOperationTimeout - // here because it is possible that that timeout is too short. ctx = trace.CopySpan(context.TODO(), ctx) - ctx, cancel := context.WithTimeout(ctx, defaultLockTimeout) + ctx, cancel := context.WithTimeout(ctx, RemoteOperationTimeout) defer cancel() span, ctx := trace.NewSpan(ctx, "TopoServer.UnlockKeyspaceForAction") @@ -432,7 +431,7 @@ func (l *Lock) tryLockShard(ctx context.Context, ts *Server, keyspace, shard str func (l *Lock) internalLockShard(ctx context.Context, ts *Server, keyspace, shard string, isBlocking bool) (LockDescriptor, error) { log.Infof("Locking shard %v/%v for action %v", keyspace, shard, l.Action) - ctx, cancel := context.WithTimeout(ctx, RemoteOperationTimeout) + ctx, cancel := context.WithTimeout(ctx, getLockTimeout()) defer cancel() span, ctx := trace.NewSpan(ctx, "TopoServer.LockShardForAction") @@ -456,10 +455,8 @@ func (l *Lock) internalLockShard(ctx context.Context, ts *Server, keyspace, shar func (l *Lock) unlockShard(ctx context.Context, ts *Server, keyspace, shard string, lockDescriptor LockDescriptor, actionError error) error { // Detach from the parent timeout, but copy the trace span. // We need to still release the lock even if the parent context timed out. - // Note that we are not using the user provided RemoteOperationTimeout - // here because it is possible that that timeout is too short. ctx = trace.CopySpan(context.TODO(), ctx) - ctx, cancel := context.WithTimeout(ctx, defaultLockTimeout) + ctx, cancel := context.WithTimeout(ctx, RemoteOperationTimeout) defer cancel() span, ctx := trace.NewSpan(ctx, "TopoServer.UnlockShardForAction") @@ -478,3 +475,15 @@ func (l *Lock) unlockShard(ctx context.Context, ts *Server, keyspace, shard stri } return lockDescriptor.Unlock(ctx) } + +// getLockTimeout is shim code used for backward compatibility with v15 +// This code can be removed in v17+ and LockTimeout can be used directly +func getLockTimeout() time.Duration { + if _flag.IsFlagProvided("lock-timeout") { + return LockTimeout + } + if _flag.IsFlagProvided("remote_operation_timeout") { + return RemoteOperationTimeout + } + return LockTimeout +} diff --git a/go/vt/topo/locks_test.go b/go/vt/topo/locks_test.go new file mode 100644 index 00000000000..da4f179f83c --- /dev/null +++ b/go/vt/topo/locks_test.go @@ -0,0 +1,85 @@ +/* +Copyright 2022 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package topo + +import ( + "os" + "testing" + "time" + + "github.com/spf13/pflag" + "github.com/stretchr/testify/require" + + "vitess.io/vitess/go/internal/flag" +) + +// TestGetLockTimeout tests the behaviour of +// getLockTimeout function in different situations where +// the two flags `remote_operation_timeout` and `lock-timeout` are +// provided or not. +func TestGetLockTimeout(t *testing.T) { + tests := []struct { + description string + lockTimeoutValue string + remoteOperationTimeoutValue string + expectedLockTimeout time.Duration + }{ + { + description: "no flags specified", + lockTimeoutValue: "", + remoteOperationTimeoutValue: "", + expectedLockTimeout: 45 * time.Second, + }, { + description: "lock-timeout flag specified", + lockTimeoutValue: "33s", + remoteOperationTimeoutValue: "", + expectedLockTimeout: 33 * time.Second, + }, { + description: "remote operation timeout flag specified", + lockTimeoutValue: "", + remoteOperationTimeoutValue: "33s", + expectedLockTimeout: 33 * time.Second, + }, { + description: "both flags specified", + lockTimeoutValue: "33s", + remoteOperationTimeoutValue: "22s", + expectedLockTimeout: 33 * time.Second, + }, + } + + for _, tt := range tests { + t.Run(tt.description, func(t *testing.T) { + var args []string + if tt.lockTimeoutValue != "" { + args = append(args, "--lock-timeout", tt.lockTimeoutValue) + } + if tt.remoteOperationTimeoutValue != "" { + args = append(args, "--remote_operation_timeout", tt.remoteOperationTimeoutValue) + } + os.Args = os.Args[0:1] + os.Args = append(os.Args, args...) + + fs := pflag.NewFlagSet("test", pflag.ExitOnError) + registerTopoLockFlags(fs) + flag.Parse(fs) + + val := getLockTimeout() + require.Equal(t, tt.expectedLockTimeout, val) + }) + } + +} diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter.go b/go/vt/vtctl/reparentutil/emergency_reparenter.go index ba846ebc147..9e4ac550a8f 100644 --- a/go/vt/vtctl/reparentutil/emergency_reparenter.go +++ b/go/vt/vtctl/reparentutil/emergency_reparenter.go @@ -192,7 +192,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve } // Stop replication on all the tablets and build their status map - stoppedReplicationSnapshot, err = stopReplicationAndBuildStatusMaps(ctx, erp.tmc, ev, tabletMap, opts.WaitReplicasTimeout, opts.IgnoreReplicas, opts.NewPrimaryAlias, opts.durability, erp.logger) + stoppedReplicationSnapshot, err = stopReplicationAndBuildStatusMaps(ctx, erp.tmc, ev, tabletMap, topo.RemoteOperationTimeout, opts.IgnoreReplicas, opts.NewPrimaryAlias, opts.durability, erp.logger) if err != nil { return vterrors.Wrapf(err, "failed to stop replication and build status maps: %v", err) } diff --git a/go/vt/vtctl/reparentutil/replication.go b/go/vt/vtctl/reparentutil/replication.go index b1510ffaf09..512b3a60221 100644 --- a/go/vt/vtctl/reparentutil/replication.go +++ b/go/vt/vtctl/reparentutil/replication.go @@ -214,7 +214,7 @@ func stopReplicationAndBuildStatusMaps( tmc tmclient.TabletManagerClient, ev *events.Reparent, tabletMap map[string]*topo.TabletInfo, - waitReplicasTimeout time.Duration, + stopReplicationTimeout time.Duration, ignoredTablets sets.String, tabletToWaitFor *topodatapb.TabletAlias, durability Durabler, @@ -233,7 +233,7 @@ func stopReplicationAndBuildStatusMaps( } ) - groupCtx, groupCancel := context.WithTimeout(ctx, waitReplicasTimeout) + groupCtx, groupCancel := context.WithTimeout(ctx, stopReplicationTimeout) defer groupCancel() fillStatus := func(alias string, tabletInfo *topo.TabletInfo, mustWaitForTablet bool) { diff --git a/go/vt/vtctl/reparentutil/replication_test.go b/go/vt/vtctl/reparentutil/replication_test.go index 42b01cac770..01f043ac827 100644 --- a/go/vt/vtctl/reparentutil/replication_test.go +++ b/go/vt/vtctl/reparentutil/replication_test.go @@ -18,27 +18,33 @@ package reparentutil import ( "context" + "os" "testing" "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "vitess.io/vitess/go/vt/vterrors" - - "github.com/stretchr/testify/assert" "k8s.io/apimachinery/pkg/util/sets" + _flag "vitess.io/vitess/go/internal/flag" "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/topo/topoproto" "vitess.io/vitess/go/vt/topotools/events" + "vitess.io/vitess/go/vt/vterrors" "vitess.io/vitess/go/vt/vttablet/tmclient" replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" topodatapb "vitess.io/vitess/go/vt/proto/topodata" ) +func TestMain(m *testing.M) { + _flag.ParseFlagsForTest() + os.Exit(m.Run()) +} + func TestFindValidEmergencyReparentCandidates(t *testing.T) { t.Parallel() @@ -278,7 +284,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) { durability string tmc *stopReplicationAndBuildStatusMapsTestTMClient tabletMap map[string]*topo.TabletInfo - waitReplicasTimeout time.Duration + stopReplicasTimeout time.Duration ignoredTablets sets.String tabletToWaitFor *topodatapb.TabletAlias expectedStatusMap map[string]*replicationdatapb.StopReplicationStatus @@ -796,7 +802,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) { shouldErr: true, // we get multiple errors, so we fail }, { - name: "waitReplicasTimeout exceeded", + name: "stopReplicasTimeout exceeded", durability: "none", tmc: &stopReplicationAndBuildStatusMapsTestTMClient{ stopReplicationAndGetStatusDelays: map[string]time.Duration{ @@ -840,7 +846,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) { }, }, }, - waitReplicasTimeout: time.Millisecond * 5, + stopReplicasTimeout: time.Millisecond * 5, ignoredTablets: sets.NewString(), expectedStatusMap: map[string]*replicationdatapb.StopReplicationStatus{ "zone1-0000000101": { @@ -1098,7 +1104,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) { Uid: 102, }, }}, - waitReplicasTimeout: time.Minute, + stopReplicasTimeout: time.Minute, expectedPrimaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{}, shouldErr: false, }, @@ -1110,7 +1116,7 @@ func Test_stopReplicationAndBuildStatusMaps(t *testing.T) { t.Run(tt.name, func(t *testing.T) { durability, err := GetDurabilityPolicy(tt.durability) require.NoError(t, err) - res, err := stopReplicationAndBuildStatusMaps(ctx, tt.tmc, &events.Reparent{}, tt.tabletMap, tt.waitReplicasTimeout, tt.ignoredTablets, tt.tabletToWaitFor, durability, logger) + res, err := stopReplicationAndBuildStatusMaps(ctx, tt.tmc, &events.Reparent{}, tt.tabletMap, tt.stopReplicasTimeout, tt.ignoredTablets, tt.tabletToWaitFor, durability, logger) if tt.shouldErr { assert.Error(t, err) return diff --git a/go/vt/vtorc/config/config.go b/go/vt/vtorc/config/config.go index 0c5cadd2431..fd54e9ed582 100644 --- a/go/vt/vtorc/config/config.go +++ b/go/vt/vtorc/config/config.go @@ -64,7 +64,6 @@ var ( auditPurgeDuration = 7 * 24 * time.Hour // Equivalent of 7 days recoveryPeriodBlockDuration = 30 * time.Second preventCrossCellFailover = false - lockShardTimeout = 30 * time.Second waitReplicasTimeout = 30 * time.Second topoInformationRefreshDuration = 15 * time.Second recoveryPollDuration = 1 * time.Second @@ -82,7 +81,8 @@ func RegisterFlags(fs *pflag.FlagSet) { fs.DurationVar(&auditPurgeDuration, "audit-purge-duration", auditPurgeDuration, "Duration for which audit logs are held before being purged. Should be in multiples of days") fs.DurationVar(&recoveryPeriodBlockDuration, "recovery-period-block-duration", recoveryPeriodBlockDuration, "Duration for which a new recovery is blocked on an instance after running a recovery") fs.BoolVar(&preventCrossCellFailover, "prevent-cross-cell-failover", preventCrossCellFailover, "Prevent VTOrc from promoting a primary in a different cell than the current primary in case of a failover") - fs.DurationVar(&lockShardTimeout, "lock-shard-timeout", lockShardTimeout, "Duration for which a shard lock is held when running a recovery") + fs.Duration("lock-shard-timeout", 30*time.Second, "Duration for which a shard lock is held when running a recovery") + _ = fs.MarkDeprecated("lock-shard-timeout", "Please use lock-timeout instead.") fs.DurationVar(&waitReplicasTimeout, "wait-replicas-timeout", waitReplicasTimeout, "Duration for which to wait for replica's to respond when issuing RPCs") fs.DurationVar(&topoInformationRefreshDuration, "topo-information-refresh-duration", topoInformationRefreshDuration, "Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topology server") fs.DurationVar(&recoveryPollDuration, "recovery-poll-duration", recoveryPollDuration, "Timer duration on which VTOrc polls its database to run a recovery") @@ -103,8 +103,7 @@ type Configuration struct { AuditPurgeDays uint // Days after which audit entries are purged from the database RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on smae instance as well as flapping PreventCrossDataCenterPrimaryFailover bool // When true (default: false), cross-DC primary failover are not allowed, vtorc will do all it can to only fail over within same DC, or else not fail over at all. - LockShardTimeoutSeconds int // Timeout on context used to lock shard. Should be a small value because we should fail-fast - WaitReplicasTimeoutSeconds int // Timeout on amount of time to wait for the replicas in case of ERS. Should be a small value because we should fail-fast. Should not be larger than LockShardTimeoutSeconds since that is the total time we use for an ERS. + WaitReplicasTimeoutSeconds int // Timeout on amount of time to wait for the replicas in case of ERS. Should be a small value because we should fail-fast. Should not be larger than LockTimeout since that is the total time we use for an ERS. TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server. RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs } @@ -133,7 +132,6 @@ func UpdateConfigValuesFromFlags() { Config.AuditPurgeDays = uint(auditPurgeDuration / (time.Hour * 24)) Config.RecoveryPeriodBlockSeconds = int(recoveryPeriodBlockDuration / time.Second) Config.PreventCrossDataCenterPrimaryFailover = preventCrossCellFailover - Config.LockShardTimeoutSeconds = int(lockShardTimeout / time.Second) Config.WaitReplicasTimeoutSeconds = int(waitReplicasTimeout / time.Second) Config.TopoInformationRefreshSeconds = int(topoInformationRefreshDuration / time.Second) Config.RecoveryPollSeconds = int(recoveryPollDuration / time.Second) @@ -157,7 +155,6 @@ func newConfiguration() *Configuration { AuditPurgeDays: 7, RecoveryPeriodBlockSeconds: 30, PreventCrossDataCenterPrimaryFailover: false, - LockShardTimeoutSeconds: 30, WaitReplicasTimeoutSeconds: 30, TopoInformationRefreshSeconds: 15, RecoveryPollSeconds: 1, diff --git a/go/vt/vtorc/config/config_test.go b/go/vt/vtorc/config/config_test.go index 90e78d56623..2009b476f1d 100644 --- a/go/vt/vtorc/config/config_test.go +++ b/go/vt/vtorc/config/config_test.go @@ -187,21 +187,6 @@ func TestUpdateConfigValuesFromFlags(t *testing.T) { require.Equal(t, testConfig, Config) }) - t.Run("override lockShardTimeout", func(t *testing.T) { - oldLockShardTimeout := lockShardTimeout - lockShardTimeout = 3 * time.Hour - // Restore the changes we make - defer func() { - Config = newConfiguration() - lockShardTimeout = oldLockShardTimeout - }() - - testConfig := newConfiguration() - testConfig.LockShardTimeoutSeconds = 10800 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - t.Run("override waitReplicasTimeout", func(t *testing.T) { oldWaitReplicasTimeout := waitReplicasTimeout waitReplicasTimeout = 3*time.Minute + 4*time.Second diff --git a/go/vt/vtorc/logic/tablet_discovery.go b/go/vt/vtorc/logic/tablet_discovery.go index c133ca95a4e..6716d5a727b 100644 --- a/go/vt/vtorc/logic/tablet_discovery.go +++ b/go/vt/vtorc/logic/tablet_discovery.go @@ -202,20 +202,21 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an // Discover new tablets. // TODO(sougou): enhance this to work with multi-schema, // where each instanceKey can have multiple tablets. - latestInstances := make(map[inst.InstanceKey]bool) + latestInstances := make(map[string]bool) + var wg sync.WaitGroup for _, tabletInfo := range tablets { tablet := tabletInfo.Tablet - if tablet.MysqlHostname == "" { + if tablet.Type != topodatapb.TabletType_PRIMARY && !topo.IsReplicaType(tablet.Type) { continue } - if tablet.Type != topodatapb.TabletType_PRIMARY && !topo.IsReplicaType(tablet.Type) { + latestInstances[topoproto.TabletAliasString(tablet.Alias)] = true + if tablet.MysqlHostname == "" { continue } instanceKey := inst.InstanceKey{ Hostname: tablet.MysqlHostname, Port: int(tablet.MysqlPort), } - latestInstances[instanceKey] = true old, err := inst.ReadTablet(instanceKey) if err != nil && err != inst.ErrTabletAliasNil { log.Error(err) @@ -228,9 +229,14 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an log.Error(err) continue } - loader(&instanceKey) + wg.Add(1) + go func() { + defer wg.Done() + loader(&instanceKey) + }() log.Infof("Discovered: %v", tablet) } + wg.Wait() // Forget tablets that were removed. toForget := make(map[inst.InstanceKey]*topodatapb.Tablet) @@ -239,12 +245,12 @@ func refreshTablets(tablets map[string]*topo.TabletInfo, query string, args []an Hostname: row.GetString("hostname"), Port: row.GetInt("port"), } - if !latestInstances[curKey] { - tablet := &topodatapb.Tablet{} - if err := prototext.Unmarshal([]byte(row.GetString("info")), tablet); err != nil { - log.Error(err) - return nil - } + tablet := &topodatapb.Tablet{} + if err := prototext.Unmarshal([]byte(row.GetString("info")), tablet); err != nil { + log.Error(err) + return nil + } + if !latestInstances[topoproto.TabletAliasString(tablet.Alias)] { toForget[curKey] = tablet } return nil @@ -286,18 +292,15 @@ func LockShard(ctx context.Context, instanceKey inst.InstanceKey) (context.Conte return nil, nil, err } - ctx, cancel := context.WithTimeout(ctx, time.Duration(config.Config.LockShardTimeoutSeconds)*time.Second) atomic.AddInt32(&shardsLockCounter, 1) ctx, unlock, err := ts.TryLockShard(ctx, tablet.Keyspace, tablet.Shard, "Orc Recovery") if err != nil { - cancel() atomic.AddInt32(&shardsLockCounter, -1) return nil, nil, err } return ctx, func(e *error) { defer atomic.AddInt32(&shardsLockCounter, -1) unlock(e) - cancel() }, nil } diff --git a/go/vt/vtorc/logic/tablet_discovery_test.go b/go/vt/vtorc/logic/tablet_discovery_test.go index 64262eff250..410f1a70e0a 100644 --- a/go/vt/vtorc/logic/tablet_discovery_test.go +++ b/go/vt/vtorc/logic/tablet_discovery_test.go @@ -18,6 +18,7 @@ package logic import ( "context" + "sync/atomic" "testing" "github.com/google/go-cmp/cmp" @@ -137,10 +138,33 @@ func TestRefreshTabletsInKeyspaceShard(t *testing.T) { verifyRefreshTabletsInKeyspaceShard(t, true, 3, tablets) }) + t.Run("tablet shutdown removes mysql hostname and port. We shouldn't forget the tablet", func(t *testing.T) { + defer func() { + _, err = ts.UpdateTabletFields(context.Background(), tab100.Alias, func(tablet *topodatapb.Tablet) error { + tablet.MysqlHostname = hostname + tablet.MysqlPort = 100 + return nil + }) + }() + // Let's assume tab100 shutdown. This would clear its tablet hostname and port + _, err = ts.UpdateTabletFields(context.Background(), tab100.Alias, func(tablet *topodatapb.Tablet) error { + tablet.MysqlHostname = "" + tablet.MysqlPort = 0 + return nil + }) + require.NoError(t, err) + // We expect no tablets to be refreshed. Also, tab100 shouldn't be forgotten + verifyRefreshTabletsInKeyspaceShard(t, false, 0, tablets) + }) + t.Run("change a tablet and call refreshTabletsInKeyspaceShard again", func(t *testing.T) { startTimeInitially := tab100.PrimaryTermStartTime.Seconds defer func() { tab100.PrimaryTermStartTime.Seconds = startTimeInitially + _, err = ts.UpdateTabletFields(context.Background(), tab100.Alias, func(tablet *topodatapb.Tablet) error { + tablet.PrimaryTermStartTime.Seconds = startTimeInitially + return nil + }) }() tab100.PrimaryTermStartTime.Seconds = 1000 _, err = ts.UpdateTabletFields(context.Background(), tab100.Alias, func(tablet *topodatapb.Tablet) error { @@ -224,17 +248,18 @@ func TestShardPrimary(t *testing.T) { // verifyRefreshTabletsInKeyspaceShard calls refreshTabletsInKeyspaceShard with the forceRefresh parameter provided and verifies that // the number of instances refreshed matches the parameter and all the tablets match the ones provided func verifyRefreshTabletsInKeyspaceShard(t *testing.T, forceRefresh bool, instanceRefreshRequired int, tablets []*topodatapb.Tablet) { - instancesRefreshed := 0 + var instancesRefreshed atomic.Int32 + instancesRefreshed.Store(0) // call refreshTabletsInKeyspaceShard while counting all the instances that are refreshed refreshTabletsInKeyspaceShard(context.Background(), keyspace, shard, func(instanceKey *inst.InstanceKey) { - instancesRefreshed++ + instancesRefreshed.Add(1) }, forceRefresh) // Verify that all the tablets are present in the database for _, tablet := range tablets { verifyTabletInfo(t, tablet, "") } // Verify that refresh as many tablets as expected - assert.EqualValues(t, instanceRefreshRequired, instancesRefreshed) + assert.EqualValues(t, instanceRefreshRequired, instancesRefreshed.Load()) } // verifyTabletInfo verifies that the tablet information read from the vtorc database diff --git a/go/vt/vtorc/logic/topology_recovery.go b/go/vt/vtorc/logic/topology_recovery.go index af12587177c..808f6d3e21b 100644 --- a/go/vt/vtorc/logic/topology_recovery.go +++ b/go/vt/vtorc/logic/topology_recovery.go @@ -289,6 +289,8 @@ func recoverDeadPrimary(ctx context.Context, analysisEntry inst.ReplicationAnaly log.Warningf("ERS - %s", value) case logutilpb.Level_ERROR: log.Errorf("ERS - %s", value) + default: + log.Infof("ERS - %s", value) } _ = AuditTopologyRecovery(topologyRecovery, value) })).ReparentShard(ctx, @@ -301,6 +303,9 @@ func recoverDeadPrimary(ctx context.Context, analysisEntry inst.ReplicationAnaly PreventCrossCellPromotion: config.Config.PreventCrossDataCenterPrimaryFailover, }, ) + if err != nil { + log.Errorf("Error running ERS - %v", err) + } if ev != nil && ev.NewPrimary != nil { promotedReplica, _, _ = inst.ReadInstance(&inst.InstanceKey{ diff --git a/go/vt/vttablet/tabletmanager/tm_init.go b/go/vt/vttablet/tabletmanager/tm_init.go index 219bacbcb04..4e9cdce7e2f 100644 --- a/go/vt/vttablet/tabletmanager/tm_init.go +++ b/go/vt/vttablet/tabletmanager/tm_init.go @@ -901,6 +901,10 @@ func (tm *TabletManager) initializeReplication(ctx context.Context, tabletType t } // Set primary and start replication. + if currentPrimary.Tablet.MysqlHostname == "" { + log.Warningf("primary tablet in the shard record does not have mysql hostname specified, possibly because that tablet has been shut down.") + return nil, nil + } if err := tm.MysqlDaemon.SetReplicationSource(ctx, currentPrimary.Tablet.MysqlHostname, int(currentPrimary.Tablet.MysqlPort), false /* stopReplicationBefore */, true /* startReplicationAfter */); err != nil { return nil, vterrors.Wrap(err, "MysqlDaemon.SetReplicationSource failed") }