diff --git a/go/cmd/vtbackup/vtbackup.go b/go/cmd/vtbackup/vtbackup.go index 5cb4d5be52c..f5117376a23 100644 --- a/go/cmd/vtbackup/vtbackup.go +++ b/go/cmd/vtbackup/vtbackup.go @@ -306,8 +306,6 @@ func takeBackup(ctx context.Context, topoServer *topo.Server, backupStorage back return fmt.Errorf("no backup found; not starting up empty since -initial_backup flag was not enabled") } restorePos = mysql.Position{} - case mysqlctl.ErrExistingDB: - return fmt.Errorf("can't run vtbackup because data directory is not empty") default: return fmt.Errorf("can't restore from backup: %v", err) } diff --git a/go/test/endtoend/backup/vtbackup/backup_only_test.go b/go/test/endtoend/backup/vtbackup/backup_only_test.go index 0557c4b410f..0a128f47a50 100644 --- a/go/test/endtoend/backup/vtbackup/backup_only_test.go +++ b/go/test/endtoend/backup/vtbackup/backup_only_test.go @@ -147,6 +147,8 @@ func firstBackupTest(t *testing.T, tabletType string) { // check that the restored replica has the right local_metadata result, err := replica2.VttabletProcess.QueryTabletWithDB("select * from local_metadata", "_vt") require.Nil(t, err) + require.NotNil(t, result) + require.NotEmpty(t, result.Rows) assert.Equal(t, replica2.Alias, result.Rows[0][1].ToString(), "Alias") assert.Equal(t, "ks.0", result.Rows[1][1].ToString(), "ClusterAlias") assert.Equal(t, cell, result.Rows[2][1].ToString(), "DataCenter") diff --git a/go/vt/mysqlctl/backup.go b/go/vt/mysqlctl/backup.go index 0bd7f8b1578..aade6fc0ab2 100644 --- a/go/vt/mysqlctl/backup.go +++ b/go/vt/mysqlctl/backup.go @@ -64,9 +64,6 @@ var ( // but none of them are complete. ErrNoCompleteBackup = errors.New("backup(s) found but none are complete") - // ErrExistingDB is returned when there's already an active DB. - ErrExistingDB = errors.New("skipping restore due to existing database") - // backupStorageHook contains the hook name to use to process // backup files. If not set, we will not process the files. It is // only used at backup time. Then it is put in the manifest, @@ -218,34 +215,25 @@ func removeExistingFiles(cnf *Mycnf) error { return nil } +// ShouldRestore checks whether a database with tables already exists +// and returns whether a restore action should be performed +func ShouldRestore(ctx context.Context, params RestoreParams) (bool, error) { + if params.DeleteBeforeRestore || RestoreWasInterrupted(params.Cnf) { + return true, nil + } + params.Logger.Infof("Restore: No %v file found, checking no existing data is present", RestoreState) + // Wait for mysqld to be ready, in case it was launched in parallel with us. + // If this doesn't succeed, we should not attempt a restore + if err := params.Mysqld.Wait(ctx, params.Cnf); err != nil { + return false, err + } + return checkNoDB(ctx, params.Mysqld, params.DbName) +} + // Restore is the main entry point for backup restore. If there is no // appropriate backup on the BackupStorage, Restore logs an error // and returns ErrNoBackup. Any other error is returned. func Restore(ctx context.Context, params RestoreParams) (*BackupManifest, error) { - - if !params.DeleteBeforeRestore { - params.Logger.Infof("Restore: Checking if a restore is in progress") - if !RestoreWasInterrupted(params.Cnf) { - params.Logger.Infof("Restore: No %v file found, checking no existing data is present", RestoreState) - // Wait for mysqld to be ready, in case it was launched in parallel with us. - if err := params.Mysqld.Wait(ctx, params.Cnf); err != nil { - return nil, err - } - - ok, err := checkNoDB(ctx, params.Mysqld, params.DbName) - if err != nil { - return nil, err - } - if !ok { - params.Logger.Infof("Auto-restore is enabled, but mysqld already contains data. Assuming vttablet was just restarted.") - if err = PopulateMetadataTables(params.Mysqld, params.LocalMetadata, params.DbName); err == nil { - err = ErrExistingDB - } - return nil, err - } - } - } - // find the right backup handle: most recent one, with a MANIFEST params.Logger.Infof("Restore: looking for a suitable backup to restore") bs, err := backupstorage.GetBackupStorage() diff --git a/go/vt/vttablet/tabletmanager/restore.go b/go/vt/vttablet/tabletmanager/restore.go index 0cbd401227b..88df3a130e3 100644 --- a/go/vt/vttablet/tabletmanager/restore.go +++ b/go/vt/vttablet/tabletmanager/restore.go @@ -76,12 +76,9 @@ func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger, } func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool) error { + tablet := tm.Tablet() originalType := tablet.Type - if err := tm.tmState.ChangeTabletType(ctx, topodatapb.TabletType_RESTORE); err != nil { - return err - } - // Try to restore. Depending on the reason for failure, we may be ok. // If we're not ok, return an error and the tm will log.Fatalf, // causing the process to be restarted and the restore retried. @@ -117,6 +114,24 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L StartTime: logutil.ProtoToTime(keyspaceInfo.SnapshotTime), } + // Check whether we're going to restore before changing to RESTORE type, + // so we keep our MasterTermStartTime (if any) if we aren't actually restoring. + ok, err := mysqlctl.ShouldRestore(ctx, params) + if err != nil { + return err + } + if !ok { + params.Logger.Infof("Attempting to restore, but mysqld already contains data. Assuming vttablet was just restarted.") + return mysqlctl.PopulateMetadataTables(params.Mysqld, params.LocalMetadata, params.DbName) + } + // We should not become master after restore, because that would incorrectly + // start a new master term, and it's likely our data dir will be out of date. + if originalType == topodatapb.TabletType_MASTER { + originalType = tm.baseTabletType + } + if err := tm.tmState.ChangeTabletType(ctx, topodatapb.TabletType_RESTORE); err != nil { + return err + } // Loop until a backup exists, unless we were told to give up immediately. var backupManifest *mysqlctl.BackupManifest for { @@ -161,11 +176,6 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L } case mysqlctl.ErrNoBackup: // No-op, starting with empty database. - case mysqlctl.ErrExistingDB: - // No-op, assuming we've just restarted. Note the - // replication reporter may restart replication at the - // next health check if it thinks it should. We do not - // alter replication here. default: // If anything failed, we should reset the original tablet type if err := tm.tmState.ChangeTabletType(ctx, originalType); err != nil { diff --git a/go/vt/wrangler/testlib/backup_test.go b/go/vt/wrangler/testlib/backup_test.go index 4ec02b6943e..fe0849dddb1 100644 --- a/go/vt/wrangler/testlib/backup_test.go +++ b/go/vt/wrangler/testlib/backup_test.go @@ -23,6 +23,10 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + + "github.com/stretchr/testify/assert" + "vitess.io/vitess/go/vt/discovery" "golang.org/x/net/context" @@ -74,9 +78,7 @@ func TestBackupRestore(t *testing.T) { // Initialize our temp dirs root, err := ioutil.TempDir("", "backuptest") - if err != nil { - t.Fatalf("os.TempDir failed: %v", err) - } + require.NoError(t, err) defer os.RemoveAll(root) // Initialize BackupStorage @@ -90,19 +92,11 @@ func TestBackupRestore(t *testing.T) { sourceDataDir := path.Join(root, "source_data") sourceDataDbDir := path.Join(sourceDataDir, "vt_db") for _, s := range []string{sourceInnodbDataDir, sourceInnodbLogDir, sourceDataDbDir} { - if err := os.MkdirAll(s, os.ModePerm); err != nil { - t.Fatalf("failed to create directory %v: %v", s, err) - } - } - if err := ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm); err != nil { - t.Fatalf("failed to write file innodb_data_1: %v", err) - } - if err := ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm); err != nil { - t.Fatalf("failed to write file innodb_log_1: %v", err) - } - if err := ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm); err != nil { - t.Fatalf("failed to write file db.opt: %v", err) + require.NoError(t, os.MkdirAll(s, os.ModePerm)) } + require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm)) + require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm)) + require.NoError(t, ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm)) // create a master tablet, set its master position master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db) @@ -150,20 +144,12 @@ func TestBackupRestore(t *testing.T) { } // run the backup - if err := vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}); err != nil { - t.Fatalf("Backup failed: %v", err) - } + require.NoError(t, vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)})) // verify the full status - if err := sourceTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Errorf("sourceTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) - } - if !sourceTablet.FakeMysqlDaemon.Replicating { - t.Errorf("sourceTablet.FakeMysqlDaemon.Replicating not set") - } - if !sourceTablet.FakeMysqlDaemon.Running { - t.Errorf("sourceTablet.FakeMysqlDaemon.Running not set") - } + require.NoError(t, sourceTablet.FakeMysqlDaemon.CheckSuperQueryList()) + assert.True(t, sourceTablet.FakeMysqlDaemon.Replicating) + assert.True(t, sourceTablet.FakeMysqlDaemon.Running) // create a destination tablet, set it up so we can do restores destTablet := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db) @@ -204,21 +190,59 @@ func TestBackupRestore(t *testing.T) { RelayLogInfoPath: path.Join(root, "relay-log.info"), } - if err := destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */); err != nil { - t.Fatalf("RestoreData failed: %v", err) + require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */)) + // verify the full status + require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed") + assert.True(t, destTablet.FakeMysqlDaemon.Replicating) + assert.True(t, destTablet.FakeMysqlDaemon.Running) + + // Initialize mycnf, required for restore + masterInnodbDataDir := path.Join(root, "master_innodb_data") + masterInnodbLogDir := path.Join(root, "master_innodb_log") + masterDataDir := path.Join(root, "master_data") + master.TM.Cnf = &mysqlctl.Mycnf{ + DataDir: masterDataDir, + InnodbDataHomeDir: masterInnodbDataDir, + InnodbLogGroupHomeDir: masterInnodbLogDir, + BinLogPath: path.Join(root, "bin-logs/filename_prefix"), + RelayLogPath: path.Join(root, "relay-logs/filename_prefix"), + RelayLogIndexPath: path.Join(root, "relay-log.index"), + RelayLogInfoPath: path.Join(root, "relay-log.info"), } - // verify the full status - if err := destTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Errorf("destTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + master.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*sqltypes.Result{ + "SHOW DATABASES": {}, } - if !destTablet.FakeMysqlDaemon.Replicating { - t.Errorf("destTablet.FakeMysqlDaemon.Replicating not set") + master.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + "STOP SLAVE", + "RESET SLAVE ALL", + "FAKE SET SLAVE POSITION", + "FAKE SET MASTER", + "START SLAVE", } - if !destTablet.FakeMysqlDaemon.Running { - t.Errorf("destTablet.FakeMysqlDaemon.Running not set") + + master.FakeMysqlDaemon.SetReplicationPositionPos = master.FakeMysqlDaemon.CurrentMasterPosition + + // restore master from backup + require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */), "RestoreData failed") + // tablet was created as MASTER, so it's baseTabletType is MASTER + assert.Equal(t, topodatapb.TabletType_MASTER, master.Tablet.Type) + assert.False(t, master.FakeMysqlDaemon.Replicating) + assert.True(t, master.FakeMysqlDaemon.Running) + + // restore master when database already exists + // checkNoDb should return false + // so fake the necessary queries + master.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*sqltypes.Result{ + "SHOW DATABASES": {Rows: [][]sqltypes.Value{{sqltypes.NewVarBinary("vt_test_keyspace")}}}, + "SHOW TABLES FROM `vt_test_keyspace`": {Rows: [][]sqltypes.Value{{sqltypes.NewVarBinary("a")}}}, } + require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */), "RestoreData failed") + // Tablet type should not change + assert.Equal(t, topodatapb.TabletType_MASTER, master.Tablet.Type) + assert.False(t, master.FakeMysqlDaemon.Replicating) + assert.True(t, master.FakeMysqlDaemon.Running) } func TestRestoreUnreachableMaster(t *testing.T) { @@ -252,9 +276,7 @@ func TestRestoreUnreachableMaster(t *testing.T) { // Initialize our temp dirs root, err := ioutil.TempDir("", "backuptest") - if err != nil { - t.Fatalf("os.TempDir failed: %v", err) - } + require.NoError(t, err) defer os.RemoveAll(root) // Initialize BackupStorage @@ -268,19 +290,11 @@ func TestRestoreUnreachableMaster(t *testing.T) { sourceDataDir := path.Join(root, "source_data") sourceDataDbDir := path.Join(sourceDataDir, "vt_db") for _, s := range []string{sourceInnodbDataDir, sourceInnodbLogDir, sourceDataDbDir} { - if err := os.MkdirAll(s, os.ModePerm); err != nil { - t.Fatalf("failed to create directory %v: %v", s, err) - } - } - if err := ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm); err != nil { - t.Fatalf("failed to write file innodb_data_1: %v", err) - } - if err := ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm); err != nil { - t.Fatalf("failed to write file innodb_log_1: %v", err) - } - if err := ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm); err != nil { - t.Fatalf("failed to write file db.opt: %v", err) + require.NoError(t, os.MkdirAll(s, os.ModePerm)) } + require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm)) + require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm)) + require.NoError(t, ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm)) // create a master tablet, set its master position master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db) @@ -327,9 +341,7 @@ func TestRestoreUnreachableMaster(t *testing.T) { } // run the backup - if err := vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}); err != nil { - t.Fatalf("Backup failed: %v", err) - } + require.NoError(t, vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)})) // create a destination tablet, set it up so we can do restores destTablet := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db) @@ -376,19 +388,9 @@ func TestRestoreUnreachableMaster(t *testing.T) { // set a short timeout so that we don't have to wait 30 seconds *topo.RemoteOperationTimeout = 2 * time.Second // Restore should still succeed - if err := destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */); err != nil { - t.Fatalf("RestoreData failed: %v", err) - } - + require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */)) // verify the full status - if err := destTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { - t.Errorf("destTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) - } - if !destTablet.FakeMysqlDaemon.Replicating { - t.Errorf("destTablet.FakeMysqlDaemon.Replicating not set") - } - if !destTablet.FakeMysqlDaemon.Running { - t.Errorf("destTablet.FakeMysqlDaemon.Running not set") - } - + require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed") + assert.True(t, destTablet.FakeMysqlDaemon.Replicating) + assert.True(t, destTablet.FakeMysqlDaemon.Running) }