Skip to content

Commit

Permalink
restore: do not change tablet type to RESTORE if not actually perform…
Browse files Browse the repository at this point in the history
…ing a restore (#6679)

* restore: do not change tablet type to RESTORE if not actually performing a restore

Signed-off-by: deepthi <deepthi@planetscale.com>

* restore: cleanup usage of ErrExistingDB, improve comments, do not restore if mysql doesn't come up

Signed-off-by: deepthi <deepthi@planetscale.com>

* restore: remove ErrExistingDB which is no longer needed

Signed-off-by: deepthi <deepthi@planetscale.com>
  • Loading branch information
deepthi authored Sep 9, 2020
1 parent 1f6be50 commit ec2e971
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 106 deletions.
2 changes: 0 additions & 2 deletions go/cmd/vtbackup/vtbackup.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,6 @@ func takeBackup(ctx context.Context, topoServer *topo.Server, backupStorage back
return fmt.Errorf("no backup found; not starting up empty since -initial_backup flag was not enabled")
}
restorePos = mysql.Position{}
case mysqlctl.ErrExistingDB:
return fmt.Errorf("can't run vtbackup because data directory is not empty")
default:
return fmt.Errorf("can't restore from backup: %v", err)
}
Expand Down
2 changes: 2 additions & 0 deletions go/test/endtoend/backup/vtbackup/backup_only_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ func firstBackupTest(t *testing.T, tabletType string) {
// check that the restored replica has the right local_metadata
result, err := replica2.VttabletProcess.QueryTabletWithDB("select * from local_metadata", "_vt")
require.Nil(t, err)
require.NotNil(t, result)
require.NotEmpty(t, result.Rows)
assert.Equal(t, replica2.Alias, result.Rows[0][1].ToString(), "Alias")
assert.Equal(t, "ks.0", result.Rows[1][1].ToString(), "ClusterAlias")
assert.Equal(t, cell, result.Rows[2][1].ToString(), "DataCenter")
Expand Down
42 changes: 15 additions & 27 deletions go/vt/mysqlctl/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ var (
// but none of them are complete.
ErrNoCompleteBackup = errors.New("backup(s) found but none are complete")

// ErrExistingDB is returned when there's already an active DB.
ErrExistingDB = errors.New("skipping restore due to existing database")

// backupStorageHook contains the hook name to use to process
// backup files. If not set, we will not process the files. It is
// only used at backup time. Then it is put in the manifest,
Expand Down Expand Up @@ -218,34 +215,25 @@ func removeExistingFiles(cnf *Mycnf) error {
return nil
}

// ShouldRestore checks whether a database with tables already exists
// and returns whether a restore action should be performed
func ShouldRestore(ctx context.Context, params RestoreParams) (bool, error) {
if params.DeleteBeforeRestore || RestoreWasInterrupted(params.Cnf) {
return true, nil
}
params.Logger.Infof("Restore: No %v file found, checking no existing data is present", RestoreState)
// Wait for mysqld to be ready, in case it was launched in parallel with us.
// If this doesn't succeed, we should not attempt a restore
if err := params.Mysqld.Wait(ctx, params.Cnf); err != nil {
return false, err
}
return checkNoDB(ctx, params.Mysqld, params.DbName)
}

// Restore is the main entry point for backup restore. If there is no
// appropriate backup on the BackupStorage, Restore logs an error
// and returns ErrNoBackup. Any other error is returned.
func Restore(ctx context.Context, params RestoreParams) (*BackupManifest, error) {

if !params.DeleteBeforeRestore {
params.Logger.Infof("Restore: Checking if a restore is in progress")
if !RestoreWasInterrupted(params.Cnf) {
params.Logger.Infof("Restore: No %v file found, checking no existing data is present", RestoreState)
// Wait for mysqld to be ready, in case it was launched in parallel with us.
if err := params.Mysqld.Wait(ctx, params.Cnf); err != nil {
return nil, err
}

ok, err := checkNoDB(ctx, params.Mysqld, params.DbName)
if err != nil {
return nil, err
}
if !ok {
params.Logger.Infof("Auto-restore is enabled, but mysqld already contains data. Assuming vttablet was just restarted.")
if err = PopulateMetadataTables(params.Mysqld, params.LocalMetadata, params.DbName); err == nil {
err = ErrExistingDB
}
return nil, err
}
}
}

// find the right backup handle: most recent one, with a MANIFEST
params.Logger.Infof("Restore: looking for a suitable backup to restore")
bs, err := backupstorage.GetBackupStorage()
Expand Down
28 changes: 19 additions & 9 deletions go/vt/vttablet/tabletmanager/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,9 @@ func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger,
}

func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool) error {

tablet := tm.Tablet()
originalType := tablet.Type
if err := tm.tmState.ChangeTabletType(ctx, topodatapb.TabletType_RESTORE); err != nil {
return err
}

// Try to restore. Depending on the reason for failure, we may be ok.
// If we're not ok, return an error and the tm will log.Fatalf,
// causing the process to be restarted and the restore retried.
Expand Down Expand Up @@ -117,6 +114,24 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L
StartTime: logutil.ProtoToTime(keyspaceInfo.SnapshotTime),
}

// Check whether we're going to restore before changing to RESTORE type,
// so we keep our MasterTermStartTime (if any) if we aren't actually restoring.
ok, err := mysqlctl.ShouldRestore(ctx, params)
if err != nil {
return err
}
if !ok {
params.Logger.Infof("Attempting to restore, but mysqld already contains data. Assuming vttablet was just restarted.")
return mysqlctl.PopulateMetadataTables(params.Mysqld, params.LocalMetadata, params.DbName)
}
// We should not become master after restore, because that would incorrectly
// start a new master term, and it's likely our data dir will be out of date.
if originalType == topodatapb.TabletType_MASTER {
originalType = tm.baseTabletType
}
if err := tm.tmState.ChangeTabletType(ctx, topodatapb.TabletType_RESTORE); err != nil {
return err
}
// Loop until a backup exists, unless we were told to give up immediately.
var backupManifest *mysqlctl.BackupManifest
for {
Expand Down Expand Up @@ -161,11 +176,6 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L
}
case mysqlctl.ErrNoBackup:
// No-op, starting with empty database.
case mysqlctl.ErrExistingDB:
// No-op, assuming we've just restarted. Note the
// replication reporter may restart replication at the
// next health check if it thinks it should. We do not
// alter replication here.
default:
// If anything failed, we should reset the original tablet type
if err := tm.tmState.ChangeTabletType(ctx, originalType); err != nil {
Expand Down
138 changes: 70 additions & 68 deletions go/vt/wrangler/testlib/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ import (
"testing"
"time"

"github.com/stretchr/testify/require"

"github.com/stretchr/testify/assert"

"vitess.io/vitess/go/vt/discovery"

"golang.org/x/net/context"
Expand Down Expand Up @@ -74,9 +78,7 @@ func TestBackupRestore(t *testing.T) {

// Initialize our temp dirs
root, err := ioutil.TempDir("", "backuptest")
if err != nil {
t.Fatalf("os.TempDir failed: %v", err)
}
require.NoError(t, err)
defer os.RemoveAll(root)

// Initialize BackupStorage
Expand All @@ -90,19 +92,11 @@ func TestBackupRestore(t *testing.T) {
sourceDataDir := path.Join(root, "source_data")
sourceDataDbDir := path.Join(sourceDataDir, "vt_db")
for _, s := range []string{sourceInnodbDataDir, sourceInnodbLogDir, sourceDataDbDir} {
if err := os.MkdirAll(s, os.ModePerm); err != nil {
t.Fatalf("failed to create directory %v: %v", s, err)
}
}
if err := ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm); err != nil {
t.Fatalf("failed to write file innodb_data_1: %v", err)
}
if err := ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm); err != nil {
t.Fatalf("failed to write file innodb_log_1: %v", err)
}
if err := ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm); err != nil {
t.Fatalf("failed to write file db.opt: %v", err)
require.NoError(t, os.MkdirAll(s, os.ModePerm))
}
require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm))
require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm))
require.NoError(t, ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm))

// create a master tablet, set its master position
master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db)
Expand Down Expand Up @@ -150,20 +144,12 @@ func TestBackupRestore(t *testing.T) {
}

// run the backup
if err := vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}); err != nil {
t.Fatalf("Backup failed: %v", err)
}
require.NoError(t, vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}))

// verify the full status
if err := sourceTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
t.Errorf("sourceTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
}
if !sourceTablet.FakeMysqlDaemon.Replicating {
t.Errorf("sourceTablet.FakeMysqlDaemon.Replicating not set")
}
if !sourceTablet.FakeMysqlDaemon.Running {
t.Errorf("sourceTablet.FakeMysqlDaemon.Running not set")
}
require.NoError(t, sourceTablet.FakeMysqlDaemon.CheckSuperQueryList())
assert.True(t, sourceTablet.FakeMysqlDaemon.Replicating)
assert.True(t, sourceTablet.FakeMysqlDaemon.Running)

// create a destination tablet, set it up so we can do restores
destTablet := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db)
Expand Down Expand Up @@ -204,21 +190,59 @@ func TestBackupRestore(t *testing.T) {
RelayLogInfoPath: path.Join(root, "relay-log.info"),
}

if err := destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */); err != nil {
t.Fatalf("RestoreData failed: %v", err)
require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */))
// verify the full status
require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
assert.True(t, destTablet.FakeMysqlDaemon.Replicating)
assert.True(t, destTablet.FakeMysqlDaemon.Running)

// Initialize mycnf, required for restore
masterInnodbDataDir := path.Join(root, "master_innodb_data")
masterInnodbLogDir := path.Join(root, "master_innodb_log")
masterDataDir := path.Join(root, "master_data")
master.TM.Cnf = &mysqlctl.Mycnf{
DataDir: masterDataDir,
InnodbDataHomeDir: masterInnodbDataDir,
InnodbLogGroupHomeDir: masterInnodbLogDir,
BinLogPath: path.Join(root, "bin-logs/filename_prefix"),
RelayLogPath: path.Join(root, "relay-logs/filename_prefix"),
RelayLogIndexPath: path.Join(root, "relay-log.index"),
RelayLogInfoPath: path.Join(root, "relay-log.info"),
}

// verify the full status
if err := destTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
t.Errorf("destTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
master.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*sqltypes.Result{
"SHOW DATABASES": {},
}
if !destTablet.FakeMysqlDaemon.Replicating {
t.Errorf("destTablet.FakeMysqlDaemon.Replicating not set")
master.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
"STOP SLAVE",
"RESET SLAVE ALL",
"FAKE SET SLAVE POSITION",
"FAKE SET MASTER",
"START SLAVE",
}
if !destTablet.FakeMysqlDaemon.Running {
t.Errorf("destTablet.FakeMysqlDaemon.Running not set")

master.FakeMysqlDaemon.SetReplicationPositionPos = master.FakeMysqlDaemon.CurrentMasterPosition

// restore master from backup
require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */), "RestoreData failed")
// tablet was created as MASTER, so it's baseTabletType is MASTER
assert.Equal(t, topodatapb.TabletType_MASTER, master.Tablet.Type)
assert.False(t, master.FakeMysqlDaemon.Replicating)
assert.True(t, master.FakeMysqlDaemon.Running)

// restore master when database already exists
// checkNoDb should return false
// so fake the necessary queries
master.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*sqltypes.Result{
"SHOW DATABASES": {Rows: [][]sqltypes.Value{{sqltypes.NewVarBinary("vt_test_keyspace")}}},
"SHOW TABLES FROM `vt_test_keyspace`": {Rows: [][]sqltypes.Value{{sqltypes.NewVarBinary("a")}}},
}

require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */), "RestoreData failed")
// Tablet type should not change
assert.Equal(t, topodatapb.TabletType_MASTER, master.Tablet.Type)
assert.False(t, master.FakeMysqlDaemon.Replicating)
assert.True(t, master.FakeMysqlDaemon.Running)
}

func TestRestoreUnreachableMaster(t *testing.T) {
Expand Down Expand Up @@ -252,9 +276,7 @@ func TestRestoreUnreachableMaster(t *testing.T) {

// Initialize our temp dirs
root, err := ioutil.TempDir("", "backuptest")
if err != nil {
t.Fatalf("os.TempDir failed: %v", err)
}
require.NoError(t, err)
defer os.RemoveAll(root)

// Initialize BackupStorage
Expand All @@ -268,19 +290,11 @@ func TestRestoreUnreachableMaster(t *testing.T) {
sourceDataDir := path.Join(root, "source_data")
sourceDataDbDir := path.Join(sourceDataDir, "vt_db")
for _, s := range []string{sourceInnodbDataDir, sourceInnodbLogDir, sourceDataDbDir} {
if err := os.MkdirAll(s, os.ModePerm); err != nil {
t.Fatalf("failed to create directory %v: %v", s, err)
}
}
if err := ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm); err != nil {
t.Fatalf("failed to write file innodb_data_1: %v", err)
}
if err := ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm); err != nil {
t.Fatalf("failed to write file innodb_log_1: %v", err)
}
if err := ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm); err != nil {
t.Fatalf("failed to write file db.opt: %v", err)
require.NoError(t, os.MkdirAll(s, os.ModePerm))
}
require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm))
require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm))
require.NoError(t, ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm))

// create a master tablet, set its master position
master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db)
Expand Down Expand Up @@ -327,9 +341,7 @@ func TestRestoreUnreachableMaster(t *testing.T) {
}

// run the backup
if err := vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}); err != nil {
t.Fatalf("Backup failed: %v", err)
}
require.NoError(t, vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}))

// create a destination tablet, set it up so we can do restores
destTablet := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db)
Expand Down Expand Up @@ -376,19 +388,9 @@ func TestRestoreUnreachableMaster(t *testing.T) {
// set a short timeout so that we don't have to wait 30 seconds
*topo.RemoteOperationTimeout = 2 * time.Second
// Restore should still succeed
if err := destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */); err != nil {
t.Fatalf("RestoreData failed: %v", err)
}

require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */))
// verify the full status
if err := destTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
t.Errorf("destTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
}
if !destTablet.FakeMysqlDaemon.Replicating {
t.Errorf("destTablet.FakeMysqlDaemon.Replicating not set")
}
if !destTablet.FakeMysqlDaemon.Running {
t.Errorf("destTablet.FakeMysqlDaemon.Running not set")
}

require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
assert.True(t, destTablet.FakeMysqlDaemon.Replicating)
assert.True(t, destTablet.FakeMysqlDaemon.Running)
}

0 comments on commit ec2e971

Please sign in to comment.