Skip to content

Commit

Permalink
backupccl: add some basic checks for online restore
Browse files Browse the repository at this point in the history
This adds some basic guardrails to stop people from trying things out
that currently don't work.

Release note: None
  • Loading branch information
stevendanna committed Nov 15, 2023
1 parent 236b9ba commit 37818c0
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 1 deletion.
42 changes: 42 additions & 0 deletions pkg/ccl/backupccl/restore_online.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"fmt"
"time"

"github.com/cockroachdb/cockroach/pkg/ccl/backupccl/backuppb"
"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
Expand All @@ -21,6 +22,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/log"
Expand Down Expand Up @@ -189,3 +192,42 @@ func sendAddRemoteSSTWorker(
return flush(nil)
}
}

// checkManifestsForOnlineCompat returns an error if the set of
// manifests appear to be from a backup that we cannot currently
// support for online restore.
func checkManifestsForOnlineCompat(ctx context.Context, manifests []backuppb.BackupManifest) error {
if len(manifests) < 1 {
return errors.AssertionFailedf("expected at least 1 backup manifest")
}
// TODO(online-restore): Remove once we support layer ordering.
if len(manifests) > 1 {
return pgerror.Newf(pgcode.FeatureNotSupported, "experimental online restore: restoring from an incremental backup not supported")
}

// TODO(online-restore): Remove once we support layer ordering and have tested some reasonable number of layers.
const layerLimit = 16
if len(manifests) > layerLimit {
return pgerror.Newf(pgcode.FeatureNotSupported, "experimental online restore: too many incremental layers %d (from backup) > %d (limit)", len(manifests), layerLimit)
}

for _, manifest := range manifests {
log.Infof(ctx, "MANIFEST: %#v", manifest)
if !manifest.RevisionStartTime.IsEmpty() || !manifest.StartTime.IsEmpty() || manifest.MVCCFilter == backuppb.MVCCFilter_All {
return pgerror.Newf(pgcode.FeatureNotSupported, "experimental online restore: restoring from a revision history backup not supported")
}
}
return nil
}

// checkRewritesAreNoops returns an error if any of the rewrites in
// the rewrite map actually require key rewriting. We currently don't
// rewrite keys, so this would be a problem.
func checkRewritesAreNoops(rewrites jobspb.DescRewriteMap) error {
for oldID, rw := range rewrites {
if rw.ID != oldID {
return pgerror.Newf(pgcode.FeatureNotSupported, "experimental online restore: descriptor rewrites not supported but required (%d -> %d)", oldID, rw.ID)
}
}
return nil
}
41 changes: 41 additions & 0 deletions pkg/ccl/backupccl/restore_online_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,47 @@ func TestOnlineRestoreBasic(t *testing.T) {
bankOnlineRestore(t, rSQLDB, numAccounts, externalStorage)
}

func TestOnlineRestoreErrors(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

_, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, 1, InitManualReplication)
defer cleanupFn()
params := base.TestClusterArgs{}
_, rSQLDB, cleanupFnRestored := backupRestoreTestSetupEmpty(t, 1, dir, InitManualReplication, params)
defer cleanupFnRestored()
rSQLDB.Exec(t, "CREATE DATABASE data")
var (
fullBackup = "nodelocal://1/full-backup"
fullBackupWithRevs = "nodelocal://1/full-backup-with-revs"
incrementalBackup = "nodelocal://1/incremental-backup"
incrementalBackupWithRevs = "nodelocal://1/incremental-backup-with-revs"
)

t.Run("incremental backups are unsupported", func(t *testing.T) {
sqlDB.Exec(t, fmt.Sprintf("BACKUP INTO '%s'", incrementalBackup))
sqlDB.Exec(t, fmt.Sprintf("BACKUP INTO LATEST IN '%s'", incrementalBackup))
rSQLDB.ExpectErr(t, "incremental backup not supported", fmt.Sprintf("RESTORE TABLE data.bank FROM LATEST IN '%s' WITH EXPERIMENTAL DEFERRED COPY", incrementalBackup))
})
t.Run("full backups with revision history are unsupported", func(t *testing.T) {
var systemTime string
sqlDB.QueryRow(t, "SELECT cluster_logical_timestamp()").Scan(&systemTime)
sqlDB.Exec(t, fmt.Sprintf("BACKUP INTO '%s' AS OF SYSTEM TIME '%s' WITH revision_history", fullBackupWithRevs, systemTime))
rSQLDB.ExpectErr(t, "revision history backup not supported", fmt.Sprintf("RESTORE TABLE data.bank FROM LATEST IN '%s' WITH EXPERIMENTAL DEFERRED COPY", fullBackupWithRevs))
})
t.Run("icremental backups with revision history are unsupported", func(t *testing.T) {
sqlDB.Exec(t, fmt.Sprintf("BACKUP INTO '%s' WITH revision_history", incrementalBackupWithRevs))
sqlDB.Exec(t, fmt.Sprintf("BACKUP INTO LATEST IN '%s' WITH revision_history", incrementalBackupWithRevs))
rSQLDB.ExpectErr(t, "incremental backup not supported", fmt.Sprintf("RESTORE TABLE data.bank FROM LATEST IN '%s' WITH EXPERIMENTAL DEFERRED COPY", incrementalBackupWithRevs))
})
t.Run("descriptor rewrites are unsupported", func(t *testing.T) {
sqlDB.Exec(t, fmt.Sprintf("BACKUP INTO '%s'", fullBackup))
rSQLDB.Exec(t, "CREATE DATABASE new_data")
rSQLDB.ExpectErr(t, "descriptor rewrites not supported", fmt.Sprintf("RESTORE TABLE data.bank FROM LATEST IN '%s' WITH into_db=new_data,EXPERIMENTAL DEFERRED COPY", fullBackup))
})

}

func bankOnlineRestore(
t *testing.T, sqlDB *sqlutils.SQLRunner, numAccounts int, externalStorage string,
) {
Expand Down
20 changes: 19 additions & 1 deletion pkg/ccl/backupccl/restore_planning.go
Original file line number Diff line number Diff line change
Expand Up @@ -1361,6 +1361,12 @@ func restorePlanHook(
}
}

if restoreStmt.Options.ExperimentalOnline && !restoreStmt.Targets.TenantID.IsSet() {
// TODO(ssd): Disable this once it is less annoying to
// disable it in tests.
log.Warningf(ctx, "running non-tenant online RESTORE; this is dangerous and will only work if you know exactly what you are doing")
}

var newTenantID *roachpb.TenantID
var newTenantName *roachpb.TenantName
if restoreStmt.Options.AsTenant != nil || restoreStmt.Options.ForceTenantID != nil {
Expand Down Expand Up @@ -1856,10 +1862,15 @@ func doRestorePlan(
err = checkBackupManifestVersionCompatability(ctx, p.ExecCfg().Settings.Version,
mainBackupManifests, restoreStmt.Options.UnsafeRestoreIncompatibleVersion)
if err != nil {

return err
}

if restoreStmt.Options.ExperimentalOnline {
if err := checkManifestsForOnlineCompat(ctx, mainBackupManifests); err != nil {
return err
}
}

if restoreStmt.DescriptorCoverage == tree.AllDescriptors {
// Validate that the backup is a full cluster backup if a full cluster restore was requested.
if mainBackupManifests[0].DescriptorCoverage == tree.RequestedDescriptors {
Expand Down Expand Up @@ -2122,6 +2133,13 @@ func doRestorePlan(
} else {
fromDescription = from
}

if restoreStmt.Options.ExperimentalOnline {
if err := checkRewritesAreNoops(descriptorRewrites); err != nil {
return err
}
}

description, err := restoreJobDescription(
ctx,
p,
Expand Down

0 comments on commit 37818c0

Please sign in to comment.