From c8c838e37799684ce4fc7a61340ef4f3ea5b00fa Mon Sep 17 00:00:00 2001 From: Louis Garman Date: Wed, 3 Feb 2021 09:31:46 +0000 Subject: [PATCH 1/2] Move backup config from ws to operator --- Makefile | 3 +- README.md | 14 +- api/etok.dev/v1alpha1/workspace_types.go | 7 +- cmd/install/deployment.go | 26 ++-- cmd/install/deployment_test.go | 15 ++ cmd/install/install.go | 42 +++++- cmd/install/install_test.go | 40 ++++- cmd/manager/manager.go | 24 +++ cmd/workspace/workspace_new.go | 14 +- cmd/workspace/workspace_new_test.go | 7 +- config/crd/bases/etok.dev_workspaces.yaml | 8 +- pkg/backup/fake.go | 28 ++++ pkg/backup/gcs.go | 115 ++++++++++++++ pkg/backup/gcs_test.go | 54 +++++++ pkg/backup/provider.go | 13 ++ pkg/controllers/workspace_controller.go | 148 ++++--------------- pkg/controllers/workspace_controller_test.go | 89 ++++++----- pkg/testobj/k8s.go | 4 +- test/e2e/backup_restore_test.go | 4 +- test/e2e/terraform_test.go | 1 + 20 files changed, 441 insertions(+), 215 deletions(-) create mode 100644 pkg/backup/fake.go create mode 100644 pkg/backup/gcs.go create mode 100644 pkg/backup/gcs_test.go create mode 100644 pkg/backup/provider.go diff --git a/Makefile b/Makefile index 6b0d06d5..7257a0a8 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,8 @@ local: image push # Same as above - image still needs to be built and pushed/loaded .PHONY: deploy deploy: image push - $(BUILD_BIN) install --context $(KUBECTX) --local --image $(IMG):$(TAG) $(DEPLOY_FLAGS) + $(BUILD_BIN) install --context $(KUBECTX) --local --image $(IMG):$(TAG) $(DEPLOY_FLAGS) \ + --backup-provider=gcs --gcs-bucket=$(BACKUP_BUCKET) # Tail operator logs .PHONY: logs diff --git a/README.md b/README.md index 75b96dc9..d7c65e49 100644 --- a/README.md +++ b/README.md @@ -122,15 +122,19 @@ Terraform state is stored in a secret using the [kubernetes backend](https://www Note: Do not define a backend in your terraform configuration - it will conflict with the configuration Etok automatically installs. -### State Persistence +### State Backup and Restore -Persistence of state to cloud storage is supported. If enabled, every update to the state is backed up to a cloud storage bucket. +Backup of state to cloud storage is supported. If enabled, every update to state is backed up to a cloud storage bucket. When a new workspace is created, the operator checks if a backup exists. If so, it is restored. -To enable persistence, pass the name of an existing bucket via the `--backup-bucket` flag when creating a new workspace with `workspace new`. If the secret storing the state cannot be found, the workspace checks if a backup exists in the bucket. If found, it restores the state to the secret. +To enable backups, install the operator with the relevant flags. For example, to backup to a GCS bucket: + +``` +etok install --backup-provider=gcs --gcs-bucket=backups-bucket +``` Note: only GCS is supported at present. -The operator is responsible for persisting the state. Therefore be sure to provide the appropriate credentials to the operator at install time. Either provide the path to a file containing a GCP service account key via the `--secret-file` flag, or setup workload identity (see below). The service account needs the following permissions on the bucket: +Be sure to provide the appropriate credentials to the operator at install time. Either provide the path to a file containing a GCP service account key via the `--secret-file` flag, or setup workload identity (see below). The service account needs the following permissions on the bucket: ``` storage.buckets.get @@ -139,6 +143,8 @@ storage.objects.delete storage.objects.get ``` +To opt a workspace out of automatic backup and restore, pass the `--ephemeral` flag when creating a new workspace with `workspace new`. This is useful if you intend for your workspace to be short-lived. + ## Credentials Etok looks for credentials in a secret named `etok`. If found, the credentials contained within are made available to terraform as environment variables. diff --git a/api/etok.dev/v1alpha1/workspace_types.go b/api/etok.dev/v1alpha1/workspace_types.go index 377208c4..5faa8469 100644 --- a/api/etok.dev/v1alpha1/workspace_types.go +++ b/api/etok.dev/v1alpha1/workspace_types.go @@ -69,10 +69,9 @@ type WorkspaceSpec struct { // Variables as inputs to module Variables []*Variable `json:"variables,omitempty"` - // +kubebuilder:validation:Pattern=`^[0-9a-z][0-9a-z\-_]{0,61}[0-9a-z]$` - - // GCS bucket to which to backup state file - BackupBucket string `json:"backupBucket,omitempty"` + // Ephemeral turns off state backup (and restore) - intended for short-lived + // workspaces. + Ephemeral bool `json:"ephemeral,omitempty"` } // WorkspaceSpec defines the desired state of Workspace's cache storage diff --git a/cmd/install/deployment.go b/cmd/install/deployment.go index b8d5feb2..0770e529 100644 --- a/cmd/install/deployment.go +++ b/cmd/install/deployment.go @@ -33,26 +33,17 @@ func WithAnnotations(annotations map[string]string) podTemplateOption { } } -func WithEnvFromSecretKey(varName, secret, key string) podTemplateOption { +func WithSecret(secretPresent bool) podTemplateOption { return func(c *podTemplateConfig) { - c.envVars = append(c.envVars, corev1.EnvVar{ - Name: varName, - ValueFrom: &corev1.EnvVarSource{ - SecretKeyRef: &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: secret, - }, - Key: key, - }, - }, - }) + c.withSecret = secretPresent + } } -func WithSecret(secretPresent bool) podTemplateOption { +func WithGCSProvider(bucket string) podTemplateOption { return func(c *podTemplateConfig) { - c.withSecret = secretPresent - + c.envVars = append(c.envVars, corev1.EnvVar{Name: "ETOK_BACKUP_PROVIDER", Value: "gcs"}) + c.envVars = append(c.envVars, corev1.EnvVar{Name: "ETOK_GCS_BUCKET", Value: bucket}) } } @@ -115,6 +106,11 @@ func deployment(namespace string, opts ...podTemplateOption) *appsv1.Deployment }, } + // Add environment variables to container + for _, ev := range c.envVars { + deployment.Spec.Template.Spec.Containers[0].Env = append(deployment.Spec.Template.Spec.Containers[0].Env, ev) + } + // Label selector for operator pod. It must match the pod template's // labels. selector := labels.MakeLabels( diff --git a/cmd/install/deployment_test.go b/cmd/install/deployment_test.go index cf6ac43c..567c934e 100644 --- a/cmd/install/deployment_test.go +++ b/cmd/install/deployment_test.go @@ -48,6 +48,21 @@ func TestDeployment(t *testing.T) { }) }, }, + { + name: "with backup enabled", + namespace: "default", + opts: []podTemplateOption{WithGCSProvider("backups-bucket")}, + assertions: func(deploy *appsv1.Deployment) { + assert.Contains(t, deploy.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "ETOK_BACKUP_PROVIDER", + Value: "gcs", + }) + assert.Contains(t, deploy.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "ETOK_GCS_BUCKET", + Value: "backups-bucket", + }) + }, + }, } for _, tt := range tests { testutil.Run(t, tt.name, func(t *testutil.T) { diff --git a/cmd/install/install.go b/cmd/install/install.go index 747a303f..1cec030a 100644 --- a/cmd/install/install.go +++ b/cmd/install/install.go @@ -2,6 +2,7 @@ package install import ( "context" + "errors" "fmt" "io/ioutil" "net/http" @@ -53,6 +54,8 @@ var ( // Interval between polling deployment status interval = time.Second + + errInvalidBackupConfig = errors.New("invalid backup config") ) type installOptions struct { @@ -83,6 +86,12 @@ type installOptions struct { // Print out resources and don't install dryRun bool + + // Toggle state backups + backupProviderName string + + // GCS backup bucket + gcsBucket string } func InstallCmd(f *cmdutil.Factory) (*cobra.Command, *installOptions) { @@ -95,6 +104,10 @@ func InstallCmd(f *cmdutil.Factory) (*cobra.Command, *installOptions) { Use: "install", Short: "Install etok operator", RunE: func(cmd *cobra.Command, args []string) (err error) { + if err := o.validateBackupOptions(); err != nil { + return err + } + o.Client, err = o.CreateRuntimeClient(o.kubeContext) if err != nil { return err @@ -119,9 +132,27 @@ func InstallCmd(f *cmdutil.Factory) (*cobra.Command, *installOptions) { cmd.Flags().StringToStringVar(&o.serviceAccountAnnotations, "sa-annotations", map[string]string{}, "Annotations to add to the etok ServiceAccount. Add iam.gke.io/gcp-service-account=[GSA_NAME]@[PROJECT_NAME].iam.gserviceaccount.com for workload identity") cmd.Flags().BoolVar(&o.crdsOnly, "crds-only", o.crdsOnly, "Only generate CRD resources. Useful for updating CRDs for an existing Etok install.") + cmd.Flags().StringVar(&o.backupProviderName, "backup-provider", "", "Enable backups specifying a provider (only 'gcs' supported currently)") + + cmd.Flags().StringVar(&o.gcsBucket, "gcs-bucket", "", "Specify GCS bucket for terraform state backups") + return cmd, o } +func (o *installOptions) validateBackupOptions() error { + if o.backupProviderName != "" { + if o.backupProviderName != "gcs" { + return fmt.Errorf("%w: %s is invalid value for --backup-provider, valid options are: gcs", errInvalidBackupConfig, o.backupProviderName) + } + } + + if (o.backupProviderName == "" && o.gcsBucket != "") || (o.backupProviderName != "" && o.gcsBucket == "") { + return fmt.Errorf("%w: you must specify both --backup-provider and --gcs-bucket", errInvalidBackupConfig) + } + + return nil +} + func (o *installOptions) install(ctx context.Context) error { var deploy *appsv1.Deployment var resources []runtimeclient.Object @@ -150,7 +181,16 @@ func (o *installOptions) install(ctx context.Context) error { resources = append(resources, serviceAccount(o.namespace, o.serviceAccountAnnotations)) secretPresent := o.secretFile != "" - deploy = deployment(o.namespace, WithSecret(secretPresent), WithImage(o.image)) + + // Deploy options + dopts := []podTemplateOption{} + dopts = append(dopts, WithSecret(secretPresent)) + dopts = append(dopts, WithImage(o.image)) + if o.backupProviderName == "gcs" { + dopts = append(dopts, WithGCSProvider(o.gcsBucket)) + } + + deploy = deployment(o.namespace, dopts...) resources = append(resources, deploy) if o.secretFile != "" { diff --git a/cmd/install/install_test.go b/cmd/install/install_test.go index f6bbc870..99e6be34 100644 --- a/cmd/install/install_test.go +++ b/cmd/install/install_test.go @@ -3,6 +3,7 @@ package install import ( "bytes" "context" + "errors" "fmt" "io" "io/ioutil" @@ -38,7 +39,7 @@ func TestInstall(t *testing.T) { name string args []string objs []runtimeclient.Object - err bool + err error assertions func(*testutil.T, runtimeclient.Client) }{ { @@ -76,6 +77,31 @@ func TestInstall(t *testing.T) { assert.Equal(t, "bugsbunny:v123", d.Spec.Template.Spec.Containers[0].Image) }, }, + { + name: "fresh install with backups enabled", + args: []string{"install", "--wait=false", "--backup-provider=gcs", "--gcs-bucket=backups-bucket"}, + assertions: func(t *testutil.T, client runtimeclient.Client) { + var d = deploy() + client.Get(context.Background(), runtimeclient.ObjectKeyFromObject(d), d) + assert.Contains(t, d.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{Name: "ETOK_BACKUP_PROVIDER", Value: "gcs"}) + assert.Contains(t, d.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{Name: "ETOK_GCS_BUCKET", Value: "backups-bucket"}) + }, + }, + { + name: "missing backup bucket name", + args: []string{"install", "--wait=false", "--backup-provider=gcs"}, + err: errInvalidBackupConfig, + }, + { + name: "missing backup provider name", + args: []string{"install", "--wait=false", "--gcs-bucket=backups-bucket"}, + err: errInvalidBackupConfig, + }, + { + name: "invalid backup provider name", + args: []string{"install", "--wait=false", "--backup-provider=alibaba-cloud-blob"}, + err: errInvalidBackupConfig, + }, } for _, tt := range tests { testutil.Run(t, tt.name, func(t *testutil.T) { @@ -103,7 +129,17 @@ func TestInstall(t *testing.T) { // Override wait interval to ensure fast tests t.Override(&interval, 10*time.Millisecond) - t.CheckError(tt.err, cmd.ExecuteContext(context.Background())) + // Run command and assert returned error is either nil or wraps + // expected error + err := cmd.ExecuteContext(context.Background()) + if !assert.True(t, errors.Is(err, tt.err)) { + t.Errorf("unexpected error: %w", err) + t.FailNow() + } + if err != nil { + // Expected error occurred; there's no point in continuing + return + } // get runtime client now that it's been created client := opts.RuntimeClient diff --git a/cmd/manager/manager.go b/cmd/manager/manager.go index 465923c7..e5090767 100644 --- a/cmd/manager/manager.go +++ b/cmd/manager/manager.go @@ -9,6 +9,7 @@ import ( "github.com/leg100/etok/cmd/flags" cmdutil "github.com/leg100/etok/cmd/util" + "github.com/leg100/etok/pkg/backup" "github.com/leg100/etok/pkg/controllers" "github.com/leg100/etok/pkg/scheme" "github.com/leg100/etok/pkg/version" @@ -38,6 +39,12 @@ type ManagerOptions struct { EnableLeaderElection bool args []string + + // Toggle state backups + backupProviderName string + + // GCS backup bucket + gcsBucket string } func ManagerCmd(f *cmdutil.Factory) *cobra.Command { @@ -69,11 +76,24 @@ func ManagerCmd(f *cmdutil.Factory) *cobra.Command { klog.V(0).Info("Runner image: " + o.Image) + var backupProvider backup.Provider + if o.backupProviderName != "" { + switch o.backupProviderName { + case "gcs": + backupProvider, err = backup.NewGCSProvider(cmd.Context(), o.gcsBucket, nil) + if err != nil { + return err + } + } + } + // Setup workspace ctrl with mgr workspaceReconciler := controllers.NewWorkspaceReconciler( mgr.GetClient(), o.Image, + controllers.WithBackupProvider(backupProvider), controllers.WithEventRecorder(mgr.GetEventRecorderFor("workspace-controller"))) + if err := workspaceReconciler.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create workspace controller: %w", err) } @@ -102,5 +122,9 @@ func ManagerCmd(f *cmdutil.Factory) *cobra.Command { "Enabling this will ensure there is only one active controller manager.") cmd.Flags().StringVar(&o.Image, "image", version.Image, "Docker image used for both the operator and the runner") + cmd.Flags().StringVar(&o.backupProviderName, "backup-provider", "", "Enable backups specifying a provider") + + cmd.Flags().StringVar(&o.gcsBucket, "gcs-bucket", "", "Specify GCS bucket for terraform state backups") + return cmd } diff --git a/cmd/workspace/workspace_new.go b/cmd/workspace/workspace_new.go index 3e40944b..d903d904 100644 --- a/cmd/workspace/workspace_new.go +++ b/cmd/workspace/workspace_new.go @@ -60,9 +60,8 @@ type newOptions struct { // Timeout for workspace pod to be ready podTimeout time.Duration - // Timeout for workspace restore failure condition to report either true or - // false (did the restore fail or not?). - restoreTimeout time.Duration + // Timeout for workspace ready condition to be true + readyTimeout time.Duration // Disable default behaviour of deleting resources upon error disableResourceCleanup bool @@ -77,9 +76,6 @@ type newOptions struct { variables map[string]string environmentVariables map[string]string - // backupBucket is the bucket to which the state file will backed up to - backupBucket string - etokenv *env.Env } @@ -131,7 +127,7 @@ func newCmd(f *cmdutil.Factory) (*cobra.Command, *newOptions) { cmd.Flags().StringVar(&o.workspaceSpec.Cache.Size, "size", defaultCacheSize, "Size of PersistentVolume for cache") cmd.Flags().StringVar(&o.workspaceSpec.TerraformVersion, "terraform-version", "", "Override terraform version") - cmd.Flags().StringVar(&o.workspaceSpec.BackupBucket, "backup-bucket", "", "Backup state to GCS bucket") + cmd.Flags().BoolVarP(&o.workspaceSpec.Ephemeral, "ephemeral", "e", false, "Disable state backup (and restore)") // We want nil to be the default but it doesn't seem like pflags supports // that so use empty string and override later (see above) @@ -139,7 +135,7 @@ func newCmd(f *cmdutil.Factory) (*cobra.Command, *newOptions) { cmd.Flags().DurationVar(&o.reconcileTimeout, "reconcile-timeout", defaultReconcileTimeout, "timeout for resource to be reconciled") cmd.Flags().DurationVar(&o.podTimeout, "pod-timeout", defaultPodTimeout, "timeout for pod to be ready") - cmd.Flags().DurationVar(&o.restoreTimeout, "restore-timeout", defaultReadyTimeout, "timeout for restore condition to report back") + cmd.Flags().DurationVar(&o.readyTimeout, "ready-timeout", defaultReadyTimeout, "timeout for ready condition to report true") cmd.Flags().StringSliceVar(&o.workspaceSpec.PrivilegedCommands, "privileged-commands", []string{}, "Set privileged commands") @@ -294,7 +290,7 @@ func (o *newOptions) waitForReady(ctx context.Context, ws *v1alpha1.Workspace) e lw := &k8s.WorkspaceListWatcher{Client: o.EtokClient, Name: ws.Name, Namespace: ws.Namespace} hdlr := handlers.WorkspaceReady() - ctx, cancel := context.WithTimeout(ctx, o.restoreTimeout) + ctx, cancel := context.WithTimeout(ctx, o.readyTimeout) defer cancel() _, err := watchtools.UntilWithSync(ctx, lw, &v1alpha1.Workspace{}, nil, hdlr) diff --git a/cmd/workspace/workspace_new_test.go b/cmd/workspace/workspace_new_test.go index 580fdfef..b12ead7a 100644 --- a/cmd/workspace/workspace_new_test.go +++ b/cmd/workspace/workspace_new_test.go @@ -237,12 +237,11 @@ func TestNewWorkspace(t *testing.T) { err: handlers.ErrWorkspaceFailed, }, { - name: "restore timeout exceeded", - args: []string{"foo", "--backup-bucket", "my-bucket", "--restore-timeout", "100ms"}, + name: "ready timeout exceeded", + args: []string{"foo", "--ready-timeout", "100ms"}, objs: []runtime.Object{testobj.WorkspacePod("default", "foo")}, overrideStatus: func(status *v1alpha1.WorkspaceStatus) { - // Mock operator failing to provide restoreFailure condition - // status + // Mock operator failing to provide ready condition status status.Conditions = nil }, err: errReadyTimeout, diff --git a/config/crd/bases/etok.dev_workspaces.yaml b/config/crd/bases/etok.dev_workspaces.yaml index 243983bc..d33c35e9 100644 --- a/config/crd/bases/etok.dev_workspaces.yaml +++ b/config/crd/bases/etok.dev_workspaces.yaml @@ -54,10 +54,6 @@ spec: spec: description: WorkspaceSpec defines the desired state of Workspace properties: - backupBucket: - description: GCS bucket to which to backup state file - pattern: ^[0-9a-z][0-9a-z\-_]{0,61}[0-9a-z]$ - type: string cache: description: Persistent Volume Claim specification for workspace's cache. @@ -73,6 +69,10 @@ spec: of persistent volumes). type: string type: object + ephemeral: + description: Ephemeral turns off state backup (and restore) - intended + for short-lived workspaces. + type: boolean privilegedCommands: description: List of commands that are deemed privileged. The client must set a specific annotation on the workspace to approve a run diff --git a/pkg/backup/fake.go b/pkg/backup/fake.go new file mode 100644 index 00000000..9032428d --- /dev/null +++ b/pkg/backup/fake.go @@ -0,0 +1,28 @@ +package backup + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type FakeProvider struct { + BucketObjs []*corev1.Secret +} + +func (p *FakeProvider) Backup(ctx context.Context, secret *corev1.Secret) error { + p.BucketObjs = append(p.BucketObjs, secret) + + return nil +} + +func (p *FakeProvider) Restore(ctx context.Context, key client.ObjectKey) (*corev1.Secret, error) { + for _, obj := range p.BucketObjs { + if client.ObjectKeyFromObject(obj) == key { + return obj, nil + } + } + + return nil, nil +} diff --git a/pkg/backup/gcs.go b/pkg/backup/gcs.go new file mode 100644 index 00000000..994b1a4b --- /dev/null +++ b/pkg/backup/gcs.go @@ -0,0 +1,115 @@ +package backup + +import ( + "bytes" + "context" + "fmt" + "io" + + "cloud.google.com/go/storage" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" +) + +type gcsProvider struct { + bucket string + client *storage.Client +} + +func NewGCSProvider(ctx context.Context, bucket string, client *storage.Client) (Provider, error) { + if client == nil { + var err error + client, err = storage.NewClient(ctx) + if err != nil { + return nil, err + } + } + + // Check bucket exists + bh := client.Bucket(bucket) + _, err := bh.Attrs(ctx) + if err != nil { + return nil, err + } + + return &gcsProvider{ + bucket: bucket, + client: client, + }, nil +} + +func (p *gcsProvider) Backup(ctx context.Context, secret *corev1.Secret) error { + bh := p.client.Bucket(p.bucket) + _, err := bh.Attrs(ctx) + if err != nil { + return err + } + + oh := bh.Object(path(client.ObjectKeyFromObject(secret))) + + // Marshal state file first to json then to yaml + y, err := yaml.Marshal(secret) + if err != nil { + return err + } + + // Copy state file to GCS + owriter := oh.NewWriter(ctx) + _, err = io.Copy(owriter, bytes.NewBuffer(y)) + if err != nil { + return err + } + + if err := owriter.Close(); err != nil { + return err + } + + return nil +} + +func (p *gcsProvider) Restore(ctx context.Context, key client.ObjectKey) (*corev1.Secret, error) { + var secret corev1.Secret + + bh := p.client.Bucket(p.bucket) + _, err := bh.Attrs(ctx) + if err != nil { + return nil, err + } + + // Try to retrieve existing backup + oh := bh.Object(path(key)) + _, err = oh.Attrs(ctx) + if err == storage.ErrObjectNotExist { + return nil, nil + } else if err != nil { + return nil, err + } + + oreader, err := oh.NewReader(ctx) + if err != nil { + return nil, err + } + + // Copy state file from GCS + buf := new(bytes.Buffer) + _, err = io.Copy(buf, oreader) + if err != nil { + return nil, err + } + + // Unmarshal state file into secret obj + if err := yaml.Unmarshal(buf.Bytes(), &secret); err != nil { + return nil, err + } + + if err := oreader.Close(); err != nil { + return nil, err + } + + return &secret, nil +} + +func path(key client.ObjectKey) string { + return fmt.Sprintf("%s.yaml", key) +} diff --git a/pkg/backup/gcs_test.go b/pkg/backup/gcs_test.go new file mode 100644 index 00000000..c00b8859 --- /dev/null +++ b/pkg/backup/gcs_test.go @@ -0,0 +1,54 @@ +package backup + +import ( + "context" + "testing" + + "github.com/fsouza/fake-gcs-server/fakestorage" + "github.com/leg100/etok/pkg/testobj" + "github.com/leg100/etok/pkg/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func TestGCSProvider(t *testing.T) { + tests := []struct { + name string + bucket string + bucketObjs []fakestorage.Object + secret *corev1.Secret + }{ + { + name: "Backup", + bucket: "backups", + bucketObjs: []fakestorage.Object{ + { + BucketName: "backups", + }, + }, + secret: testobj.Secret("default", "tfstate-default-workspace-1", testobj.WithCompressedDataFromFile("tfstate", "testdata/tfstate.json")), + }, + } + for _, tt := range tests { + testutil.Run(t, tt.name, func(t *testutil.T) { + // Setup up new fake GCS server for each test + server, err := fakestorage.NewServerWithOptions(fakestorage.Options{ + InitialObjects: tt.bucketObjs, + Host: "127.0.0.1", + Port: 8081, + }) + require.NoError(t, err) + defer server.Stop() + + p, err := NewGCSProvider(context.Background(), tt.bucket, server.Client()) + require.NoError(t, err) + + require.NoError(t, p.Backup(context.Background(), tt.secret)) + secret, err := p.Restore(context.Background(), client.ObjectKeyFromObject(tt.secret)) + + assert.Equal(t, tt.secret, secret) + }) + } +} diff --git a/pkg/backup/provider.go b/pkg/backup/provider.go new file mode 100644 index 00000000..b9e95545 --- /dev/null +++ b/pkg/backup/provider.go @@ -0,0 +1,13 @@ +package backup + +import ( + "context" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type Provider interface { + Backup(context.Context, *corev1.Secret) error + Restore(context.Context, client.ObjectKey) (*corev1.Secret, error) +} diff --git a/pkg/controllers/workspace_controller.go b/pkg/controllers/workspace_controller.go index ca2644ad..dd4c16f2 100644 --- a/pkg/controllers/workspace_controller.go +++ b/pkg/controllers/workspace_controller.go @@ -1,21 +1,16 @@ package controllers import ( - "bytes" "context" "errors" - "fmt" - "io" "reflect" "strings" - "cloud.google.com/go/storage" "github.com/leg100/etok/api/etok.dev/v1alpha1" - "google.golang.org/api/googleapi" - "sigs.k8s.io/yaml" "sigs.k8s.io/controller-runtime/pkg/log" + "github.com/leg100/etok/pkg/backup" "github.com/leg100/etok/pkg/scheme" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -49,17 +44,17 @@ type workspaceUpdater func(context.Context, *v1alpha1.Workspace) (*metav1.Condit type WorkspaceReconciler struct { client.Client - Scheme *runtime.Scheme - Image string - StorageClient *storage.Client - recorder record.EventRecorder + Scheme *runtime.Scheme + Image string + recorder record.EventRecorder + BackupProvider backup.Provider } type WorkspaceReconcilerOption func(r *WorkspaceReconciler) -func WithStorageClient(sc *storage.Client) WorkspaceReconcilerOption { +func WithBackupProvider(bp backup.Provider) WorkspaceReconcilerOption { return func(r *WorkspaceReconciler) { - r.StorageClient = sc + r.BackupProvider = bp } } @@ -273,7 +268,7 @@ func (r *WorkspaceReconciler) manageState(ctx context.Context, ws *v1alpha1.Work err := r.Get(ctx, types.NamespacedName{Namespace: ws.Namespace, Name: ws.StateSecretName()}, &secret) switch { case kerrors.IsNotFound(err): - if ws.Spec.BackupBucket != "" { + if r.BackupProvider != nil { return r.restore(ctx, ws) } case err != nil: @@ -308,10 +303,17 @@ func (r *WorkspaceReconciler) manageState(ctx context.Context, ws *v1alpha1.Work ws.Status.Outputs = outputs } - if ws.Spec.BackupBucket != "" { - if ws.Status.BackupSerial == nil || state.Serial != *ws.Status.BackupSerial { - // Backup the state file and update status - return r.backup(ctx, ws, &secret, state) + // Determine if backup should be made + if r.BackupProvider != nil && !ws.Spec.Ephemeral { + // Backup if current backup serial doesn't match serial of state + if ws.Status.BackupSerial == nil || *ws.Status.BackupSerial != state.Serial { + if err := r.BackupProvider.Backup(ctx, &secret); err != nil { + return nil, err + } + + ws.Status.BackupSerial = &state.Serial + + r.recorder.Eventf(ws, "Normal", "BackupSuccessful", "Backed up state #%d", state.Serial) } } } @@ -363,109 +365,29 @@ func (r *WorkspaceReconciler) pruneApprovals(ctx context.Context, ws v1alpha1.Wo return annotations, nil } -func (r *WorkspaceReconciler) backup(ctx context.Context, ws *v1alpha1.Workspace, secret *corev1.Secret, sfile *state) (*metav1.Condition, error) { - // Re-use client or create if not yet created - if r.StorageClient == nil { - var err error - r.StorageClient, err = storage.NewClient(ctx) - if err != nil { - return r.handleStorageError(err, ws, "BackupError") - } - } - - bh := r.StorageClient.Bucket(ws.Spec.BackupBucket) - _, err := bh.Attrs(ctx) - if err != nil { - return r.handleStorageError(err, ws, "BackupError") - } - - oh := bh.Object(ws.BackupObjectName()) - - // Marshal state file first to json then to yaml - y, err := yaml.Marshal(secret) - if err != nil { - return r.handleStorageError(err, ws, "BackupError") - } - - // Copy state file to GCS - owriter := oh.NewWriter(ctx) - _, err = io.Copy(owriter, bytes.NewBuffer(y)) - if err != nil { - return r.handleStorageError(err, ws, "BackupError") - } - - if err := owriter.Close(); err != nil { - return r.handleStorageError(err, ws, "BackupError") - } - - // Update latest backup serial - ws.Status.BackupSerial = &sfile.Serial - - r.recorder.Eventf(ws, "Normal", "BackupSuccessful", "Backed up state #%d", sfile.Serial) - return nil, nil -} - func (r *WorkspaceReconciler) restore(ctx context.Context, ws *v1alpha1.Workspace) (*metav1.Condition, error) { - var secret corev1.Secret - - // Re-use client or create if not yet created - if r.StorageClient == nil { - var err error - r.StorageClient, err = storage.NewClient(ctx) - if err != nil { - return nil, err - } - } - - bh := r.StorageClient.Bucket(ws.Spec.BackupBucket) - _, err := bh.Attrs(ctx) + secretKey := types.NamespacedName{Namespace: ws.Namespace, Name: ws.StateSecretName()} + secret, err := r.BackupProvider.Restore(ctx, secretKey) if err != nil { - return r.handleStorageError(err, ws, "RestoreError") + return nil, err } - - // Try to retrieve existing backup - oh := bh.Object(ws.BackupObjectName()) - _, err = oh.Attrs(ctx) - if err == storage.ErrObjectNotExist { + if secret == nil { r.recorder.Eventf(ws, "Normal", "RestoreSkipped", "There is no state to restore") return nil, nil - } else if err != nil { - return nil, err - } - - oreader, err := oh.NewReader(ctx) - if err != nil { - return r.handleStorageError(err, ws, "RestoreError") - } - - // Copy state file from GCS - buf := new(bytes.Buffer) - _, err = io.Copy(buf, oreader) - if err != nil { - return r.handleStorageError(err, ws, "RestoreError") - } - - // Unmarshal state file into secret obj - if err := yaml.Unmarshal(buf.Bytes(), &secret); err != nil { - return r.handleStorageError(err, ws, "RestoreError") - } - - if err := oreader.Close(); err != nil { - return r.handleStorageError(err, ws, "RestoreError") } // Blank out certain fields to avoid errors upon create secret.ResourceVersion = "" secret.OwnerReferences = nil - if err := r.Create(ctx, &secret); err != nil { - return r.handleStorageError(err, ws, "RestoreError") + if err := r.Create(ctx, secret); err != nil { + return r.sendWarningEvent(err, ws, "RestoreError") } // Parse state file - state, err := readState(ctx, &secret) + state, err := readState(ctx, secret) if err != nil { - return r.handleStorageError(err, ws, "RestoreError") + return r.sendWarningEvent(err, ws, "RestoreError") } // Record in status that a backup with the given serial number exists. @@ -476,20 +398,8 @@ func (r *WorkspaceReconciler) restore(ctx context.Context, ws *v1alpha1.Workspac return nil, nil } -// Handle errors from the Google Cloud storage client -func (r *WorkspaceReconciler) handleStorageError(err error, ws *v1alpha1.Workspace, reason string) (*metav1.Condition, error) { - if err == storage.ErrBucketNotExist { - r.recorder.Eventf(ws, "Warning", reason, "bucket does not exist") - return workspaceFailure(fmt.Sprintf("%s: %s", reason, "bucket does not exist")), nil - } - - if gerr, ok := err.(*googleapi.Error); ok { - if gerr.Code >= 400 && gerr.Code < 500 { - // HTTP 40x errors are deemed unrecoverable - r.recorder.Eventf(ws, "Warning", reason, gerr.Message) - return workspaceFailure(fmt.Sprintf("%s: %s", reason, gerr.Message)), nil - } - } +// Send warning event as well as propagating error to caller +func (r *WorkspaceReconciler) sendWarningEvent(err error, ws *v1alpha1.Workspace, reason string) (*metav1.Condition, error) { r.recorder.Eventf(ws, "Warning", reason, err.Error()) return nil, err } diff --git a/pkg/controllers/workspace_controller_test.go b/pkg/controllers/workspace_controller_test.go index 8509b900..0347f1f9 100644 --- a/pkg/controllers/workspace_controller_test.go +++ b/pkg/controllers/workspace_controller_test.go @@ -6,8 +6,8 @@ import ( "cloud.google.com/go/storage" - "github.com/fsouza/fake-gcs-server/fakestorage" v1alpha1 "github.com/leg100/etok/api/etok.dev/v1alpha1" + "github.com/leg100/etok/pkg/backup" "github.com/leg100/etok/pkg/scheme" "github.com/leg100/etok/pkg/testobj" "github.com/leg100/etok/pkg/testutil" @@ -20,6 +20,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -30,11 +31,12 @@ func TestReconcileWorkspace(t *testing.T) { name string workspace *v1alpha1.Workspace objs []runtime.Object - bucketObjs []fakestorage.Object + bucketObjs []*corev1.Secret workspaceAssertions func(*testutil.T, *v1alpha1.Workspace) podAssertions func(*testutil.T, *corev1.Pod) pvcAssertions func(*testutil.T, *corev1.PersistentVolumeClaim) configMapAssertions func(*testutil.T, *corev1.ConfigMap) + backupAssertions func(*testutil.T, []*corev1.Secret) stateAssertions func(*testutil.T, *corev1.Secret) storageAssertions func(*testutil.T, *storage.Client) disableRBACAssertions bool @@ -225,57 +227,55 @@ func TestReconcileWorkspace(t *testing.T) { }, }, { + // Demonstrate that the state secret is backed up into (fake) backup + // provider, and that the backup serial status gets updated name: "Backup", - workspace: testobj.Workspace("default", "workspace-1", testobj.WithBackupBucket("backup-bucket")), + workspace: testobj.Workspace("default", "workspace-1"), objs: []runtime.Object{ testobj.Secret("default", "tfstate-default-workspace-1", testobj.WithCompressedDataFromFile("tfstate", "testdata/tfstate.json")), }, - bucketObjs: []fakestorage.Object{ - { - BucketName: "backup-bucket", - }, - }, - storageAssertions: func(t *testutil.T, client *storage.Client) { - // Check object exists in bucket - obj := client.Bucket("backup-bucket").Object("default/workspace-1.yaml") - _, err := obj.Attrs(context.Background()) - require.NoError(t, err) + backupAssertions: func(t *testutil.T, stateFiles []*corev1.Secret) { + wantKey := types.NamespacedName{Namespace: "default", Name: "tfstate-default-workspace-1"} + for _, s := range stateFiles { + if client.ObjectKeyFromObject(s) == wantKey { + return + } + } + t.Error("failed to find state secret in backup") }, workspaceAssertions: func(t *testutil.T, ws *v1alpha1.Workspace) { assert.Equal(t, 4, *ws.Status.BackupSerial) }, }, { + // Demonstrate that the state secret gets restored, and that the + // backup serial status gets updated name: "Restore", - workspace: testobj.Workspace("default", "workspace-1", testobj.WithBackupBucket("backup-bucket")), - objs: []runtime.Object{ + workspace: testobj.Workspace("default", "workspace-1"), + bucketObjs: []*corev1.Secret{ testobj.Secret("default", "tfstate-default-workspace-1", testobj.WithCompressedDataFromFile("tfstate", "testdata/tfstate.json")), }, - bucketObjs: []fakestorage.Object{ - { - BucketName: "backup-bucket", - Name: "default/workspace-1.yaml", - Content: readFile("testdata/tfstate.yaml"), - }, - }, - storageAssertions: func(t *testutil.T, client *storage.Client) { - // Check object exists in bucket - obj := client.Bucket("backup-bucket").Object("default/workspace-1.yaml") - _, err := obj.Attrs(context.Background()) - require.NoError(t, err) + stateAssertions: func(t *testutil.T, state *corev1.Secret) { + // Empty func causes test to check for existance of state secret + // (see below). }, workspaceAssertions: func(t *testutil.T, ws *v1alpha1.Workspace) { assert.Equal(t, 4, *ws.Status.BackupSerial) - }}, + }, + }, { - name: "Non-existent backup bucket", - workspace: testobj.Workspace("", "workspace-1", testobj.WithBackupBucket("does-not-exist")), + // Demonstrate that an ephemeral workspace's state does *not* get + // backed up. + name: "Ephemeral workspace", + workspace: testobj.Workspace("default", "workspace-1", testobj.WithEphemeral()), objs: []runtime.Object{ - testobj.Secret("dev", "tfstate-default-workspace-1", testobj.WithCompressedDataFromFile("tfstate", "testdata/tfstate.json")), + testobj.Secret("default", "tfstate-default-workspace-1", testobj.WithCompressedDataFromFile("tfstate", "testdata/tfstate.json")), + }, + backupAssertions: func(t *testutil.T, stateFiles []*corev1.Secret) { + assert.Equal(t, 0, len(stateFiles)) }, - wantErr: true, workspaceAssertions: func(t *testutil.T, ws *v1alpha1.Workspace) { - assert.Equal(t, v1alpha1.WorkspacePhaseError, ws.Status.Phase) + assert.Nil(t, ws.Status.BackupSerial) }, }, } @@ -284,19 +284,13 @@ func TestReconcileWorkspace(t *testing.T) { objs := append(tt.objs, runtime.Object(tt.workspace)) cl := fake.NewFakeClientWithScheme(scheme.Scheme, objs...) - // Setup up new fake GCS server for each test - server, err := fakestorage.NewServerWithOptions(fakestorage.Options{ - InitialObjects: tt.bucketObjs, - Host: "127.0.0.1", - Port: 8081, - }) - require.NoError(t, err) - defer server.Stop() + // Create fake backup provider + backupProvider := backup.FakeProvider{BucketObjs: tt.bucketObjs} // Reconcile - r := NewWorkspaceReconciler(cl, "", WithStorageClient(server.Client()), WithEventRecorder(record.NewFakeRecorder(100))) + r := NewWorkspaceReconciler(cl, "", WithBackupProvider(&backupProvider), WithEventRecorder(record.NewFakeRecorder(100))) req := requestFromObject(tt.workspace) - _, err = r.Reconcile(context.Background(), req) + _, err := r.Reconcile(context.Background(), req) if tt.wantErr { assert.Error(t, err) } else { @@ -317,6 +311,11 @@ func TestReconcileWorkspace(t *testing.T) { tt.stateAssertions(t, &state) } + // Fetch fresh state secret for assertions + if tt.backupAssertions != nil { + tt.backupAssertions(t, backupProvider.BucketObjs) + } + if tt.configMapAssertions != nil { vars := corev1.ConfigMap{} require.NoError(t, r.Get(context.TODO(), types.NamespacedName{Namespace: tt.workspace.Namespace, Name: tt.workspace.BuiltinsConfigMapName()}, &vars)) @@ -335,10 +334,6 @@ func TestReconcileWorkspace(t *testing.T) { tt.pvcAssertions(t, &cache) } - if tt.storageAssertions != nil { - tt.storageAssertions(t, r.StorageClient) - } - // RBAC resources should always have been created so check them // unless explicitly told not to if !tt.disableRBACAssertions { diff --git a/pkg/testobj/k8s.go b/pkg/testobj/k8s.go index b2ef3e5e..13fb6007 100644 --- a/pkg/testobj/k8s.go +++ b/pkg/testobj/k8s.go @@ -63,9 +63,9 @@ func WithDeleteTimestamp() func(*v1alpha1.Workspace) { } } -func WithBackupBucket(bucket string) func(*v1alpha1.Workspace) { +func WithEphemeral() func(*v1alpha1.Workspace) { return func(ws *v1alpha1.Workspace) { - ws.Spec.BackupBucket = bucket + ws.Spec.Ephemeral = true } } diff --git a/test/e2e/backup_restore_test.go b/test/e2e/backup_restore_test.go index b7285396..7d5300e6 100644 --- a/test/e2e/backup_restore_test.go +++ b/test/e2e/backup_restore_test.go @@ -36,7 +36,6 @@ func TestBackupRestore(t *testing.T) { "--path", path, "--context", *kubectx, "--variables", "suffix=bar", - "--backup-bucket", *backupBucket, }, []expect.Batcher{ &expect.BExp{R: fmt.Sprintf("Created workspace %s/foo", namespace)}, @@ -75,7 +74,7 @@ func TestBackupRestore(t *testing.T) { // Check state backup exists t.Run("state backup", func(t *testing.T) { - _, err := sclient.Bucket(*backupBucket).Object(fmt.Sprintf("%s/foo.yaml", namespace)).Attrs(context.Background()) + _, err := sclient.Bucket(*backupBucket).Object(fmt.Sprintf("%s/tfstate-default-foo.yaml", namespace)).Attrs(context.Background()) require.NoError(t, err) }) @@ -98,7 +97,6 @@ func TestBackupRestore(t *testing.T) { "--path", path, "--context", *kubectx, "--variables", "suffix=bar", - "--backup-bucket", *backupBucket, }, []expect.Batcher{ &expect.BExp{R: fmt.Sprintf("Created workspace %s/foo", namespace)}, diff --git a/test/e2e/terraform_test.go b/test/e2e/terraform_test.go index 37de97c2..555665cb 100644 --- a/test/e2e/terraform_test.go +++ b/test/e2e/terraform_test.go @@ -34,6 +34,7 @@ func TestTerraform(t *testing.T) { "--namespace", namespace, "--path", path, "--context", *kubectx, + "--ephemeral", "--variables", "suffix=bar", }, []expect.Batcher{ From 3218fc403c9dd86be23422fbf112b7b49ef77361 Mon Sep 17 00:00:00 2001 From: Louis Garman Date: Wed, 3 Feb 2021 12:31:26 +0000 Subject: [PATCH 2/2] Send warning event on backup failure --- pkg/controllers/workspace_controller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controllers/workspace_controller.go b/pkg/controllers/workspace_controller.go index dd4c16f2..8a66b06b 100644 --- a/pkg/controllers/workspace_controller.go +++ b/pkg/controllers/workspace_controller.go @@ -308,7 +308,7 @@ func (r *WorkspaceReconciler) manageState(ctx context.Context, ws *v1alpha1.Work // Backup if current backup serial doesn't match serial of state if ws.Status.BackupSerial == nil || *ws.Status.BackupSerial != state.Serial { if err := r.BackupProvider.Backup(ctx, &secret); err != nil { - return nil, err + return r.sendWarningEvent(err, ws, "BackupError") } ws.Status.BackupSerial = &state.Serial @@ -369,7 +369,7 @@ func (r *WorkspaceReconciler) restore(ctx context.Context, ws *v1alpha1.Workspac secretKey := types.NamespacedName{Namespace: ws.Namespace, Name: ws.StateSecretName()} secret, err := r.BackupProvider.Restore(ctx, secretKey) if err != nil { - return nil, err + return r.sendWarningEvent(err, ws, "RestoreError") } if secret == nil { r.recorder.Eventf(ws, "Normal", "RestoreSkipped", "There is no state to restore")