From edb0860dd24733b81f42cc24e30392b4f333b603 Mon Sep 17 00:00:00 2001 From: Xun Jiang Date: Wed, 6 Dec 2023 18:27:42 +0800 Subject: [PATCH] Fix issue #7163. Update CSIVolumeSnapshotsCompleted in backup's status and the metric during backup finalize stage according to async operations content. Signed-off-by: Xun Jiang --- changelogs/unreleased/7184-blackpiglet | 2 + pkg/backup/snapshots.go | 23 ++++--- pkg/controller/backup_controller.go | 10 ++- pkg/controller/backup_finalizer_controller.go | 22 ++++++- .../backup_finalizer_controller_test.go | 65 +++++++++++++++++-- pkg/controller/backup_sync_controller_test.go | 4 +- 6 files changed, 105 insertions(+), 21 deletions(-) create mode 100644 changelogs/unreleased/7184-blackpiglet diff --git a/changelogs/unreleased/7184-blackpiglet b/changelogs/unreleased/7184-blackpiglet new file mode 100644 index 0000000000..ac5d180119 --- /dev/null +++ b/changelogs/unreleased/7184-blackpiglet @@ -0,0 +1,2 @@ +Update CSIVolumeSnapshotsCompleted in backup's status and the metric +during backup finalize stage according to async operations content. \ No newline at end of file diff --git a/pkg/backup/snapshots.go b/pkg/backup/snapshots.go index fdfd22cf95..a250833cfd 100644 --- a/pkg/backup/snapshots.go +++ b/pkg/backup/snapshots.go @@ -30,9 +30,18 @@ import ( "github.com/vmware-tanzu/velero/pkg/util/boolptr" ) -// Common function to update the status of CSI snapshots -// returns VolumeSnapshot, VolumeSnapshotContent, VolumeSnapshotClasses referenced -func UpdateBackupCSISnapshotsStatus(client kbclient.Client, globalCRClient kbclient.Client, backup *velerov1api.Backup, backupLog logrus.FieldLogger) (volumeSnapshots []snapshotv1api.VolumeSnapshot, volumeSnapshotContents []snapshotv1api.VolumeSnapshotContent, volumeSnapshotClasses []snapshotv1api.VolumeSnapshotClass) { +// GetBackupCSIResources is used to get CSI snapshot related resources. +// Returns VolumeSnapshot, VolumeSnapshotContent, VolumeSnapshotClasses referenced +func GetBackupCSIResources( + client kbclient.Client, + globalCRClient kbclient.Client, + backup *velerov1api.Backup, + backupLog logrus.FieldLogger, +) ( + volumeSnapshots []snapshotv1api.VolumeSnapshot, + volumeSnapshotContents []snapshotv1api.VolumeSnapshotContent, + volumeSnapshotClasses []snapshotv1api.VolumeSnapshotClass, +) { if boolptr.IsSetToTrue(backup.Spec.SnapshotMoveData) { backupLog.Info("backup SnapshotMoveData is set to true, skip VolumeSnapshot resource persistence.") } else if features.IsEnabled(velerov1api.CSIFeatureFlag) { @@ -69,13 +78,7 @@ func UpdateBackupCSISnapshotsStatus(client kbclient.Client, globalCRClient kbcli } } backup.Status.CSIVolumeSnapshotsAttempted = len(volumeSnapshots) - csiVolumeSnapshotsCompleted := 0 - for _, vs := range volumeSnapshots { - if vs.Status != nil && boolptr.IsSetToTrue(vs.Status.ReadyToUse) { - csiVolumeSnapshotsCompleted++ - } - } - backup.Status.CSIVolumeSnapshotsCompleted = csiVolumeSnapshotsCompleted } + return volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses } diff --git a/pkg/controller/backup_controller.go b/pkg/controller/backup_controller.go index b9e331b3b3..eef7ab9449 100644 --- a/pkg/controller/backup_controller.go +++ b/pkg/controller/backup_controller.go @@ -664,7 +664,9 @@ func (b *backupReconciler) runBackup(backup *pkgbackup.Request) error { backup.Status.VolumeSnapshotsCompleted++ } } - volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses := pkgbackup.UpdateBackupCSISnapshotsStatus(b.kbClient, b.globalCRClient, backup.Backup, backupLog) + volumeSnapshots, volumeSnapshotContents, volumeSnapshotClasses := pkgbackup.GetBackupCSIResources(b.kbClient, b.globalCRClient, backup.Backup, backupLog) + // Update CSIVolumeSnapshotsAttempted + backup.Status.CSIVolumeSnapshotsAttempted = len(volumeSnapshots) // Iterate over backup item operations and update progress. // Any errors on operations at this point should be added to backup errors. @@ -763,6 +765,7 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac backupDurationSeconds := float64(backupDuration / time.Second) serverMetrics.RegisterBackupDuration(backupScheduleName, backupDurationSeconds) } + if !finalize { serverMetrics.RegisterVolumeSnapshotAttempts(backupScheduleName, backup.Status.VolumeSnapshotsAttempted) serverMetrics.RegisterVolumeSnapshotSuccesses(backupScheduleName, backup.Status.VolumeSnapshotsCompleted) @@ -770,8 +773,6 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac if features.IsEnabled(velerov1api.CSIFeatureFlag) { serverMetrics.RegisterCSISnapshotAttempts(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsAttempted) - serverMetrics.RegisterCSISnapshotSuccesses(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsCompleted) - serverMetrics.RegisterCSISnapshotFailures(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsAttempted-backup.Status.CSIVolumeSnapshotsCompleted) } if backup.Status.Progress != nil { @@ -782,6 +783,9 @@ func recordBackupMetrics(log logrus.FieldLogger, backup *velerov1api.Backup, bac if backup.Status.Warnings > 0 { serverMetrics.RegisterBackupWarning(backupScheduleName) } + } else if features.IsEnabled(velerov1api.CSIFeatureFlag) { + serverMetrics.RegisterCSISnapshotSuccesses(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsCompleted) + serverMetrics.RegisterCSISnapshotFailures(backupScheduleName, backup.Name, backup.Status.CSIVolumeSnapshotsAttempted-backup.Status.CSIVolumeSnapshotsCompleted) } } diff --git a/pkg/controller/backup_finalizer_controller.go b/pkg/controller/backup_finalizer_controller.go index ea9c0364b2..c8fc93358d 100644 --- a/pkg/controller/backup_finalizer_controller.go +++ b/pkg/controller/backup_finalizer_controller.go @@ -31,6 +31,8 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" pkgbackup "github.com/vmware-tanzu/velero/pkg/backup" + "github.com/vmware-tanzu/velero/pkg/itemoperation" + "github.com/vmware-tanzu/velero/pkg/kuberesource" "github.com/vmware-tanzu/velero/pkg/metrics" "github.com/vmware-tanzu/velero/pkg/persistence" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt" @@ -187,10 +189,12 @@ func (r *backupFinalizerReconciler) Reconcile(ctx context.Context, req ctrl.Requ r.metrics.RegisterBackupPartialFailure(backupScheduleName) r.metrics.RegisterBackupLastStatus(backupScheduleName, metrics.BackupLastStatusFailure) } + backup.Status.CompletionTimestamp = &metav1.Time{Time: r.clock.Now()} + backup.Status.CSIVolumeSnapshotsCompleted = updateCSIVolumeSnapshotsCompleted(operations) + recordBackupMetrics(log, backup, outBackupFile, r.metrics, true) - pkgbackup.UpdateBackupCSISnapshotsStatus(r.client, r.globalCRClient, backup, log) // update backup metadata in object store backupJSON := new(bytes.Buffer) if err := encode.To(backup, "json", backupJSON); err != nil { @@ -214,3 +218,19 @@ func (r *backupFinalizerReconciler) SetupWithManager(mgr ctrl.Manager) error { For(&velerov1api.Backup{}). Complete(r) } + +// updateCSIVolumeSnapshotsCompleted calculate the completed VS number according to +// the backup's async operation list. +func updateCSIVolumeSnapshotsCompleted( + operations []*itemoperation.BackupOperation) int { + completedNum := 0 + + for index := range operations { + if operations[index].Spec.ResourceIdentifier.String() == kuberesource.VolumeSnapshots.String() && + operations[index].Status.Phase == itemoperation.OperationPhaseCompleted { + completedNum++ + } + } + + return completedNum +} diff --git a/pkg/controller/backup_finalizer_controller_test.go b/pkg/controller/backup_finalizer_controller_test.go index 74f6da57c5..e2f07b3e32 100644 --- a/pkg/controller/backup_finalizer_controller_test.go +++ b/pkg/controller/backup_finalizer_controller_test.go @@ -36,6 +36,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/builder" + "github.com/vmware-tanzu/velero/pkg/features" "github.com/vmware-tanzu/velero/pkg/itemoperation" "github.com/vmware-tanzu/velero/pkg/kuberesource" "github.com/vmware-tanzu/velero/pkg/metrics" @@ -66,12 +67,14 @@ func TestBackupFinalizerReconcile(t *testing.T) { defaultBackupLocation := builder.ForBackupStorageLocation(velerov1api.DefaultNamespace, "default").Result() tests := []struct { - name string - backup *velerov1api.Backup - backupOperations []*itemoperation.BackupOperation - backupLocation *velerov1api.BackupStorageLocation - expectError bool - expectPhase velerov1api.BackupPhase + name string + backup *velerov1api.Backup + backupOperations []*itemoperation.BackupOperation + backupLocation *velerov1api.BackupStorageLocation + enableCSI bool + expectError bool + expectPhase velerov1api.BackupPhase + expectedCompletedVS int }{ { name: "Finalizing backup is completed", @@ -145,6 +148,50 @@ func TestBackupFinalizerReconcile(t *testing.T) { }, }, }, + { + name: "Test calculate backup.Status.BackupItemOperationsCompleted", + backup: builder.ForBackup(velerov1api.DefaultNamespace, "backup-3"). + StorageLocation("default"). + ObjectMeta(builder.WithUID("foo")). + StartTimestamp(fakeClock.Now()). + WithStatus(velerov1api.BackupStatus{ + StartTimestamp: &metav1Now, + CompletionTimestamp: &metav1Now, + CSIVolumeSnapshotsAttempted: 1, + Phase: velerov1api.BackupPhaseFinalizing, + }). + Result(), + backupLocation: defaultBackupLocation, + enableCSI: true, + expectPhase: velerov1api.BackupPhaseCompleted, + expectedCompletedVS: 1, + backupOperations: []*itemoperation.BackupOperation{ + { + Spec: itemoperation.BackupOperationSpec{ + BackupName: "backup-3", + BackupUID: "foo", + BackupItemAction: "foo", + ResourceIdentifier: velero.ResourceIdentifier{ + GroupResource: kuberesource.VolumeSnapshots, + Namespace: "ns-1", + Name: "vs-1", + }, + PostOperationItems: []velero.ResourceIdentifier{ + { + GroupResource: kuberesource.Secrets, + Namespace: "ns-1", + Name: "secret-1", + }, + }, + OperationID: "operation-3", + }, + Status: itemoperation.OperationStatus{ + Phase: itemoperation.OperationPhaseCompleted, + Created: &metav1Now, + }, + }, + }, + }, } for _, test := range tests { @@ -160,6 +207,11 @@ func TestBackupFinalizerReconcile(t *testing.T) { initObjs = append(initObjs, test.backupLocation) } + if test.enableCSI { + features.Enable(velerov1api.CSIFeatureFlag) + defer features.Enable() + } + fakeClient := velerotest.NewFakeControllerRuntimeClient(t, initObjs...) fakeGlobalClient := velerotest.NewFakeControllerRuntimeClient(t, initObjs...) @@ -184,6 +236,7 @@ func TestBackupFinalizerReconcile(t *testing.T) { require.NoError(t, err) assert.Equal(t, test.expectPhase, backupAfter.Status.Phase) + assert.Equal(t, test.expectedCompletedVS, backupAfter.Status.CSIVolumeSnapshotsCompleted) }) } } diff --git a/pkg/controller/backup_sync_controller_test.go b/pkg/controller/backup_sync_controller_test.go index 1a7857ec12..5858008ae1 100644 --- a/pkg/controller/backup_sync_controller_test.go +++ b/pkg/controller/backup_sync_controller_test.go @@ -24,6 +24,7 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" + snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1" "github.com/sirupsen/logrus" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -33,7 +34,6 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/validation" testclocks "k8s.io/utils/clock/testing" - ctrl "sigs.k8s.io/controller-runtime" ctrlClient "sigs.k8s.io/controller-runtime/pkg/client" ctrlfake "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -430,6 +430,8 @@ var _ = Describe("Backup Sync Reconciler", func() { backupStore.On("GetBackupMetadata", backup.backup.Name).Return(backup.backup, nil) backupStore.On("GetPodVolumeBackups", backup.backup.Name).Return(backup.podVolumeBackups, nil) backupStore.On("BackupExists", "bucket-1", backup.backup.Name).Return(true, nil) + backupStore.On("GetCSIVolumeSnapshotClasses", backup.backup.Name).Return([]*snapshotv1api.VolumeSnapshotClass{}, nil) + backupStore.On("GetCSIVolumeSnapshotContents", backup.backup.Name).Return([]*snapshotv1api.VolumeSnapshotContent{}, nil) } backupStore.On("ListBackups").Return(backupNames, nil) }