Skip to content

Commit

Permalink
Refactor backup controller with controller-runtime.
Browse files Browse the repository at this point in the history
Signed-off-by: Ming <mqiu@vmware.com>
Signed-off-by: Xun Jiang <blackpiglet@gmail.com>
  • Loading branch information
qiuming-best authored and Xun Jiang committed Mar 1, 2023
1 parent 27bbbec commit 02fa8c3
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 268 deletions.
133 changes: 91 additions & 42 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ import (
snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
snapshotv1client "github.com/kubernetes-csi/external-snapshotter/client/v4/clientset/versioned"
snapshotv1informers "github.com/kubernetes-csi/external-snapshotter/client/v4/informers/externalversions"
snapshotv1listers "github.com/kubernetes-csi/external-snapshotter/client/v4/listers/volumesnapshot/v1"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
corev1api "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
kubeerrs "k8s.io/apimachinery/pkg/util/errors"
Expand Down Expand Up @@ -63,7 +65,6 @@ import (
velerodiscovery "github.com/vmware-tanzu/velero/pkg/discovery"
"github.com/vmware-tanzu/velero/pkg/features"
clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned"
informers "github.com/vmware-tanzu/velero/pkg/generated/informers/externalversions"
"github.com/vmware-tanzu/velero/pkg/metrics"
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/persistence"
Expand Down Expand Up @@ -233,7 +234,6 @@ type server struct {
discoveryClient discovery.DiscoveryInterface
discoveryHelper velerodiscovery.Helper
dynamicClient dynamic.Interface
sharedInformerFactory informers.SharedInformerFactory
csiSnapshotterSharedInformerFactory *CSIInformerFactoryWrapper
csiSnapshotClient *snapshotv1client.Clientset
ctx context.Context
Expand Down Expand Up @@ -348,7 +348,6 @@ func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*s
veleroClient: veleroClient,
discoveryClient: veleroClient.Discovery(),
dynamicClient: dynamicClient,
sharedInformerFactory: informers.NewSharedInformerFactoryWithOptions(veleroClient, 0, informers.WithNamespace(f.Namespace())),
csiSnapshotterSharedInformerFactory: NewCSIInformerFactoryWrapper(csiSnapClient),
csiSnapshotClient: csiSnapClient,
ctx: ctx,
Expand Down Expand Up @@ -479,32 +478,32 @@ func (s *server) veleroResourcesExist() error {
}

// High priorities:
// - Custom Resource Definitions come before Custom Resource so that they can be
// restored with their corresponding CRD.
// - Namespaces go second because all namespaced resources depend on them.
// - Storage Classes are needed to create PVs and PVCs correctly.
// - VolumeSnapshotClasses are needed to provision volumes using volumesnapshots
// - VolumeSnapshotContents are needed as they contain the handle to the volume snapshot in the
// storage provider
// - VolumeSnapshots are needed to create PVCs using the VolumeSnapshot as their data source.
// - PVs go before PVCs because PVCs depend on them.
// - PVCs go before pods or controllers so they can be mounted as volumes.
// - Service accounts go before secrets so service account token secrets can be filled automatically.
// - Secrets and config maps go before pods or controllers so they can be mounted
// as volumes.
// - Limit ranges go before pods or controllers so pods can use them.
// - Pods go before controllers so they can be explicitly restored and potentially
// have pod volume restores run before controllers adopt the pods.
// - Replica sets go before deployments/other controllers so they can be explicitly
// restored and be adopted by controllers.
// - CAPI ClusterClasses go before Clusters.
// - Custom Resource Definitions come before Custom Resource so that they can be
// restored with their corresponding CRD.
// - Namespaces go second because all namespaced resources depend on them.
// - Storage Classes are needed to create PVs and PVCs correctly.
// - VolumeSnapshotClasses are needed to provision volumes using volumesnapshots
// - VolumeSnapshotContents are needed as they contain the handle to the volume snapshot in the
// storage provider
// - VolumeSnapshots are needed to create PVCs using the VolumeSnapshot as their data source.
// - PVs go before PVCs because PVCs depend on them.
// - PVCs go before pods or controllers so they can be mounted as volumes.
// - Service accounts go before secrets so service account token secrets can be filled automatically.
// - Secrets and config maps go before pods or controllers so they can be mounted
// as volumes.
// - Limit ranges go before pods or controllers so pods can use them.
// - Pods go before controllers so they can be explicitly restored and potentially
// have pod volume restores run before controllers adopt the pods.
// - Replica sets go before deployments/other controllers so they can be explicitly
// restored and be adopted by controllers.
// - CAPI ClusterClasses go before Clusters.
//
// Low priorities:
// - Tanzu ClusterBootstraps go last as it can reference any other kind of resources.
// ClusterBootstraps go before CAPI Clusters otherwise a new default ClusterBootstrap object is created for the cluster
// - CAPI Clusters come before ClusterResourceSets because failing to do so means the CAPI controller-manager will panic.
// Both Clusters and ClusterResourceSets need to come before ClusterResourceSetBinding in order to properly restore workload clusters.
// See https://github.com/kubernetes-sigs/cluster-api/issues/4105
// - Tanzu ClusterBootstraps go last as they can reference any other kind of resource.
// ClusterBootstraps go before CAPI Clusters otherwise a new default ClusterBootstrap object is created for the cluster
// - CAPI Clusters come before ClusterResourceSets because failing to do so means the CAPI controller-manager will panic.
// Both Clusters and ClusterResourceSets need to come before ClusterResourceSetBinding in order to properly restore workload clusters.
// See https://github.com/kubernetes-sigs/cluster-api/issues/4105
var defaultRestorePriorities = restore.Priorities{
HighPriorities: []string{
"customresourcedefinitions",
Expand Down Expand Up @@ -564,6 +563,32 @@ func (s *server) initRepoManager() error {
return nil
}

// getCSIVolumeSnapshotListers returns a VolumeSnapshot lister backed by the
// CSI snapshotter shared informer factory. The lister stays nil unless the
// CSI feature flag is enabled and the CSI API group is discoverable; if the
// flag is set but the required CRDs are missing, the server halts.
func (s *server) getCSIVolumeSnapshotListers() snapshotv1listers.VolumeSnapshotLister {
	var lister snapshotv1listers.VolumeSnapshotLister

	// Without the CSI feature flag the lister is intentionally left nil.
	if !features.IsEnabled(velerov1api.CSIFeatureFlag) {
		return lister
	}

	_, err := s.discoveryClient.ServerResourcesForGroupVersion(snapshotv1api.SchemeGroupVersion.String())
	switch {
	case apierrors.IsNotFound(err):
		// The feature flag is on but the CSI CRDs are not installed: halt.
		s.logger.Fatalf("The '%s' feature flag was specified, but CSI API group [%s] was not found.", velerov1api.CSIFeatureFlag, snapshotv1api.SchemeGroupVersion.String())
	case err == nil:
		// The CSI API group exists, so it is safe to reach into the wrapped
		// factory directly; the feature-flag check above makes this safe.
		s.logger.Debug("Creating CSI listers")
		lister = s.csiSnapshotterSharedInformerFactory.factory.Snapshot().V1().VolumeSnapshots().Lister()
	default:
		cmd.CheckError(err)
	}

	return lister
}

func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string) error {
s.logger.Info("Starting controllers")

Expand Down Expand Up @@ -624,19 +649,12 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
}

// start the informers & and wait for the caches to sync
s.sharedInformerFactory.Start(ctx.Done())
s.csiSnapshotterSharedInformerFactory.Start(ctx.Done())
s.logger.Info("Waiting for informer caches to sync")
cacheSyncResults := s.sharedInformerFactory.WaitForCacheSync(ctx.Done())
csiCacheSyncResults := s.csiSnapshotterSharedInformerFactory.WaitForCacheSync(ctx.Done())
s.logger.Info("Done waiting for informer caches to sync")

// Append our CSI informer types into the larger list of caches, so we can check them all at once
for informer, synced := range csiCacheSyncResults {
cacheSyncResults[informer] = synced
}

for informer, synced := range cacheSyncResults {
if !synced {
return errors.Errorf("cache was not synced for informer %v", informer)
}
Expand All @@ -649,18 +667,42 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
s.discoveryHelper,
client.NewDynamicFactory(s.dynamicClient),
podexec.NewPodCommandExecutor(s.kubeClientConfig, s.kubeClient.CoreV1().RESTClient()),
podvolume.NewBackupperFactory(s.repoLocker, s.repoEnsurer, s.veleroClient, s.kubeClient.CoreV1(),
s.kubeClient.CoreV1(), s.kubeClient.CoreV1(),
s.sharedInformerFactory.Velero().V1().BackupRepositories().Informer().HasSynced, s.logger),
podvolume.NewBackupperFactory(
s.repoLocker,
s.repoEnsurer,
s.veleroClient,
s.kubeClient.CoreV1(),
s.kubeClient.CoreV1(),
s.kubeClient.CoreV1(),
s.logger,
),
s.config.podVolumeOperationTimeout,
s.config.defaultVolumesToFsBackup,
s.config.clientPageSize,
s.config.uploaderType,
)
cmd.CheckError(err)
if err := controller.NewBackupReconciler(s.ctx, s.discoveryHelper, backupper, s.logger, s.logLevel, newPluginManager, backupTracker, s.mgr.GetClient(),
s.config.defaultBackupLocation, s.config.defaultVolumesToFsBackup, s.config.defaultBackupTTL, s.config.defaultCSISnapshotTimeout, defaultVolumeSnapshotLocations,
s.metrics, backupStoreGetter, s.config.formatFlag.Parse(), s.credentialFileStore).SetupWithManager(s.mgr); err != nil {
if err := controller.NewBackupReconciler(
s.ctx,
s.discoveryHelper,
backupper,
s.logger,
s.logLevel,
newPluginManager,
backupTracker,
s.mgr.GetClient(),
s.config.defaultBackupLocation,
s.config.defaultVolumesToFsBackup,
s.config.defaultBackupTTL,
s.config.defaultCSISnapshotTimeout,
defaultVolumeSnapshotLocations,
s.metrics,
backupStoreGetter,
s.config.formatFlag.Parse(),
s.getCSIVolumeSnapshotListers(),
s.csiSnapshotClient,
s.credentialFileStore,
).SetupWithManager(s.mgr); err != nil {
s.logger.Fatal(err, "unable to create controller", "controller", controller.Backup)
}
}
Expand Down Expand Up @@ -766,8 +808,15 @@ func (s *server) runControllers(defaultVolumeSnapshotLocations map[string]string
client.NewDynamicFactory(s.dynamicClient),
s.config.restoreResourcePriorities,
s.kubeClient.CoreV1().Namespaces(),
podvolume.NewRestorerFactory(s.repoLocker, s.repoEnsurer, s.veleroClient, s.kubeClient.CoreV1(),
s.kubeClient.CoreV1(), s.kubeClient, s.sharedInformerFactory.Velero().V1().BackupRepositories().Informer().HasSynced, s.logger),
podvolume.NewRestorerFactory(
s.repoLocker,
s.repoEnsurer,
s.veleroClient,
s.kubeClient.CoreV1(),
s.kubeClient.CoreV1(),
s.kubeClient,
s.logger,
),
s.config.podVolumeOperationTimeout,
s.config.resourceTerminatingTimeout,
s.logger,
Expand Down
Loading

0 comments on commit 02fa8c3

Please sign in to comment.