Organize Resource Deployment with Tiers

ClusterProfile/Profile instances let you deploy add-ons and applications (Helm charts or Kubernetes resources) across a set of managed clusters. Sometimes there might be a need to tweak deployments for specific clusters (a subset of the original group) within that group. Previously, creating a new ClusterProfile/Profile targeting a subset of clusters with resources already managed by another profile resulted in conflicts. Sveltos wouldn't allow deployment for those resources. The concept of ```tier``` is introduced to manage deployment priority for resources targeted by multiple configurations. How it works: 1. Each ClusterProfile/Profile has a new property called __tier__. 2. This tier value controls the deployment order for resources targeting the same cluster element (e.g., a Kubernetes object or Helm chart). 3. By default, the first configuration to reach the cluster "wins" and deploys the resource. 4. Tiers override this behavior. When conflicts occur, the configuration with the lowest tier value takes precedence and deploys the resource. Higher tier values represent lower priority. 5. The default tier value is 100. Benefits: 1. Finer control over resource deployment: Tiers allow you to fine-tune deployments within your cluster, especially when multiple configurations manage the same resources. 2. Conflict resolution: Tiers ensure predictable outcomes when multiple configurations target the same resource. The configuration with the most critical deployment (lowest tier) takes priority. Fixes #305
projectsveltos · May 14, 2024 · cae4728 · cae4728
1 parent 5fc4f75
commit cae4728
Show file tree

Hide file tree

Showing 31 changed files with 1,044 additions and 146 deletions.
diff --git a/Makefile b/Makefile
@@ -205,7 +205,7 @@ kind-test: test create-cluster fv ## Build docker image; start kind cluster; loa
 
 .PHONY: fv
 fv: $(KUBECTL) $(GINKGO) ## Run Sveltos Controller tests using existing cluster
- cd test/fv; $(GINKGO) -nodes $(NUM_NODES) --label-filter='FV' --v --trace --randomize-all
+ cd test/fv; $(GINKGO) -nodes $(NUM_NODES) --label-filter='FV1' --v --trace --randomize-all
 
 .PHONY: fv-sharding
 fv-sharding: $(KUBECTL) $(GINKGO) ## Run Sveltos Controller tests using existing cluster

diff --git a/api/v1alpha1/clustersummary_types.go b/api/v1alpha1/clustersummary_types.go
@@ -21,6 +21,7 @@ import (
  "fmt"
 
  "github.com/pkg/errors"
+ corev1 "k8s.io/api/core/v1"
  apierrors "k8s.io/apimachinery/pkg/api/errors"
  metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  "k8s.io/apimachinery/pkg/runtime/schema"
@@ -114,18 +115,29 @@ type FeatureSummary struct {
  LastAppliedTime *metav1.Time `json:"lastAppliedTime,omitempty"`
 }
 
+// ConflictSummary contains a summary of conflicts with other profiles
+// per cluster feature.
+type ConflictSummary struct {
+ // FeatureID is an identifier of the feature whose status is reported
+ FeatureID FeatureID `json:"featureID"`
+
+ // ConflictingProfiles is the list of Sveltos profiles currently
+ // conflicting with this clusterSummary instance
+ ConflictingProfiles []corev1.ObjectReference `json:"conflictingProfiles"`
+}
+
 // HelmChartStatus specifies whether ClusterSummary is successfully managing
 // a helm chart or not
 // +kubebuilder:validation:Enum:=Managing;Conflict
 type HelmChartStatus string
 
 const (
  // HelmChartStatusManaging indicates helm chart is successfully being managed
- HelChartStatusManaging = HelmChartStatus("Managing")
+ HelmChartStatusManaging = HelmChartStatus("Managing")
 
  // HelmChartStatusConflict indicates there is a conflict with another
  // ClusterSummary to manage the helm chart
- HelChartStatusConflict = HelmChartStatus("Conflict")
+ HelmChartStatusConflict = HelmChartStatus("Conflict")
 )
 
 type HelmChartSummary struct {
@@ -245,15 +257,15 @@ func GetProfileOwnerReference(clusterSummary *ClusterSummary) (*metav1.OwnerRefe
  return nil, fmt.Errorf("(Cluster)Profile owner not found")
 }
 
-// GetProfileOwner returns the (Cluster)Profile owning this clusterSummary.
+// GetProfileOwnerAndTier returns the (Cluster)Profile owning this clusterSummary and its tier.
 // Returns nil if (Cluster)Profile does not exist anymore.
-func GetProfileOwner(ctx context.Context, c client.Client, clusterSummary *ClusterSummary,
-) (client.Object, error) {
+func GetProfileOwnerAndTier(ctx context.Context, c client.Client, clusterSummary *ClusterSummary,
+) (client.Object, int32, error) {
 
  for _, ref := range clusterSummary.OwnerReferences {
  gv, err := schema.ParseGroupVersion(ref.APIVersion)
  if err != nil {
- return nil, errors.WithStack(err)
+ return nil, 0, errors.WithStack(err)
  }
  if gv.Group != GroupVersion.Group {
  continue
@@ -264,24 +276,24 @@ func GetProfileOwner(ctx context.Context, c client.Client, clusterSummary *Clust
  err := c.Get(ctx, types.NamespacedName{Name: ref.Name}, clusterProfile)
  if err != nil {
  if apierrors.IsNotFound(err) {
- return nil, nil
+ return nil, 0, nil
  }
- return nil, err
+ return nil, 0, err
  }
- return clusterProfile, nil
+ return clusterProfile, clusterProfile.Spec.Tier, nil
  } else if ref.Kind == ProfileKind {
  profile := &Profile{}
  err := c.Get(ctx,
  types.NamespacedName{Namespace: clusterSummary.Namespace, Name: ref.Name},
  profile)
  if err != nil {
  if apierrors.IsNotFound(err) {
- return nil, nil
+ return nil, 0, nil
  }
- return nil, err
+ return nil, 0, err
  }
- return profile, nil
+ return profile, profile.Spec.Tier, nil
  }
  }
- return nil, nil
+ return nil, 0, nil
 }
diff --git a/api/v1alpha1/spec.go b/api/v1alpha1/spec.go
@@ -514,6 +514,28 @@ type Spec struct {
  // +optional
  SyncMode SyncMode `json:"syncMode,omitempty"`
 
+ // Tier controls the order of deployment for ClusterProfile or Profile resources targeting
+ // the same cluster resources.
+ // Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes
+ // resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it.
+ // Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest**
+ // Tier value takes priority and deploys the resource.
+ // Higher Tier values represent lower priority. The default Tier value is 100.
+ // Using Tiers provides finer control over resource deployment within your cluster, particularly useful
+ // when multiple configurations manage the same resources.
+ // +kubebuilder:default:=100
+ // +kubebuilder:validation:Minimum=1
+ // +optional
+ Tier int32 `json:"tier,omitempty"`
+
+ // By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after
+ // encountering the first conflict (e.g., another ClusterProfile already deployed the resource).
+ // If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even
+ // if conflicts are detected for previous resources.
+ // +kubebuilder:default:=false
+ // +optional
+ ContinueOnConflict bool `json:"continueOnConflict,omitempty"`
+
  // The maximum number of clusters that can be updated concurrently.
  // Value can be an absolute number (ex: 5) or a percentage of desired cluster (ex: 10%).
  // Defaults to 100%.

diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
diff --git a/cmd/main.go b/cmd/main.go
@@ -79,6 +79,7 @@ var (
  restConfigBurst int
  webhookPort int
  syncPeriod time.Duration
+ conflictRetryTime time.Duration
  version string
  healthAddr string
  profilerAddress string
@@ -227,6 +228,11 @@ func initFlags(fs *pflag.FlagSet) {
  fs.DurationVar(&syncPeriod, "sync-period", defaultSyncPeriod*time.Minute,
  fmt.Sprintf("The minimum interval at which watched resources are reconciled (e.g. 15m). Default: %d minutes",
  defaultSyncPeriod))
+
+ const defaultConflictRetryTime = 30
+ fs.DurationVar(&conflictRetryTime, "conflict-retry-time", defaultConflictRetryTime*time.Second,
+ fmt.Sprintf("The minimum interval at which watched ClusterProfile with conflicts are retried. Defaul: %d seconds",
+ defaultConflictRetryTime))
 }
 
 func setupIndexes(ctx context.Context, mgr ctrl.Manager) {
@@ -420,6 +426,7 @@ func getClusterSummaryReconciler(ctx context.Context, mgr manager.Manager) *cont
  ReferenceMap: make(map[corev1.ObjectReference]*libsveltosset.Set),
  PolicyMux: sync.Mutex{},
  ConcurrentReconciles: concurrentReconciles,
+ ConflictRetryTime: conflictRetryTime,
  Logger: ctrl.Log.WithName("clustersummaryreconciler"),
  }
 }

diff --git a/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml b/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml
@@ -105,6 +105,14 @@ spec:
  clusterSelector:
  description: ClusterSelector identifies clusters to associate to.
  type: string
+ continueOnConflict:
+ default: false
+ description: |-
+ By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after
+ encountering the first conflict (e.g., another ClusterProfile already deployed the resource).
+ If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even
+ if conflicts are detected for previous resources.
+ type: boolean
  dependsOn:
  description: |-
  DependsOn specifies a list of other ClusterProfiles that this instance depends on.
@@ -663,6 +671,21 @@ spec:
  - resource
  type: object
  type: array
+ tier:
+ default: 100
+ description: |-
+ Tier controls the order of deployment for ClusterProfile or Profile resources targeting
+ the same cluster resources.
+ Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes
+ resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it.
+ Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest**
+ Tier value takes priority and deploys the resource.
+ Higher Tier values represent lower priority. The default Tier value is 100.
+ Using Tiers provides finer control over resource deployment within your cluster, particularly useful
+ when multiple configurations manage the same resources.
+ format: int32
+ minimum: 1
+ type: integer
  validateHealths:
  description: |-
  ValidateHealths is a slice of Lua functions to run against

diff --git a/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml b/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml
@@ -121,6 +121,14 @@ spec:
  description: ClusterSelector identifies clusters to associate
  to.
  type: string
+ continueOnConflict:
+ default: false
+ description: |-
+ By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after
+ encountering the first conflict (e.g., another ClusterProfile already deployed the resource).
+ If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even
+ if conflicts are detected for previous resources.
+ type: boolean
  dependsOn:
  description: |-
  DependsOn specifies a list of other ClusterProfiles that this instance depends on.
@@ -681,6 +689,21 @@ spec:
  - resource
  type: object
  type: array
+ tier:
+ default: 100
+ description: |-
+ Tier controls the order of deployment for ClusterProfile or Profile resources targeting
+ the same cluster resources.
+ Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes
+ resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it.
+ Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest**
+ Tier value takes priority and deploys the resource.
+ Higher Tier values represent lower priority. The default Tier value is 100.
+ Using Tiers provides finer control over resource deployment within your cluster, particularly useful
+ when multiple configurations manage the same resources.
+ format: int32
+ minimum: 1
+ type: integer
  validateHealths:
  description: |-
  ValidateHealths is a slice of Lua functions to run against

diff --git a/config/crd/bases/config.projectsveltos.io_profiles.yaml b/config/crd/bases/config.projectsveltos.io_profiles.yaml
@@ -105,6 +105,14 @@ spec:
  clusterSelector:
  description: ClusterSelector identifies clusters to associate to.
  type: string
+ continueOnConflict:
+ default: false
+ description: |-
+ By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after
+ encountering the first conflict (e.g., another ClusterProfile already deployed the resource).
+ If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even
+ if conflicts are detected for previous resources.
+ type: boolean
  dependsOn:
  description: |-
  DependsOn specifies a list of other ClusterProfiles that this instance depends on.
@@ -663,6 +671,21 @@ spec:
  - resource
  type: object
  type: array
+ tier:
+ default: 100
+ description: |-
+ Tier controls the order of deployment for ClusterProfile or Profile resources targeting
+ the same cluster resources.
+ Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes
+ resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it.
+ Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest**
+ Tier value takes priority and deploys the resource.
+ Higher Tier values represent lower priority. The default Tier value is 100.
+ Using Tiers provides finer control over resource deployment within your cluster, particularly useful
+ when multiple configurations manage the same resources.
+ format: int32
+ minimum: 1
+ type: integer
  validateHealths:
  description: |-
  ValidateHealths is a slice of Lua functions to run against