From cae4728bf853bce49da402c4bb377a0042375f24 Mon Sep 17 00:00:00 2001
From: mgianluc
Date: Fri, 10 May 2024 18:37:57 +0200
Subject: [PATCH] Organize Resource Deployment with Tiers

ClusterProfile/Profile instances let you deploy add-ons and applications
(Helm charts or Kubernetes resources) across a set of managed clusters.
Sometimes deployments need to be tweaked for specific clusters, a subset
of that group.

Previously, creating a new ClusterProfile/Profile targeting a subset of
clusters with resources already managed by another profile resulted in
conflicts: Sveltos would not allow those resources to be deployed.

The concept of ```tier``` is introduced to manage deployment priority for
resources targeted by multiple configurations.

How it works:

1. Each ClusterProfile/Profile has a new property called __tier__.
2. This tier value controls the deployment order for resources targeting
   the same cluster element (e.g., a Kubernetes object or Helm chart).
3. By default, the first configuration to reach the cluster "wins" and
   deploys the resource.
4. Tiers override this behavior. When conflicts occur, the configuration
   with the lowest tier value takes precedence and deploys the resource.
   Higher tier values represent lower priority.
5. The default tier value is 100.

Benefits:

1. Finer control over resource deployment: tiers let you fine-tune
   deployments within your cluster, especially when multiple
   configurations manage the same resources.
2. Conflict resolution: tiers ensure predictable outcomes when multiple
   configurations target the same resource. The configuration with the
   most critical deployment (lowest tier) takes priority.

Fixes #305
---
 Makefile                                      | 2 +-
 api/v1alpha1/clustersummary_types.go          | 38 ++--
 api/v1alpha1/spec.go                          | 22 ++
 api/v1alpha1/zz_generated.deepcopy.go         | 32 ++-
 cmd/main.go                                   | 7 +
 ...fig.projectsveltos.io_clusterprofiles.yaml | 23 ++
 ...ig.projectsveltos.io_clustersummaries.yaml | 23 ++
 .../config.projectsveltos.io_profiles.yaml    | 23 ++
 controllers/chartmanager/chartmanager.go      | 110 ++++++---
 controllers/chartmanager/chartmanager_test.go | 44 +++-
 controllers/clustersummary_controller.go      | 22 +-
 controllers/clustersummary_controller_test.go | 6 +-
 controllers/conflicts.go                      | 99 +++++++++
 controllers/handlers_helm.go                  | 190 +++++++++++++---
 controllers/handlers_helm_test.go             | 21 +-
 controllers/handlers_kustomize.go             | 5 +
 controllers/handlers_kustomize_test.go        | 3 +
 controllers/handlers_resources.go             | 18 +-
 controllers/handlers_resources_test.go        | 3 +
 controllers/handlers_utils.go                 | 167 ++++++++++++--
 controllers/handlers_utils_test.go            | 7 +-
 controllers/utils.go                          | 22 ++
 controllers/utils_test.go                     | 4 +-
 go.mod                                        | 2 +-
 go.sum                                        | 4 +-
 manifest/manifest.yaml                        | 69 ++++++
 pkg/scope/clustersummary_test.go              | 4 +-
 test/fv/dryrun_test.go                        | 6 +-
 test/fv/missing_reference_test.go             | 2 +-
 test/fv/profile_test.go                       | 4 +-
 test/fv/tier_test.go                          | 208 ++++++++++++++++++
 31 files changed, 1044 insertions(+), 146 deletions(-)
 create mode 100644 controllers/conflicts.go
 create mode 100644 test/fv/tier_test.go

diff --git a/Makefile b/Makefile
index c3754503..94dbdf60 100644
--- a/Makefile
+++ b/Makefile
@@ -205,7 +205,7 @@ kind-test: test create-cluster fv ## Build docker image; start kind cluster; loa
 .PHONY: fv
 fv: $(KUBECTL) $(GINKGO) ## Run Sveltos Controller tests using existing cluster
-	cd test/fv; $(GINKGO) -nodes $(NUM_NODES) --label-filter='FV' --v --trace --randomize-all
+	cd test/fv; $(GINKGO) -nodes $(NUM_NODES) --label-filter='FV1' --v --trace
--randomize-all .PHONY: fv-sharding fv-sharding: $(KUBECTL) $(GINKGO) ## Run Sveltos Controller tests using existing cluster diff --git a/api/v1alpha1/clustersummary_types.go b/api/v1alpha1/clustersummary_types.go index 27ff9844..c55b8206 100644 --- a/api/v1alpha1/clustersummary_types.go +++ b/api/v1alpha1/clustersummary_types.go @@ -21,6 +21,7 @@ import ( "fmt" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -114,6 +115,17 @@ type FeatureSummary struct { LastAppliedTime *metav1.Time `json:"lastAppliedTime,omitempty"` } +// ConflictSummary contains a summary of conflicts with other profiles +// per cluster feature. +type ConflictSummary struct { + // FeatureID is an indentifier of the feature whose status is reported + FeatureID FeatureID `json:"featureID"` + + // ConflictingProfiles is the list of Sveltos profiles currently + // conflicting with this clusterSummary instance + ConflictingProfiles []corev1.ObjectReference `json:"conflictingProfiles"` +} + // HelChartStatus specifies whether ClusterSummary is successfully managing // an helm chart or not // +kubebuilder:validation:Enum:=Managing;Conflict @@ -121,11 +133,11 @@ type HelmChartStatus string const ( // HelChartStatusManaging indicates helm chart is successfully being managed - HelChartStatusManaging = HelmChartStatus("Managing") + HelmChartStatusManaging = HelmChartStatus("Managing") // HelChartStatusConflict indicates there is a conflict with another // ClusterSummary to manage the helm chart - HelChartStatusConflict = HelmChartStatus("Conflict") + HelmChartStatusConflict = HelmChartStatus("Conflict") ) type HelmChartSummary struct { @@ -245,15 +257,15 @@ func GetProfileOwnerReference(clusterSummary *ClusterSummary) (*metav1.OwnerRefe return nil, fmt.Errorf("(Cluster)Profile owner not found") } -// GetProfileOwner returns the (Cluster)Profile owning this clusterSummary. +// GetProfileOwnerAndTier returns the (Cluster)Profile owning this clusterSummary and its tier. // Returns nil if (Cluster)Profile does not exist anymore. 
-func GetProfileOwner(ctx context.Context, c client.Client, clusterSummary *ClusterSummary, -) (client.Object, error) { +func GetProfileOwnerAndTier(ctx context.Context, c client.Client, clusterSummary *ClusterSummary, +) (client.Object, int32, error) { for _, ref := range clusterSummary.OwnerReferences { gv, err := schema.ParseGroupVersion(ref.APIVersion) if err != nil { - return nil, errors.WithStack(err) + return nil, 0, errors.WithStack(err) } if gv.Group != GroupVersion.Group { continue @@ -264,11 +276,11 @@ func GetProfileOwner(ctx context.Context, c client.Client, clusterSummary *Clust err := c.Get(ctx, types.NamespacedName{Name: ref.Name}, clusterProfile) if err != nil { if apierrors.IsNotFound(err) { - return nil, nil + return nil, 0, nil } - return nil, err + return nil, 0, err } - return clusterProfile, nil + return clusterProfile, clusterProfile.Spec.Tier, nil } else if ref.Kind == ProfileKind { profile := &Profile{} err := c.Get(ctx, @@ -276,12 +288,12 @@ func GetProfileOwner(ctx context.Context, c client.Client, clusterSummary *Clust profile) if err != nil { if apierrors.IsNotFound(err) { - return nil, nil + return nil, 0, nil } - return nil, err + return nil, 0, err } - return profile, nil + return profile, profile.Spec.Tier, nil } } - return nil, nil + return nil, 0, nil } diff --git a/api/v1alpha1/spec.go b/api/v1alpha1/spec.go index 9744d0fb..69e2ad54 100644 --- a/api/v1alpha1/spec.go +++ b/api/v1alpha1/spec.go @@ -514,6 +514,28 @@ type Spec struct { // +optional SyncMode SyncMode `json:"syncMode,omitempty"` + // Tier controls the order of deployment for ClusterProfile or Profile resources targeting + // the same cluster resources. + // Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes + // resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it. + // Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest** + // Tier value takes priority and deploys the resource. + // Higher Tier values represent lower priority. The default Tier value is 100. + // Using Tiers provides finer control over resource deployment within your cluster, particularly useful + // when multiple configurations manage the same resources. + // +kubebuilder:default:=100 + // +kubebuilder:validation:Minimum=1 + // +optional + Tier int32 `json:"tier,omitempty"` + + // By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after + // encountering the first conflict (e.g., another ClusterProfile already deployed the resource). + // If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even + // if conflicts are detected for previous resources. + // +kubebuilder:default:=false + // +optional + ContinueOnConflict bool `json:"continueOnConflict,omitempty"` + // The maximum number of clusters that can be updated concurrently. // Value can be an absolute number (ex: 5) or a percentage of desired cluster (ex: 10%). // Defaults to 100%. 
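The two Spec fields added above, Tier and ContinueOnConflict, are the user-facing knobs for the behavior described in the commit message. As a rough illustration of how a higher-priority profile could be built against this API (a minimal sketch; the profile name and tier value are invented for the example, and client/scheme wiring is omitted):

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	configv1alpha1 "github.com/projectsveltos/addon-controller/api/v1alpha1"
)

func main() {
	// Hypothetical profile: tier 10 beats the default tier 100, so on a
	// conflict this ClusterProfile takes over the contested resources.
	cp := &configv1alpha1.ClusterProfile{
		ObjectMeta: metav1.ObjectMeta{Name: "critical-addons"}, // example name
		Spec: configv1alpha1.Spec{
			Tier:               10,   // lower value => higher priority (default is 100)
			ContinueOnConflict: true, // keep deploying the remaining resources on conflicts
		},
	}
	fmt.Printf("%s: tier=%d continueOnConflict=%t\n",
		cp.Name, cp.Spec.Tier, cp.Spec.ContinueOnConflict)
}
```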
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 7bb1b4f3..555b3823 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -22,8 +22,8 @@ package v1alpha1 import ( apiv1alpha1 "github.com/projectsveltos/libsveltos/api/v1alpha1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -442,7 +442,7 @@ func (in *Clusters) DeepCopyInto(out *Clusters) { } if in.Clusters != nil { in, out := &in.Clusters, &out.Clusters - *out = make([]corev1.ObjectReference, len(*in)) + *out = make([]v1.ObjectReference, len(*in)) copy(*out, *in) } } @@ -457,6 +457,26 @@ func (in *Clusters) DeepCopy() *Clusters { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConflictSummary) DeepCopyInto(out *ConflictSummary) { + *out = *in + if in.ConflictingProfiles != nil { + in, out := &in.ConflictingProfiles, &out.ConflictingProfiles + *out = make([]v1.ObjectReference, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConflictSummary. +func (in *ConflictSummary) DeepCopy() *ConflictSummary { + if in == nil { + return nil + } + out := new(ConflictSummary) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DryRunReconciliationError) DeepCopyInto(out *DryRunReconciliationError) { *out = *in @@ -605,7 +625,7 @@ func (in *HelmOptions) DeepCopyInto(out *HelmOptions) { *out = *in if in.Timeout != nil { in, out := &in.Timeout, &out.Timeout - *out = new(v1.Duration) + *out = new(metav1.Duration) **out = **in } if in.Labels != nil { @@ -839,7 +859,7 @@ func (in *Spec) DeepCopyInto(out *Spec) { *out = *in if in.ClusterRefs != nil { in, out := &in.ClusterRefs, &out.ClusterRefs - *out = make([]corev1.ObjectReference, len(*in)) + *out = make([]v1.ObjectReference, len(*in)) copy(*out, *in) } if in.SetRefs != nil { @@ -919,7 +939,7 @@ func (in *Status) DeepCopyInto(out *Status) { *out = *in if in.MatchingClusterRefs != nil { in, out := &in.MatchingClusterRefs, &out.MatchingClusterRefs - *out = make([]corev1.ObjectReference, len(*in)) + *out = make([]v1.ObjectReference, len(*in)) copy(*out, *in) } in.UpdatingClusters.DeepCopyInto(&out.UpdatingClusters) diff --git a/cmd/main.go b/cmd/main.go index a6b59223..3812f655 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -79,6 +79,7 @@ var ( restConfigBurst int webhookPort int syncPeriod time.Duration + conflictRetryTime time.Duration version string healthAddr string profilerAddress string @@ -227,6 +228,11 @@ func initFlags(fs *pflag.FlagSet) { fs.DurationVar(&syncPeriod, "sync-period", defaultSyncPeriod*time.Minute, fmt.Sprintf("The minimum interval at which watched resources are reconciled (e.g. 15m). Default: %d minutes", defaultSyncPeriod)) + + const defaultConflictRetryTime = 30 + fs.DurationVar(&conflictRetryTime, "conflict-retry-time", defaultConflictRetryTime*time.Second, + fmt.Sprintf("The minimum interval at which watched ClusterProfile with conflicts are retried. 
Defaul: %d seconds", + defaultConflictRetryTime)) } func setupIndexes(ctx context.Context, mgr ctrl.Manager) { @@ -420,6 +426,7 @@ func getClusterSummaryReconciler(ctx context.Context, mgr manager.Manager) *cont ReferenceMap: make(map[corev1.ObjectReference]*libsveltosset.Set), PolicyMux: sync.Mutex{}, ConcurrentReconciles: concurrentReconciles, + ConflictRetryTime: conflictRetryTime, Logger: ctrl.Log.WithName("clustersummaryreconciler"), } } diff --git a/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml b/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml index 6bc9f37d..bb0bef4a 100644 --- a/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml +++ b/config/crd/bases/config.projectsveltos.io_clusterprofiles.yaml @@ -105,6 +105,14 @@ spec: clusterSelector: description: ClusterSelector identifies clusters to associate to. type: string + continueOnConflict: + default: false + description: |- + By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after + encountering the first conflict (e.g., another ClusterProfile already deployed the resource). + If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even + if conflicts are detected for previous resources. + type: boolean dependsOn: description: |- DependsOn specifies a list of other ClusterProfiles that this instance depends on. @@ -663,6 +671,21 @@ spec: - resource type: object type: array + tier: + default: 100 + description: |- + Tier controls the order of deployment for ClusterProfile or Profile resources targeting + the same cluster resources. + Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes + resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it. + Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest** + Tier value takes priority and deploys the resource. + Higher Tier values represent lower priority. The default Tier value is 100. + Using Tiers provides finer control over resource deployment within your cluster, particularly useful + when multiple configurations manage the same resources. + format: int32 + minimum: 1 + type: integer validateHealths: description: |- ValidateHealths is a slice of Lua functions to run against diff --git a/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml b/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml index 7390dd30..f03722d4 100644 --- a/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml +++ b/config/crd/bases/config.projectsveltos.io_clustersummaries.yaml @@ -121,6 +121,14 @@ spec: description: ClusterSelector identifies clusters to associate to. type: string + continueOnConflict: + default: false + description: |- + By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after + encountering the first conflict (e.g., another ClusterProfile already deployed the resource). + If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even + if conflicts are detected for previous resources. + type: boolean dependsOn: description: |- DependsOn specifies a list of other ClusterProfiles that this instance depends on. @@ -681,6 +689,21 @@ spec: - resource type: object type: array + tier: + default: 100 + description: |- + Tier controls the order of deployment for ClusterProfile or Profile resources targeting + the same cluster resources. 
+ Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes + resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it. + Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest** + Tier value takes priority and deploys the resource. + Higher Tier values represent lower priority. The default Tier value is 100. + Using Tiers provides finer control over resource deployment within your cluster, particularly useful + when multiple configurations manage the same resources. + format: int32 + minimum: 1 + type: integer validateHealths: description: |- ValidateHealths is a slice of Lua functions to run against diff --git a/config/crd/bases/config.projectsveltos.io_profiles.yaml b/config/crd/bases/config.projectsveltos.io_profiles.yaml index d85348ab..7584aaae 100644 --- a/config/crd/bases/config.projectsveltos.io_profiles.yaml +++ b/config/crd/bases/config.projectsveltos.io_profiles.yaml @@ -105,6 +105,14 @@ spec: clusterSelector: description: ClusterSelector identifies clusters to associate to. type: string + continueOnConflict: + default: false + description: |- + By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after + encountering the first conflict (e.g., another ClusterProfile already deployed the resource). + If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even + if conflicts are detected for previous resources. + type: boolean dependsOn: description: |- DependsOn specifies a list of other ClusterProfiles that this instance depends on. @@ -663,6 +671,21 @@ spec: - resource type: object type: array + tier: + default: 100 + description: |- + Tier controls the order of deployment for ClusterProfile or Profile resources targeting + the same cluster resources. + Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes + resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it. + Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest** + Tier value takes priority and deploys the resource. + Higher Tier values represent lower priority. The default Tier value is 100. + Using Tiers provides finer control over resource deployment within your cluster, particularly useful + when multiple configurations manage the same resources. + format: int32 + minimum: 1 + type: integer validateHealths: description: |- ValidateHealths is a slice of Lua functions to run against diff --git a/controllers/chartmanager/chartmanager.go b/controllers/chartmanager/chartmanager.go index 0cd64ed7..c9cb7b6b 100644 --- a/controllers/chartmanager/chartmanager.go +++ b/controllers/chartmanager/chartmanager.go @@ -33,25 +33,25 @@ import ( // - match same clusters // - reference same set of Helm Chart(s)/Version(s) // Above is a misconfiguration/conflict that needs to be detected. -// Only one ClusterSummary (there is one ClusterSummary for each pair ClusterProfile/CAPI Cluster) can manage an helm +// Only one ClusterSummary (there is one ClusterSummary for each pair ClusterProfile/managed Cluster) can manage an helm // version. // Following client is used to solve such scenarios. One ClusterSummary will get the manager role for a given helm version -// in a given CAPI Cluster. All other ClusterSummaries will report a conflict that requires admin intervention to be resolved. 
+// in a given managed Cluster. All other ClusterSummaries will report a conflict that requires admin intervention to be resolved. type instance struct { chartMux sync.Mutex // use a Mutex to update chart maps as ClusterSummary MaxConcurrentReconciles is higher than one - // Multiple ClusterSummaries might match same CAPI Cluster and try to deploy same helm release in the same namespace. + // Multiple ClusterSummaries might match same managed Cluster and try to deploy same helm release in the same namespace. // That needs to be flagged as a misconfiguration. // In order to achieve that following map is used. It contains: - // - per CAPI Cluster (key: clusterNamespace/clusterName) + // - per managed Cluster (key: clusterNamespace/clusterName) // - per Release (key: releaseNamespace/releaseName) - // - list of ClusterSummaries that want to deploy above helm release in CAPI Clustem. - // First ClusterSummary able to add entry for a given CAPI Cluster/Release is allowed to manage that release + // - list of ClusterSummaries that want to deploy above helm release in managed Clustem. + // First ClusterSummary able to add entry for a given managed Cluster/Release is allowed to manage that release // canManageChart answers whether a ClusterSummary can manage an helm feature. // Any other ClusterSummary will report the misconfiguration. // - // When ClusterSummary managing a release in a CAPI Cluster is deleted or stops managing that release, the next + // When ClusterSummary managing a release in a managed Cluster is deleted or stops managing that release, the next // ClusterSummary in line will become the new manager. // That is achieved as part of ClusterSummaryReconciler. Such reconciler watches for ClusterSummary changes, when // a ClusterSummary.Spec.ClusterProfileSpec.HelmCharts changes, requeues all ClusterSummaries currently registered @@ -59,7 +59,7 @@ type instance struct { // // Such map is also used in following scenario to detect helm charts which used to be managed by a ClusterSummary // but are not referenced by ClusterSummary anymore: - // - ClusterProfileSpec is listing an Helm chart to be deployed => this helm release is provisioned in the CAPI Cluster; + // - ClusterProfileSpec is listing an Helm chart to be deployed => this helm release is provisioned in the managed Cluster; // - without asking for helm chart to be released, the Helm chart is removed from the ClusterProfileSpec; // - helm chart needs to be withdrawn if no other ClusterSummary is trying to manage it. perClusterChartMap map[string]map[string][]string @@ -104,9 +104,9 @@ func GetChartManagerInstance(ctx context.Context, c client.Client) (*instance, e } // RegisterClusterSummaryForCharts for all the HelmCharts ClusterSummary currently references, -// regisetrs ClusterSummary as one requestor to manage each one of those helm chart in a given -// CAPI Cluster. -// Only first ClusterSummary registering for a given Helm release in a given CAPI Cluster is given +// registers ClusterSummary as one requestor to manage each one of those helm chart in a given +// managed Cluster. +// Only first ClusterSummary registering for a given Helm release in a given managed Cluster is given // the manager role. 
func (m *instance) RegisterClusterSummaryForCharts(clusterSummary *configv1alpha1.ClusterSummary) { if len(clusterSummary.Spec.ClusterProfileSpec.HelmCharts) == 0 { @@ -130,6 +130,33 @@ func (m *instance) RegisterClusterSummaryForCharts(clusterSummary *configv1alpha } } +// SetManagerForChart set clusterSummary as current manager for this chart +func (m *instance) SetManagerForChart(clusterSummary *configv1alpha1.ClusterSummary, chart *configv1alpha1.HelmChart) { + clusterKey := m.getClusterKey(clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, + clusterSummary.Spec.ClusterType) + clusterSummaryKey := m.getClusterSummaryKey(clusterSummary.Name) + releaseKey := m.GetReleaseKey(chart.ReleaseNamespace, chart.ReleaseName) + + m.chartMux.Lock() + defer m.chartMux.Unlock() + + if _, ok := m.perClusterChartMap[clusterKey]; !ok { + m.perClusterChartMap[clusterKey] = make(map[string][]string) + } + + if _, ok := m.perClusterChartMap[clusterKey][releaseKey]; !ok { + m.perClusterChartMap[clusterKey][releaseKey] = make([]string, 0) + } + + // if cluster was already register, remove it + m.removeClusterSummaryFromChartRegistration(clusterKey, releaseKey, clusterSummaryKey) + + // Add it back as manager + newValue := []string{clusterSummaryKey} + newValue = append(newValue, m.perClusterChartMap[clusterKey][releaseKey]...) + m.perClusterChartMap[clusterKey][releaseKey] = newValue +} + // UnregisterClusterSummaryForChart unregisters ClusterSummary as possible manager for specified chart func (m *instance) UnregisterClusterSummaryForChart(clusterSummary *configv1alpha1.ClusterSummary, chart *configv1alpha1.HelmChart) { @@ -200,15 +227,21 @@ func (m *instance) cleanRegistrations(clusterSummary *configv1alpha1.ClusterSumm } // If ClusterSummary was previously registered to manage this release, consider this entry stale // and remove it. - for i := range m.perClusterChartMap[clusterKey][releaseKey] { - if m.perClusterChartMap[clusterKey][releaseKey][i] == clusterSummaryKey { - // Order is not important. So move the element at index i with last one in order to avoid moving all elements. - length := len(m.perClusterChartMap[clusterKey][releaseKey]) - m.perClusterChartMap[clusterKey][releaseKey][i] = - m.perClusterChartMap[clusterKey][releaseKey][length-1] - m.perClusterChartMap[clusterKey][releaseKey] = m.perClusterChartMap[clusterKey][releaseKey][:length-1] - break - } + m.removeClusterSummaryFromChartRegistration(clusterKey, releaseKey, clusterSummaryKey) + } +} + +func (m *instance) removeClusterSummaryFromChartRegistration(clusterKey, releaseKey, clusterSummaryKey string) { + // If ClusterSummary was previously registered to manage this release, consider this entry stale + // and remove it. + for i := range m.perClusterChartMap[clusterKey][releaseKey] { + if m.perClusterChartMap[clusterKey][releaseKey][i] == clusterSummaryKey { + // Order is not important. So move the element at index i with last one in order to avoid moving all elements. 
+ length := len(m.perClusterChartMap[clusterKey][releaseKey]) + m.perClusterChartMap[clusterKey][releaseKey][i] = + m.perClusterChartMap[clusterKey][releaseKey][length-1] + m.perClusterChartMap[clusterKey][releaseKey] = m.perClusterChartMap[clusterKey][releaseKey][:length-1] + break } } } @@ -265,6 +298,11 @@ func (m *instance) CanManageChart(clusterSummary *configv1alpha1.ClusterSummary, m.chartMux.Lock() defer m.chartMux.Unlock() + if !m.IsChartManaged(clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, + clusterSummary.Spec.ClusterType, chart) { + + return true + } return m.isCurrentlyManager(clusterKey, releaseKey, clusterSummaryKey) } @@ -295,8 +333,30 @@ func (m *instance) GetManagerForChart(clusterNamespace, clusterName string, return m.perClusterChartMap[clusterKey][releaseKey][0], nil } +// IsChartManaged returns true if a ClusterSummary is managing the chart +func (m *instance) IsChartManaged(clusterNamespace, clusterName string, + clusterType libsveltosv1alpha1.ClusterType, chart *configv1alpha1.HelmChart) bool { + + clusterKey := m.getClusterKey(clusterNamespace, clusterName, clusterType) + releaseKey := m.GetReleaseKey(chart.ReleaseNamespace, chart.ReleaseName) + + if _, ok := m.perClusterChartMap[clusterKey]; !ok { + return false + } + + if _, ok := m.perClusterChartMap[clusterKey][releaseKey]; !ok { + return false + } + + if len(m.perClusterChartMap[clusterKey][releaseKey]) == 0 { + return false + } + + return true +} + // GetRegisteredClusterSummaries returns all ClusterSummary currently registered for at -// at least one helm chart in the provided CAPI cluster +// at least one helm chart in the provided managed cluster func (m *instance) GetRegisteredClusterSummaries(clusterNamespace, clusterName string, clusterType libsveltosv1alpha1.ClusterType) []string { @@ -328,7 +388,7 @@ func (m *instance) GetRegisteredClusterSummaries(clusterNamespace, clusterName s } // isCurrentlyManager returns true if clusterSummaryKey is currently the designed manager -// for helm release releaseKey in the CAPI cluster clusterKey +// for helm release releaseKey in the managed cluster clusterKey func (m *instance) isCurrentlyManager(clusterKey, releaseKey, clusterSummaryKey string) bool { if _, ok := m.perClusterChartMap[clusterKey]; !ok { return false @@ -347,7 +407,7 @@ func (m *instance) isCurrentlyManager(clusterKey, releaseKey, clusterSummaryKey return false } -// getClusterKey returns the Key representing a CAPI Cluster +// getClusterKey returns the Key representing a managed Cluster func (m *instance) getClusterKey(clusterNamespace, clusterName string, clusterType libsveltosv1alpha1.ClusterType) string { prefix := "capi" if clusterType == libsveltosv1alpha1.ClusterTypeSveltos { @@ -459,7 +519,7 @@ func (m *instance) addManagers(clusterSummary *configv1alpha1.ClusterSummary) { for i := range clusterSummary.Status.HelmReleaseSummaries { summary := &clusterSummary.Status.HelmReleaseSummaries[i] - if summary.Status == configv1alpha1.HelChartStatusManaging { + if summary.Status == configv1alpha1.HelmChartStatusManaging { releaseKey := m.GetReleaseKey(summary.ReleaseNamespace, summary.ReleaseName) m.addClusterEntry(clusterKey) m.addReleaseEntry(clusterKey, releaseKey) @@ -477,7 +537,7 @@ func (m *instance) addNonManagers(clusterSummary *configv1alpha1.ClusterSummary) for i := range clusterSummary.Status.HelmReleaseSummaries { summary := &clusterSummary.Status.HelmReleaseSummaries[i] - if summary.Status != configv1alpha1.HelChartStatusManaging { + if summary.Status != 
configv1alpha1.HelmChartStatusManaging { releaseKey := m.GetReleaseKey(summary.ReleaseNamespace, summary.ReleaseName) m.addClusterEntry(clusterKey) m.addReleaseEntry(clusterKey, releaseKey) diff --git a/controllers/chartmanager/chartmanager_test.go b/controllers/chartmanager/chartmanager_test.go index 8b9a02a1..924e3874 100644 --- a/controllers/chartmanager/chartmanager_test.go +++ b/controllers/chartmanager/chartmanager_test.go @@ -120,7 +120,7 @@ var _ = Describe("Chart manager", func() { Expect(manager.CanManageChart(clusterSummary, chart)).To(BeTrue()) manager.UnregisterClusterSummaryForChart(clusterSummary, chart) - Expect(manager.CanManageChart(clusterSummary, chart)).To(BeFalse()) + Expect(manager.CanManageChart(clusterSummary, chart)).To(BeTrue()) }) It("canManageChart return true only for the first registered ClusterSummary", func() { @@ -147,6 +147,40 @@ var _ = Describe("Chart manager", func() { } }) + It("SetManagerForChart registers ClusterSummary as thr instance that manages a given chart", func() { + manager, err := chartmanager.GetChartManagerInstance(context.TODO(), c) + Expect(err).To(BeNil()) + + manager.RegisterClusterSummaryForCharts(clusterSummary) + + tmpClusterSummary := &configv1alpha1.ClusterSummary{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterSummary.Name + randomString(), + }, + Spec: clusterSummary.Spec, + } + + manager.RegisterClusterSummaryForCharts(tmpClusterSummary) + defer removeSubscriptions(c, tmpClusterSummary) + + for i := range clusterSummary.Spec.ClusterProfileSpec.HelmCharts { + chart := &clusterSummary.Spec.ClusterProfileSpec.HelmCharts[i] + By(fmt.Sprintf("Verifying ClusterSummary %s does not manage helm release %s/%s", + tmpClusterSummary.Name, chart.ReleaseNamespace, chart.ReleaseName)) + Expect(manager.CanManageChart(tmpClusterSummary, chart)).To(BeFalse()) + } + + for i := range clusterSummary.Spec.ClusterProfileSpec.HelmCharts { + chart := &clusterSummary.Spec.ClusterProfileSpec.HelmCharts[i] + By(fmt.Sprintf("Setting ClusterSummary %s as manager for chart %s/%s", + tmpClusterSummary.Name, chart.ReleaseNamespace, chart.ReleaseName)) + manager.SetManagerForChart(clusterSummary, chart) + By(fmt.Sprintf("Verifying ClusterSummary %s can manage helm release %s/%s", + tmpClusterSummary.Name, chart.ReleaseNamespace, chart.ReleaseName)) + Expect(manager.CanManageChart(tmpClusterSummary, chart)).To(BeFalse()) + } + }) + It("removeStaleRegistrations removes registration for helm charts not referenced anymore", func() { manager, err := chartmanager.GetChartManagerInstance(context.TODO(), c) Expect(err).To(BeNil()) @@ -299,12 +333,12 @@ var _ = Describe("Chart manager", func() { { ReleaseName: clusterSummary.Spec.ClusterProfileSpec.HelmCharts[0].ReleaseName, ReleaseNamespace: clusterSummary.Spec.ClusterProfileSpec.HelmCharts[0].ReleaseNamespace, - Status: configv1alpha1.HelChartStatusManaging, + Status: configv1alpha1.HelmChartStatusManaging, }, { ReleaseName: clusterSummary.Spec.ClusterProfileSpec.HelmCharts[1].ReleaseName, ReleaseNamespace: clusterSummary.Spec.ClusterProfileSpec.HelmCharts[1].ReleaseNamespace, - Status: configv1alpha1.HelChartStatusConflict, + Status: configv1alpha1.HelmChartStatusConflict, }, }, } @@ -322,12 +356,12 @@ var _ = Describe("Chart manager", func() { { ReleaseName: clusterSummary.Spec.ClusterProfileSpec.HelmCharts[0].ReleaseName, ReleaseNamespace: clusterSummary.Spec.ClusterProfileSpec.HelmCharts[0].ReleaseNamespace, - Status: configv1alpha1.HelChartStatusConflict, + Status: configv1alpha1.HelmChartStatusConflict, }, { 
ReleaseName: clusterSummary.Spec.ClusterProfileSpec.HelmCharts[1].ReleaseName, ReleaseNamespace: clusterSummary.Spec.ClusterProfileSpec.HelmCharts[1].ReleaseNamespace, - Status: configv1alpha1.HelChartStatusManaging, + Status: configv1alpha1.HelmChartStatusManaging, }, }, }, diff --git a/controllers/clustersummary_controller.go b/controllers/clustersummary_controller.go index 2e404e42..13ddb19c 100644 --- a/controllers/clustersummary_controller.go +++ b/controllers/clustersummary_controller.go @@ -92,7 +92,8 @@ type ClusterSummaryReconciler struct { ReferenceMap map[corev1.ObjectReference]*libsveltosset.Set // key: Referenced object; value: set of all ClusterSummaries referencing the resource ClusterMap map[corev1.ObjectReference]*libsveltosset.Set // key: Sveltos/Cluster; value: set of all ClusterSummaries for that Cluster - ctrl controller.Controller + ConflictRetryTime time.Duration + ctrl controller.Controller } //+kubebuilder:rbac:groups=config.projectsveltos.io,resources=clustersummaries,verbs=get;list;watch;create;update;patch;delete @@ -132,7 +133,7 @@ func (r *ClusterSummaryReconciler) Reconcile(ctx context.Context, req ctrl.Reque } // Fetch the (Cluster)Profile. - profile, err := configv1alpha1.GetProfileOwner(ctx, r.Client, clusterSummary) + profile, _, err := configv1alpha1.GetProfileOwnerAndTier(ctx, r.Client, clusterSummary) if err != nil { logger.Error(err, "Failed to get owner clusterProfile") return reconcile.Result{}, errors.Wrapf( @@ -346,7 +347,14 @@ func (r *ClusterSummaryReconciler) reconcileNormal( } } - if err := r.deploy(ctx, clusterSummaryScope, logger); err != nil { + err = r.deploy(ctx, clusterSummaryScope, logger) + if err != nil { + var conflictErr *deployer.ConflictError + ok := errors.As(err, &conflictErr) + if ok { + logger.V(logs.LogInfo).Error(err, "failed to deploy because of conflict") + return reconcile.Result{Requeue: true, RequeueAfter: r.ConflictRetryTime}, nil + } logger.V(logs.LogInfo).Error(err, "failed to deploy") return reconcile.Result{Requeue: true, RequeueAfter: normalRequeueAfter}, nil } @@ -472,14 +480,14 @@ func (r *ClusterSummaryReconciler) deploy(ctx context.Context, clusterSummarySco clusterSummary := clusterSummaryScope.ClusterSummary logger = logger.WithValues("clusternamespace", clusterSummary.Spec.ClusterNamespace, "clustername", clusterSummary.Spec.ClusterName) - coreResourceErr := r.deployResources(ctx, clusterSummaryScope, logger) + resourceErr := r.deployResources(ctx, clusterSummaryScope, logger) helmErr := r.deployHelm(ctx, clusterSummaryScope, logger) kustomizeError := r.deployKustomizeRefs(ctx, clusterSummaryScope, logger) - if coreResourceErr != nil { - return coreResourceErr + if resourceErr != nil { + return resourceErr } if helmErr != nil { @@ -964,7 +972,7 @@ func (r *ClusterSummaryReconciler) canRemoveFinalizer(ctx context.Context, // A ClusterSummary in DryRun mode can only be removed if also ClusterProfile is marked // for deletion. Otherwise ClusterSummary has to stay and list what would happen if owning // ClusterProfile is moved away from DryRun mode. 
- profile, err := configv1alpha1.GetProfileOwner(ctx, r.Client, clusterSummaryScope.ClusterSummary) + profile, _, err := configv1alpha1.GetProfileOwnerAndTier(ctx, r.Client, clusterSummaryScope.ClusterSummary) if err != nil { logger.V(logs.LogInfo).Info(fmt.Sprintf("failed to get ClusterProfile %v", err)) return false diff --git a/controllers/clustersummary_controller_test.go b/controllers/clustersummary_controller_test.go index f00a509b..dcffe6ea 100644 --- a/controllers/clustersummary_controller_test.go +++ b/controllers/clustersummary_controller_test.go @@ -272,8 +272,10 @@ var _ = Describe("ClustersummaryController", func() { Expect(c.Get(context.TODO(), types.NamespacedName{Namespace: clusterSummary.Namespace, Name: clusterSummary.Name}, currentClusterSummary)).To(Succeed()) // Verify chart registrations have not changed - Expect(manager.CanManageChart(clusterSummary, ¤tClusterSummary.Spec.ClusterProfileSpec.HelmCharts[0])).To(BeTrue()) - Expect(manager.CanManageChart(clusterSummary, ¤tClusterSummary.Spec.ClusterProfileSpec.HelmCharts[1])).To(BeFalse()) + Expect(manager.IsChartManaged(clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, clusterSummary.Spec.ClusterType, + ¤tClusterSummary.Spec.ClusterProfileSpec.HelmCharts[0])).To(BeTrue()) + Expect(manager.IsChartManaged(clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, clusterSummary.Spec.ClusterType, + ¤tClusterSummary.Spec.ClusterProfileSpec.HelmCharts[1])).To(BeFalse()) }) It("shouldReconcile returns true when mode is OneTime but not all policies are deployed", func() { diff --git a/controllers/conflicts.go b/controllers/conflicts.go new file mode 100644 index 00000000..cb486b63 --- /dev/null +++ b/controllers/conflicts.go @@ -0,0 +1,99 @@ +/* +Copyright 2024. projectsveltos.io. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "fmt" + "strconv" + + "github.com/go-logr/logr" + + configv1alpha1 "github.com/projectsveltos/addon-controller/api/v1alpha1" + "github.com/projectsveltos/addon-controller/pkg/scope" + logs "github.com/projectsveltos/libsveltos/lib/logsettings" +) + +// Any Helm chart can be managed by only one ClusterProfile/Profile instance +// Any Kubernetes resources can be managed by only one ClusterProfile/Profile instance +// A conflict arises when a ClusterProfile or Profile tries to manage a resource (chart +// or Kubernetes object) already controlled by another. These conflicts are resolved based on tier. +// Each ClusterProfile or Profile belongs to a specific tier, with lower tiers taking precedence. +// In case of a tie (same tier), the existing owner (the Profile/ClusterProfile currently managing +// the resource) retains control. + +// Determines if a ClusterProfile/Profile can take ownership of a resource +// currently managed by another ClusterProfile/Profile. 
+// This function considers the tier of the claiming ClusterProfile/Profile +// (represented by 'claimingTier') and the current owner information +// (`currentOwner`). Ownership can be transferred if the claiming profile +// belongs to a lower tier than the current owner. In case of tiers being +// equal, the function returns false to maintain the current ownership. +// +// Args: +// +// currentOwnerTier: The tier of the ClusterProfile/Profile currently with ownership +// claimingTier: The tier of the ClusterProfile/Profile trying to take ownership +// +// Returns: +// - true if the claiming ClusterProfile/Profile has higher ownership priority (lower tier), +// - false otherwise. +func hasHigherOwnershipPriority(currentOwnerTier, claimingTier int32) bool { + return claimingTier < currentOwnerTier +} + +func getTier(tierData string) int32 { + value, err := strconv.ParseInt(tierData, 10, 32) + if err != nil { + // Return tier max value + return 1<<31 - 1 + } + + // Tier minimun value is 1. So this indicates a + // corrupted value. + if value == 0 { + // Return tier max value + return 1<<31 - 1 + } + + return int32(value) +} + +// Resource Ownership change. Queue old owner for reconciliation +func requeueOldOwner(ctx context.Context, featureID configv1alpha1.FeatureID, clusterSummary *configv1alpha1.ClusterSummary, + logger logr.Logger) error { + + c := getManagementClusterClient() + + clusterSummaryScope, err := scope.NewClusterSummaryScope(&scope.ClusterSummaryScopeParams{ + Client: c, + Logger: logger, + ClusterSummary: clusterSummary, + ControllerName: "clustersummary", + }) + + if err != nil { + return err + } + + // Reset the hash a deployment happens again + logger.V(logs.LogDebug).Info(fmt.Sprintf("reset status of ClusterSummary %s/%s", + clusterSummary.Namespace, clusterSummary.Name)) + clusterSummaryScope.SetFeatureStatus(featureID, configv1alpha1.FeatureStatusProvisioning, nil) + + return c.Status().Update(ctx, clusterSummaryScope.ClusterSummary) +} diff --git a/controllers/handlers_helm.go b/controllers/handlers_helm.go index cb75467f..33cdf21f 100644 --- a/controllers/handlers_helm.go +++ b/controllers/handlers_helm.go @@ -1,5 +1,5 @@ /* -Copyright 2022-23. projectsveltos.io. All rights reserved. +Copyright 2022-24. projectsveltos.io. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
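The precedence helpers introduced above in controllers/conflicts.go, hasHigherOwnershipPriority and getTier, encode the whole tier rule: lower tier wins, ties keep the current owner, and unparsable tier data falls back to the maximum int32 so it can never win a conflict. A test-style sketch of that behavior, assuming it sits in the same controllers package (illustrative only, not part of this patch):

```go
package controllers

import "testing"

// Sketch of a table-free sanity check for the tier precedence helpers;
// the case values below are arbitrary examples.
func TestTierPrecedenceSketch(t *testing.T) {
	// Lower tier wins; a tie keeps the current owner.
	if !hasHigherOwnershipPriority(100 /*current*/, 10 /*claiming*/) {
		t.Error("tier 10 should take over from tier 100")
	}
	if hasHigherOwnershipPriority(10, 10) {
		t.Error("on a tie the current owner must keep the resource")
	}

	// getTier falls back to the maximum int32 value on corrupted data,
	// so a profile with a bad tier value never wins a conflict.
	if getTier("50") != 50 {
		t.Error("expected 50")
	}
	if getTier("not-a-number") != 1<<31-1 {
		t.Error("expected max int32 fallback")
	}
}
```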
@@ -288,7 +288,11 @@ func uninstallHelmCharts(ctx context.Context, c client.Client, clusterSummary *c releaseReports := make([]configv1alpha1.ReleaseReport, 0) for i := range clusterSummary.Spec.ClusterProfileSpec.HelmCharts { currentChart := &clusterSummary.Spec.ClusterProfileSpec.HelmCharts[i] - if chartManager.CanManageChart(clusterSummary, currentChart) { + canManage, err := determineChartOwnership(ctx, c, clusterSummary, currentChart, logger) + if err != nil { + return nil, err + } + if canManage { logger.V(logs.LogInfo).Info(fmt.Sprintf("Uninstalling chart %s from repo %s %s)", currentChart.ChartName, currentChart.RepositoryURL, @@ -352,6 +356,11 @@ func helmHash(ctx context.Context, c client.Client, clusterSummaryScope *scope.C // So consider it in the hash config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Reloader) + // If Tier changes, conflicts might be resolved differently + // So consider it in the hash + config += fmt.Sprintf("%d", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Tier) + config += fmt.Sprintf("%t", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.ContinueOnConflict) + clusterSummary := clusterSummaryScope.ClusterSummary if clusterSummary.Spec.ClusterProfileSpec.HelmCharts == nil { return h.Sum(nil), nil @@ -408,17 +417,13 @@ func handleCharts(ctx context.Context, clusterSummary *configv1alpha1.ClusterSum // Here only currently referenced helm releases are considered. If ClusterSummary was managing // an helm release and it is not referencing it anymore, such entry will be removed from ClusterSummary.Status // only after helm release is successfully undeployed. - conflict, err := updateStatusForeferencedHelmReleases(ctx, c, clusterSummary) + conflict, err := updateStatusForeferencedHelmReleases(ctx, c, clusterSummary, logger) if err != nil { return err } - var releaseReports []configv1alpha1.ReleaseReport - var chartDeployed []configv1alpha1.Chart - releaseReports, chartDeployed, err = walkChartsAndDeploy(ctx, c, clusterSummary, kubeconfig, logger) - if err != nil { - return err - } + releaseReports, chartDeployed, deployError := walkChartsAndDeploy(ctx, c, clusterSummary, kubeconfig, logger) + // Even if there is a deployment error do not return just yet. Update various status and clean stale resources. // If there was an helm release previous managed by this ClusterSummary and currently not referenced // anymore, such helm release has been successfully remove at this point. So @@ -426,7 +431,6 @@ func handleCharts(ctx context.Context, clusterSummary *configv1alpha1.ClusterSum if err != nil { return err } - // First get the helm releases currently managed and uninstall all the ones // not referenced anymore. Only if this operation succeeds, removes all stale // helm release registration for this clusterSummary. @@ -436,7 +440,6 @@ func handleCharts(ctx context.Context, clusterSummary *configv1alpha1.ClusterSum return err } releaseReports = append(releaseReports, undeployedReports...) 
- if clusterSummary.Spec.ClusterProfileSpec.SyncMode != configv1alpha1.SyncModeDryRun { chartManager, mgrErr := chartmanager.GetChartManagerInstance(ctx, c) if mgrErr != nil { @@ -451,19 +454,22 @@ func handleCharts(ctx context.Context, clusterSummary *configv1alpha1.ClusterSum return err } - if conflict { - return fmt.Errorf("conflict managing one or more helm charts") - } - err = updateClusterReportWithHelmReports(ctx, c, clusterSummary, releaseReports) if err != nil { return err } - // In DryRun mode always return an error. if clusterSummary.Spec.ClusterProfileSpec.SyncMode == configv1alpha1.SyncModeDryRun { return &configv1alpha1.DryRunReconciliationError{} } + + if deployError != nil { + return deployError + } + if conflict { + return deployer.NewConflictError("conflict managing one or more helm charts") + } + return nil } @@ -472,13 +478,12 @@ func handleCharts(ctx context.Context, clusterSummary *configv1alpha1.ClusterSum func walkChartsAndDeploy(ctx context.Context, c client.Client, clusterSummary *configv1alpha1.ClusterSummary, kubeconfig string, logger logr.Logger) ([]configv1alpha1.ReleaseReport, []configv1alpha1.Chart, error) { - chartManager, err := chartmanager.GetChartManagerInstance(ctx, c) + mgmtResources, err := collectMgmtResources(ctx, clusterSummary) if err != nil { return nil, nil, err } - var mgmtResources map[string]*unstructured.Unstructured - mgmtResources, err = collectMgmtResources(ctx, clusterSummary) + chartManager, err := chartmanager.GetChartManagerInstance(ctx, c) if err != nil { return nil, nil, err } @@ -487,27 +492,35 @@ func walkChartsAndDeploy(ctx context.Context, c client.Client, clusterSummary *c chartDeployed := make([]configv1alpha1.Chart, 0) for i := range clusterSummary.Spec.ClusterProfileSpec.HelmCharts { currentChart := &clusterSummary.Spec.ClusterProfileSpec.HelmCharts[i] + // Eventual conflicts are already resolved before this method is called (in updateStatusForeferencedHelmReleases) + // So it is safe to call CanManageChart here if !chartManager.CanManageChart(clusterSummary, currentChart) { var report *configv1alpha1.ReleaseReport report, err = createReportForUnmanagedHelmRelease(ctx, c, clusterSummary, currentChart, logger) if err != nil { - return nil, nil, err - } else { - releaseReports = append(releaseReports, *report) + return releaseReports, chartDeployed, err } + releaseReports = append(releaseReports, *report) // error is reported above, in updateHelmChartStatus. 
- continue + if clusterSummary.Spec.ClusterProfileSpec.ContinueOnConflict || + clusterSummary.Spec.ClusterProfileSpec.SyncMode == configv1alpha1.SyncModeDryRun { + + continue + } + + return releaseReports, chartDeployed, + deployer.NewConflictError(generateConflictForHelmChart(ctx, clusterSummary, currentChart)) } var report *configv1alpha1.ReleaseReport var currentRelease *releaseInfo currentRelease, report, err = handleChart(ctx, clusterSummary, mgmtResources, currentChart, kubeconfig, logger) if err != nil { - return nil, nil, err + return releaseReports, chartDeployed, err } err = updateValueHashOnHelmChartSummary(ctx, currentChart, clusterSummary, logger) if err != nil { - return nil, nil, err + return releaseReports, chartDeployed, err } releaseReports = append(releaseReports, *report) @@ -533,6 +546,115 @@ func walkChartsAndDeploy(ctx context.Context, c client.Client, clusterSummary *c return releaseReports, chartDeployed, nil } +func generateConflictForHelmChart(ctx context.Context, clusterSummary *configv1alpha1.ClusterSummary, currentChart *configv1alpha1.HelmChart) string { + c := getManagementClusterClient() + + message := fmt.Sprintf("cannot manage chart %s/%s.", currentChart.ReleaseNamespace, currentChart.ReleaseName) + chartManager, err := chartmanager.GetChartManagerInstance(ctx, c) + if err != nil { + return message + } + + var managerName string + managerName, err = chartManager.GetManagerForChart(clusterSummary.Spec.ClusterNamespace, + clusterSummary.Spec.ClusterName, clusterSummary.Spec.ClusterType, currentChart) + if err != nil { + return message + } + message += fmt.Sprintf(" ClusterSummary %s managing it", managerName) + return message +} + +// determineChartOwnership determines whether the provided cluster summary (`claimingHelmManager`) has the authority to manage +// the given Helm chart (`currentChart`). +// +// It achieves this by first checking if any other cluster summary is currently responsible for managing the chart. +// If no other manager exists, then this cluster summary is eligible and the function returns true. +// +// However, if another cluster summary is already managing the chart, a tier-based comparison is performed to determine +// which cluster summary has higher ownership priority. +// The cluster summary with the higher tier level is authorized to manage the chart. +// +// If this cluster summary wins the ownership competition, the previously managing cluster summary is re-queued for +// reconciliation to ensure it updates its state. +func determineChartOwnership(ctx context.Context, c client.Client, claimingHelmManager *configv1alpha1.ClusterSummary, + currentChart *configv1alpha1.HelmChart, logger logr.Logger) (canManage bool, err error) { + + chartManager, err := chartmanager.GetChartManagerInstance(ctx, c) + if err != nil { + return false, err + } + + l := logger.WithValues("release", fmt.Sprintf("%s:%s", currentChart.ReleaseNamespace, currentChart.ReleaseName)) + + if !chartManager.CanManageChart(claimingHelmManager, currentChart) { + // Another ClusterSummay is already managing this chart. Get the: + // 1. ClusterSummary managing the chart + // 2. 
Use the ClusterSummary's tiers to decide who should managed it + l.V(logs.LogDebug).Info("conflict detected") + clusterSummaryManaging, err := chartManager.GetManagerForChart(claimingHelmManager.Spec.ClusterNamespace, claimingHelmManager.Spec.ClusterName, + claimingHelmManager.Spec.ClusterType, currentChart) + if err != nil { + return false, err + } + + currentHelmManager := &configv1alpha1.ClusterSummary{} + err = c.Get(ctx, types.NamespacedName{Namespace: claimingHelmManager.Spec.ClusterNamespace, Name: clusterSummaryManaging}, currentHelmManager) + if err != nil { + l.V(logs.LogDebug).Info(fmt.Sprintf("failed to fetch ClusterSummary %s/%s currently managing the chart", + claimingHelmManager.Spec.ClusterNamespace, clusterSummaryManaging)) + return false, err + } + + if hasHigherOwnershipPriority(currentHelmManager.Spec.ClusterProfileSpec.Tier, claimingHelmManager.Spec.ClusterProfileSpec.Tier) { + // New ClusterSummary is taking over managing this chart. So reset helmReleaseSummaries for this chart + // This needs to happen immediately. helmReleaseSummaries are used by Sveltos to rebuild list of which + // clusterSummary is managing an helm chart if pod restarts + err = resetHelmReleaseSummaries(ctx, c, currentHelmManager, currentChart, logger) + if err != nil { + return false, err + } + + // Reset Status of the ClusterSummary previously managing this resource + err = requeueOldOwner(ctx, configv1alpha1.FeatureHelm, currentHelmManager, logger) + if err != nil { + return false, err + } + + // Set current ClusterSummary as the new manager + chartManager.SetManagerForChart(claimingHelmManager, currentChart) + return true, nil + } + + return false, nil + } + + // Nobody else is managing the chart + return true, nil +} + +func resetHelmReleaseSummaries(ctx context.Context, c client.Client, clusterSummary *configv1alpha1.ClusterSummary, + currentChart *configv1alpha1.HelmChart, logger logr.Logger) error { + + for i := range clusterSummary.Status.HelmReleaseSummaries { + hr := clusterSummary.Status.HelmReleaseSummaries[i] + if hr.ReleaseNamespace == currentChart.ReleaseNamespace && + hr.ReleaseName == currentChart.ReleaseName { + + logger.V(logs.LogDebug).Info("removing entry for chart %s/%s in ClusterSummary..Status.HelmReleaseSummaries %s/%s", + currentChart.ReleaseNamespace, currentChart.ReleaseName, clusterSummary.Namespace, clusterSummary.Name) + length := len(clusterSummary.Status.HelmReleaseSummaries) + if i < length-1 { + clusterSummary.Status.HelmReleaseSummaries[i] = clusterSummary.Status.HelmReleaseSummaries[length-1] + } + clusterSummary.Status.HelmReleaseSummaries = clusterSummary.Status.HelmReleaseSummaries[:length-1] + break + } + } + + return c.Status().Update(ctx, clusterSummary) +} + func handleInstall(ctx context.Context, clusterSummary *configv1alpha1.ClusterSummary, mgmtResources map[string]*unstructured.Unstructured, currentChart *configv1alpha1.HelmChart, kubeconfig string, logger logr.Logger) (*configv1alpha1.ReleaseReport, error) { @@ -1263,7 +1385,7 @@ func undeployStaleReleases(ctx context.Context, c client.Client, clusterSummary // allowed to manage. // No action in DryRun mode. 
func updateStatusForeferencedHelmReleases(ctx context.Context, c client.Client, - clusterSummary *configv1alpha1.ClusterSummary) (bool, error) { + clusterSummary *configv1alpha1.ClusterSummary, logger logr.Logger) (bool, error) { // No-op in DryRun mode if clusterSummary.Spec.ClusterProfileSpec.SyncMode == configv1alpha1.SyncModeDryRun { @@ -1300,11 +1422,16 @@ func updateStatusForeferencedHelmReleases(ctx context.Context, c client.Client, helmReleaseSummaries := make([]configv1alpha1.HelmChartSummary, len(currentClusterSummary.Spec.ClusterProfileSpec.HelmCharts)) for i := range currentClusterSummary.Spec.ClusterProfileSpec.HelmCharts { currentChart := ¤tClusterSummary.Spec.ClusterProfileSpec.HelmCharts[i] - if chartManager.CanManageChart(currentClusterSummary, currentChart) { + var canManage bool + canManage, err = determineChartOwnership(ctx, c, clusterSummary, currentChart, logger) + if err != nil { + return err + } + if canManage { helmReleaseSummaries[i] = configv1alpha1.HelmChartSummary{ ReleaseName: currentChart.ReleaseName, ReleaseNamespace: currentChart.ReleaseNamespace, - Status: configv1alpha1.HelChartStatusManaging, + Status: configv1alpha1.HelmChartStatusManaging, ValuesHash: getValueHashFromHelmChartSummary(currentChart, clusterSummary), // if a value is currently stored, keep it. // after chart is deployed such value will be updated } @@ -1312,14 +1439,14 @@ func updateStatusForeferencedHelmReleases(ctx context.Context, c client.Client, } else { var managerName string managerName, err = chartManager.GetManagerForChart(currentClusterSummary.Spec.ClusterNamespace, - currentClusterSummary.Spec.ClusterName, clusterSummary.Spec.ClusterType, currentChart) + currentClusterSummary.Spec.ClusterName, currentClusterSummary.Spec.ClusterType, currentChart) if err != nil { return err } helmReleaseSummaries[i] = configv1alpha1.HelmChartSummary{ ReleaseName: currentChart.ReleaseName, ReleaseNamespace: currentChart.ReleaseNamespace, - Status: configv1alpha1.HelChartStatusConflict, + Status: configv1alpha1.HelmChartStatusConflict, ConflictMessage: fmt.Sprintf("ClusterSummary %s managing it", managerName), } conflict = true @@ -1332,7 +1459,7 @@ func updateStatusForeferencedHelmReleases(ctx context.Context, c client.Client, // still leave an entry in ClusterSummary.Status for i := range currentClusterSummary.Status.HelmReleaseSummaries { summary := ¤tClusterSummary.Status.HelmReleaseSummaries[i] - if summary.Status == configv1alpha1.HelChartStatusManaging { + if summary.Status == configv1alpha1.HelmChartStatusManaging { if _, ok := currentlyReferenced[helmInfo(summary.ReleaseNamespace, summary.ReleaseName)]; !ok { helmReleaseSummaries = append(helmReleaseSummaries, *summary) } @@ -1534,6 +1661,7 @@ func collectResourcesFromManagedHelmCharts(ctx context.Context, c client.Client, currentChart := &clusterSummary.Spec.ClusterProfileSpec.HelmCharts[i] l := logger.WithValues("chart", currentChart.ChartName, "releaseNamespace", currentChart.ReleaseNamespace) l.V(logs.LogDebug).Info("collecting resources for helm chart") + // Conflicts are already resolved by the time this is invoked. 
So it is safe to call CanManageChart if chartManager.CanManageChart(clusterSummary, currentChart) { actionConfig, err := actionConfigInit(currentChart.ReleaseNamespace, kubeconfig, getEnableClientCacheValue(currentChart.Options)) diff --git a/controllers/handlers_helm_test.go b/controllers/handlers_helm_test.go index da3cd2b2..54f9a233 100644 --- a/controllers/handlers_helm_test.go +++ b/controllers/handlers_helm_test.go @@ -167,7 +167,7 @@ var _ = Describe("HandlersHelm", func() { kyvernoSummary := configv1alpha1.HelmChartSummary{ ReleaseName: "kyverno", ReleaseNamespace: "kyverno", - Status: configv1alpha1.HelChartStatusManaging, + Status: configv1alpha1.HelmChartStatusManaging, } clusterSummary.Spec.ClusterProfileSpec = configv1alpha1.Spec{ @@ -192,7 +192,8 @@ var _ = Describe("HandlersHelm", func() { manager.RegisterClusterSummaryForCharts(clusterSummary) - conflict, err := controllers.UpdateStatusForeferencedHelmReleases(context.TODO(), c, clusterSummary) + conflict, err := controllers.UpdateStatusForeferencedHelmReleases(context.TODO(), c, clusterSummary, + textlogger.NewLogger(textlogger.NewConfig())) Expect(err).To(BeNil()) Expect(conflict).To(BeFalse()) @@ -202,7 +203,7 @@ var _ = Describe("HandlersHelm", func() { currentClusterSummary)).To(Succeed()) Expect(currentClusterSummary.Status.HelmReleaseSummaries).ToNot(BeNil()) Expect(len(currentClusterSummary.Status.HelmReleaseSummaries)).To(Equal(2)) - Expect(currentClusterSummary.Status.HelmReleaseSummaries[0].Status).To(Equal(configv1alpha1.HelChartStatusManaging)) + Expect(currentClusterSummary.Status.HelmReleaseSummaries[0].Status).To(Equal(configv1alpha1.HelmChartStatusManaging)) Expect(currentClusterSummary.Status.HelmReleaseSummaries[0].ReleaseName).To(Equal(calicoChart.ReleaseName)) Expect(currentClusterSummary.Status.HelmReleaseSummaries[0].ReleaseNamespace).To(Equal(calicoChart.ReleaseNamespace)) @@ -225,7 +226,7 @@ var _ = Describe("HandlersHelm", func() { // List an helm chart non referenced anymore as managed clusterSummary.Status = configv1alpha1.ClusterSummaryStatus{ HelmReleaseSummaries: []configv1alpha1.HelmChartSummary{ - {ReleaseName: randomString(), ReleaseNamespace: randomString(), Status: configv1alpha1.HelChartStatusManaging}, + {ReleaseName: randomString(), ReleaseNamespace: randomString(), Status: configv1alpha1.HelmChartStatusManaging}, }, } @@ -235,7 +236,8 @@ var _ = Describe("HandlersHelm", func() { c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(initObjects...).WithObjects(initObjects...).Build() - conflict, err := controllers.UpdateStatusForeferencedHelmReleases(context.TODO(), c, clusterSummary) + conflict, err := controllers.UpdateStatusForeferencedHelmReleases(context.TODO(), c, clusterSummary, + textlogger.NewLogger(textlogger.NewConfig())) Expect(err).To(BeNil()) Expect(conflict).To(BeFalse()) @@ -267,7 +269,7 @@ var _ = Describe("HandlersHelm", func() { kyvernoSummary := configv1alpha1.HelmChartSummary{ ReleaseName: "kyverno", ReleaseNamespace: "kyverno", - Status: configv1alpha1.HelChartStatusManaging, + Status: configv1alpha1.HelmChartStatusManaging, } clusterSummary.Spec.ClusterProfileSpec = configv1alpha1.Spec{ @@ -280,7 +282,7 @@ var _ = Describe("HandlersHelm", func() { { ReleaseName: contourChart.ReleaseName, ReleaseNamespace: contourChart.ReleaseNamespace, - Status: configv1alpha1.HelChartStatusManaging, + Status: configv1alpha1.HelmChartStatusManaging, }, }, } @@ -305,7 +307,7 @@ var _ = Describe("HandlersHelm", func() { currentClusterSummary)).To(Succeed()) 
Expect(currentClusterSummary.Status.HelmReleaseSummaries).ToNot(BeNil()) Expect(len(currentClusterSummary.Status.HelmReleaseSummaries)).To(Equal(1)) - Expect(currentClusterSummary.Status.HelmReleaseSummaries[0].Status).To(Equal(configv1alpha1.HelChartStatusManaging)) + Expect(currentClusterSummary.Status.HelmReleaseSummaries[0].Status).To(Equal(configv1alpha1.HelmChartStatusManaging)) Expect(currentClusterSummary.Status.HelmReleaseSummaries[0].ReleaseName).To(Equal(contourChart.ReleaseName)) Expect(currentClusterSummary.Status.HelmReleaseSummaries[0].ReleaseNamespace).To(Equal(contourChart.ReleaseNamespace)) }) @@ -593,6 +595,7 @@ var _ = Describe("Hash methods", func() { kyvernoChart, nginxChart, }, + Tier: 100, }, }, } @@ -613,6 +616,8 @@ var _ = Describe("Hash methods", func() { config := fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.SyncMode) config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Reloader) + config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Tier) + config += fmt.Sprintf("%t", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.ContinueOnConflict) config += render.AsCode(kyvernoChart) config += render.AsCode(nginxChart) h := sha256.New() diff --git a/controllers/handlers_kustomize.go b/controllers/handlers_kustomize.go index ae7fd79f..7a9cef34 100644 --- a/controllers/handlers_kustomize.go +++ b/controllers/handlers_kustomize.go @@ -270,6 +270,11 @@ func kustomizationHash(ctx context.Context, c client.Client, clusterSummaryScope // So consider it in the hash config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Reloader) + // If Tier changes, conflicts might be resolved differently + // So consider it in the hash + config += fmt.Sprintf("%d", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Tier) + config += fmt.Sprintf("%t", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.ContinueOnConflict) + config += render.AsCode(clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.KustomizationRefs) for i := range clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.KustomizationRefs { diff --git a/controllers/handlers_kustomize_test.go b/controllers/handlers_kustomize_test.go index 58b4126a..06afc35b 100644 --- a/controllers/handlers_kustomize_test.go +++ b/controllers/handlers_kustomize_test.go @@ -268,6 +268,7 @@ var _ = Describe("Hash methods", func() { ClusterType: libsveltosv1alpha1.ClusterTypeCapi, ClusterProfileSpec: configv1alpha1.Spec{ KustomizationRefs: make([]configv1alpha1.KustomizationRef, repoNum), + Tier: 100, }, }, } @@ -299,6 +300,8 @@ var _ = Describe("Hash methods", func() { config := fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.SyncMode) config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Reloader) + config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Tier) + config += fmt.Sprintf("%t", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.ContinueOnConflict) config += render.AsCode(clusterSummary.Spec.ClusterProfileSpec.KustomizationRefs) for i := 0; i < repoNum; i++ { config += gitRepositories[i].Status.Artifact.Revision diff --git a/controllers/handlers_resources.go b/controllers/handlers_resources.go index db539770..bc336045 100644 --- a/controllers/handlers_resources.go +++ b/controllers/handlers_resources.go @@ -64,17 +64,12 @@ func deployResources(ctx context.Context, c 
client.Client, return err } - var localResourceReports []configv1alpha1.ResourceReport - var remoteResourceReports []configv1alpha1.ResourceReport - localResourceReports, remoteResourceReports, err = deployPolicyRefs(ctx, c, remoteRestConfig, + localResourceReports, remoteResourceReports, deployError := deployPolicyRefs(ctx, c, remoteRestConfig, clusterSummary, featureHandler, logger) // Irrespective of error, update deployed gvks. Otherwise cleanup won't happen in case gvkErr := updateDeployedGroupVersionKind(ctx, clusterSummary, configv1alpha1.FeatureResources, localResourceReports, remoteResourceReports, logger) - if err != nil { - return err - } if gvkErr != nil { return gvkErr } @@ -91,7 +86,6 @@ func deployResources(ctx context.Context, c client.Client, return err } - // If we are here there are no conflicts (and error would have been returned by deployPolicyRefs) remoteDeployed := make([]configv1alpha1.Resource, 0) for i := range remoteResourceReports { remoteDeployed = append(remoteDeployed, remoteResourceReports[i].Resource) @@ -130,6 +124,11 @@ func deployResources(ctx context.Context, c client.Client, if clusterSummary.Spec.ClusterProfileSpec.SyncMode == configv1alpha1.SyncModeDryRun { return &configv1alpha1.DryRunReconciliationError{} } + + if deployError != nil { + return deployError + } + return validateHealthPolicies(ctx, remoteRestConfig, clusterSummary, configv1alpha1.FeatureResources, logger) } @@ -340,6 +339,11 @@ func resourcesHash(ctx context.Context, c client.Client, clusterSummaryScope *sc // So consider it in the hash config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Reloader) + // If Tier changes, conflicts might be resolved differently + // So consider it in the hash + config += fmt.Sprintf("%d", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Tier) + config += fmt.Sprintf("%t", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.ContinueOnConflict) + clusterSummary := clusterSummaryScope.ClusterSummary for i := range clusterSummary.Spec.ClusterProfileSpec.PolicyRefs { reference := &clusterSummary.Spec.ClusterProfileSpec.PolicyRefs[i] diff --git a/controllers/handlers_resources_test.go b/controllers/handlers_resources_test.go index 21e2a62d..7aa0a332 100644 --- a/controllers/handlers_resources_test.go +++ b/controllers/handlers_resources_test.go @@ -343,6 +343,7 @@ var _ = Describe("Hash methods", func() { Kind: string(libsveltosv1alpha1.ConfigMapReferencedResourceKind), }, }, + Tier: 100, }, }, } @@ -365,6 +366,8 @@ var _ = Describe("Hash methods", func() { config := fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.SyncMode) config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Reloader) + config += fmt.Sprintf("%v", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.Tier) + config += fmt.Sprintf("%t", clusterSummaryScope.ClusterSummary.Spec.ClusterProfileSpec.ContinueOnConflict) config += controllers.GetStringDataSectionHash(configMap1.Data) config += controllers.GetStringDataSectionHash(configMap2.Data) h := sha256.New() diff --git a/controllers/handlers_utils.go b/controllers/handlers_utils.go index 6e595178..9e40c9f2 100644 --- a/controllers/handlers_utils.go +++ b/controllers/handlers_utils.go @@ -287,11 +287,15 @@ func deployUnstructured(ctx context.Context, deployingToMgmtCluster bool, destCo featureID configv1alpha1.FeatureID, clusterSummary *configv1alpha1.ClusterSummary, logger logr.Logger, ) (reports 
[]configv1alpha1.ResourceReport, err error) { - profile, err := configv1alpha1.GetProfileOwner(ctx, getManagementClusterClient(), clusterSummary) + profile, profileTier, err := configv1alpha1.GetProfileOwnerAndTier(ctx, getManagementClusterClient(), clusterSummary) if err != nil { return nil, err } + if profile.GetObjectKind().GroupVersionKind().Kind == configv1alpha1.ProfileKind { + profile.SetName(profileNameToOwnerReferenceName(profile)) + } + conflictErrorMsg := "" reports = make([]configv1alpha1.ResourceReport, 0) for i := range referencedUnstructured { policy := referencedUnstructured[i] @@ -304,7 +308,7 @@ func deployUnstructured(ctx context.Context, deployingToMgmtCluster bool, destCo logger.V(logs.LogDebug).Info(fmt.Sprintf("deploying resource %s %s/%s (deploy to management cluster: %v)", policy.GetKind(), policy.GetNamespace(), policy.GetName(), deployingToMgmtCluster)) - resource, policyHash := getResource(policy, referencedObject, featureID, logger) + resource, policyHash := getResource(policy, referencedObject, profileTier, featureID, logger) // If policy is namespaced, create namespace if not already existing err = createNamespace(ctx, destClient, clusterSummary, policy.GetNamespace()) @@ -320,18 +324,24 @@ func deployUnstructured(ctx context.Context, deployingToMgmtCluster bool, destCo return nil, err } - var exist bool - var currentHash string - exist, currentHash, err = deployer.ValidateObjectForUpdate(ctx, dr, policy, - referencedObject.Kind, referencedObject.Namespace, referencedObject.Name) + var resourceInfo *deployer.ResourceInfo + var requeue bool + resourceInfo, requeue, err = canDeployResource(ctx, dr, policy, referencedObject, profile, profileTier, logger) if err != nil { var conflictErr *deployer.ConflictError ok := errors.As(err, &conflictErr) - // In DryRun mode do not stop here, but report the conflict. - if ok && clusterSummary.Spec.ClusterProfileSpec.SyncMode == configv1alpha1.SyncModeDryRun { + if ok { conflictResourceReport := generateConflictResourceReport(ctx, dr, resource) - reports = append(reports, *conflictResourceReport) - continue + if clusterSummary.Spec.ClusterProfileSpec.SyncMode == configv1alpha1.SyncModeDryRun { + reports = append(reports, *conflictResourceReport) + continue + } else { + conflictErrorMsg += conflictResourceReport.Message + if clusterSummary.Spec.ClusterProfileSpec.ContinueOnConflict { + continue + } + return reports, deployer.NewConflictError(conflictErrorMsg) + } } return reports, err } @@ -342,7 +352,7 @@ func deployUnstructured(ctx context.Context, deployingToMgmtCluster bool, destCo addExtraAnnotations(policy, clusterSummary.Spec.ClusterProfileSpec.ExtraAnnotations) if deployingToMgmtCluster { - // When deploying resources in the management cluster, just setting ClusterProfile as OwnerReference is + // When deploying resources in the management cluster, just setting (Cluster)Profile as OwnerReference is // not enough. We also need to track which ClusterSummary is creating the resource. Otherwise while // trying to clean stale resources those objects will be incorrectly removed. 
// An extra annotation is added here to indicate the clustersummary, so the managed cluster, this @@ -351,29 +361,134 @@ func deployUnstructured(ctx context.Context, deployingToMgmtCluster bool, destCo addAnnotation(policy, clusterSummaryAnnotation, value) } + if requeue { + err = requeueAllOldOwners(ctx, resourceInfo.OwnerReferences, featureID, clusterSummary, logger) + if err != nil { + return reports, err + } + } + err = updateResource(ctx, dr, clusterSummary, policy, logger) if err != nil { return reports, err } resource.LastAppliedTime = &metav1.Time{Time: time.Now()} + reports = append(reports, *generateResourceReport(policyHash, resourceInfo, resource)) + } - if !exist { - reports = append(reports, - configv1alpha1.ResourceReport{Resource: *resource, Action: string(configv1alpha1.CreateResourceAction)}) - } else if policyHash != currentHash { - reports = append(reports, - configv1alpha1.ResourceReport{Resource: *resource, Action: string(configv1alpha1.UpdateResourceAction)}) - } else { - reports = append(reports, - configv1alpha1.ResourceReport{Resource: *resource, Action: string(configv1alpha1.NoResourceAction), - Message: "Object already deployed. And policy referenced by ClusterProfile has not changed since last deployment."}) - } + if conflictErrorMsg != "" { + return reports, deployer.NewConflictError(conflictErrorMsg) } return reports, nil } +// requeueAllOldOwners gets the list of all ClusterProfile/Profile instances currently owning the resource in the +// managed cluster (profiles). For each one, it finds the corresponding ClusterSummary and via requeueOldOwner reset +// the Status so a new reconciliation happens. +func requeueAllOldOwners(ctx context.Context, profileOwners []corev1.ObjectReference, + featureID configv1alpha1.FeatureID, clusterSummary *configv1alpha1.ClusterSummary, logger logr.Logger) error { + + c := getManagementClusterClient() + // Since release v0.30.0 only one profile instance can deploy a resource in a managed + // cluster. Before that though multiple instances could have deployed same resource + // provided all those instances were referencing same ConfigMap/Secret. + // Here we walk over ownerReferences just for backward compatibility + for i := range profileOwners { + var err error + var profileKind string + var profileName types.NamespacedName + switch profileOwners[i].Kind { + case configv1alpha1.ClusterProfileKind: + profileKind = configv1alpha1.ClusterProfileKind + profileName = types.NamespacedName{Name: profileOwners[i].Name} + case configv1alpha1.ProfileKind: + profileKind = configv1alpha1.ProfileKind + profileName = *getProfileNameFromOwnerReferenceName(profileOwners[i].Name) + default: + continue + } + + if err != nil { + return err + } + + // Get ClusterSummary that deployed the resource. + var ownerClusterSummary *configv1alpha1.ClusterSummary + ownerClusterSummary, err = getClusterSummary(ctx, c, profileKind, profileName.Name, + clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, clusterSummary.Spec.ClusterType) + if err != nil { + if apierrors.IsNotFound(err) { + continue + } + return err + } + + err = requeueOldOwner(ctx, featureID, ownerClusterSummary, logger) + if err != nil { + return err + } + } + + return nil +} + +// canDeployResource verifies whether resource can be deployed. 
Following checks are performed: +// +// - if resource is currently already deployed in the managed cluster, if owned by this (Cluster)Profile/referenced +// resource => it can be updated +// - if resource is currently already deployed in the managed cluster and owned by same (Cluster)Profile but different +// referenced resource => it cannot be updated +// - if resource is currently already deployed in the managed cluster but owned by different (Cluster)Profile +// => it can be updated only if current (Cluster)Profile tier is lower than profile currently deploying the resource +// +// If resource cannot be deployed, return a ConflictError. +// If any other error occurs while doing those verification, the error is returned +func canDeployResource(ctx context.Context, dr dynamic.ResourceInterface, policy *unstructured.Unstructured, + referencedObject *corev1.ObjectReference, profile client.Object, profileTier int32, logger logr.Logger, +) (resourceInfo *deployer.ResourceInfo, requeueOldOwner bool, err error) { + + l := logger.WithValues("resource", + fmt.Sprintf("%s:%s/%s", referencedObject.Kind, referencedObject.Namespace, referencedObject.Name)) + resourceInfo, err = deployer.ValidateObjectForUpdate(ctx, dr, policy, + referencedObject.Kind, referencedObject.Namespace, referencedObject.Name, profile) + if err != nil { + var conflictErr *deployer.ConflictError + ok := errors.As(err, &conflictErr) + if ok { + // There is a conflict. + if hasHigherOwnershipPriority(getTier(resourceInfo.OwnerTier), profileTier) { + l.V(logs.LogDebug).Info("conflict detected but resource ownership can change") + // Because of tier, ownership must change. Which also means current ClusterProfile/Profile + // owning the resource must be requeued for reconciliation + return resourceInfo, true, nil + } + l.V(logs.LogDebug).Info("conflict detected") + // Conflict cannot be resolved in favor of the clustersummary being reconciled. So report the conflict + // error + return resourceInfo, false, conflictErr + } + return nil, false, err + } + + // There was no conflict. Resource can be deployed. + return resourceInfo, false, nil +} + +func generateResourceReport(policyHash string, resourceInfo *deployer.ResourceInfo, resource *configv1alpha1.Resource, +) *configv1alpha1.ResourceReport { + + if !resourceInfo.Exist { + return &configv1alpha1.ResourceReport{Resource: *resource, Action: string(configv1alpha1.CreateResourceAction)} + } else if policyHash != resourceInfo.Hash { + return &configv1alpha1.ResourceReport{Resource: *resource, Action: string(configv1alpha1.UpdateResourceAction)} + } else { + return &configv1alpha1.ResourceReport{Resource: *resource, Action: string(configv1alpha1.NoResourceAction), + Message: "Object already deployed. And policy referenced by ClusterProfile has not changed since last deployment."} + } +} + // addExtraLabels adds ExtraLabels to policy. // If policy already has a label with a key present in `ExtraLabels`, the value from `ExtraLabels` will // override the existing value. 
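For context on the tier rule above: canDeployResource delegates the actual comparison to hasHigherOwnershipPriority and getTier, both added in the new controllers/conflicts.go, which is not included in this excerpt. The following is a minimal sketch of the semantics implied by the call site, assuming the owner tier has already been parsed from the OwnerTier annotation (defaulting to 100 when absent) and that a tie leaves the current owner in place; it is not the actual implementation from conflicts.go.

```go
package main

import "fmt"

// Illustrative sketch only. The first argument is the tier of the profile that
// currently owns the resource, the second is the tier of the profile being
// reconciled; true means ownership should move to the reconciling profile.
func hasHigherOwnershipPriority(currentOwnerTier, newTier int32) bool {
	// Lower Tier value means higher deployment priority.
	return newTier < currentOwnerTier
}

func main() {
	fmt.Println(hasHigherOwnershipPriority(100, 50)) // true: a tier-50 profile takes over from a default-tier owner
	fmt.Println(hasHigherOwnershipPriority(50, 100)) // false: the existing tier-50 owner keeps the resource
	fmt.Println(hasHigherOwnershipPriority(90, 90))  // false: on a tie, the profile that deployed first keeps it (assumed)
}
```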
@@ -421,7 +536,7 @@ func addExtraAnnotations(policy *unstructured.Unstructured, extraAnnotations map } // getResource returns sveltos Resource and the resource hash hash -func getResource(policy *unstructured.Unstructured, referencedObject *corev1.ObjectReference, +func getResource(policy *unstructured.Unstructured, referencedObject *corev1.ObjectReference, tier int32, featureID configv1alpha1.FeatureID, logger logr.Logger) (resource *configv1alpha1.Resource, policyHash string) { resource = &configv1alpha1.Resource{ @@ -450,6 +565,7 @@ func getResource(policy *unstructured.Unstructured, referencedObject *corev1.Obj addLabel(policy, deployer.ReferenceNamespaceLabel, referencedObject.Namespace) addLabel(policy, reasonLabel, string(featureID)) addAnnotation(policy, deployer.PolicyHash, policyHash) + addAnnotation(policy, deployer.OwnerTier, fmt.Sprintf("%d", tier)) return resource, policyHash } @@ -788,10 +904,13 @@ func undeployStaleResources(ctx context.Context, isMgmtCluster bool, logger.V(logs.LogDebug).Info("removing stale resources") - profile, err := configv1alpha1.GetProfileOwner(ctx, getManagementClusterClient(), clusterSummary) + profile, _, err := configv1alpha1.GetProfileOwnerAndTier(ctx, getManagementClusterClient(), clusterSummary) if err != nil { return nil, err } + if profile.GetObjectKind().GroupVersionKind().Kind == configv1alpha1.ProfileKind { + profile.SetName(profileNameToOwnerReferenceName(profile)) + } undeployed := make([]configv1alpha1.ResourceReport, 0) diff --git a/controllers/handlers_utils_test.go b/controllers/handlers_utils_test.go index 1dec9b37..b6d0819d 100644 --- a/controllers/handlers_utils_test.go +++ b/controllers/handlers_utils_test.go @@ -304,6 +304,7 @@ var _ = Describe("HandlersUtils", func() { controllers.AddLabel(policy, deployer.ReferenceNameLabel, secret.Name) controllers.AddLabel(policy, deployer.ReferenceNamespaceLabel, secret.Namespace) controllers.AddAnnotation(policy, deployer.PolicyHash, policyHash) + controllers.AddAnnotation(policy, deployer.OwnerTier, "100") Expect(testEnv.Client.Create(context.TODO(), policy)) Expect(waitForObject(ctx, testEnv.Client, policy)).To(Succeed()) } @@ -342,7 +343,7 @@ var _ = Describe("HandlersUtils", func() { // Because objects are now existing in the workload cluster but don't match the content // in the secret referenced by ClusterProfile, both services will be reported as updated - // ( if the ClusterProfile were to be changed from DryRun, both service would be updated). + // (if the ClusterProfile were to be changed from DryRun, both service would be updated). 
resourceReports, err = controllers.DeployContent(context.TODO(), false, testEnv.Config, testEnv.Client, secret, map[string]string{"service": newContent}, clusterSummary, nil, @@ -362,8 +363,8 @@ var _ = Describe("HandlersUtils", func() { validateResourceReports(resourceReports, 0, 0, 0, 2) for i := range resourceReports { rr := &resourceReports[i] - Expect(rr.Message).To(ContainSubstring(fmt.Sprintf("Object currently deployed because of %s %s/%s.", secret.Kind, - secret.Namespace, secret.Name))) + Expect(rr.Message).To(ContainSubstring(fmt.Sprintf("Object Service:%s/service%d currently deployed because of %s %s/%s.", + namespace, i, secret.Kind, secret.Namespace, secret.Name))) } }) diff --git a/controllers/utils.go b/controllers/utils.go index a5220b31..c19f765c 100644 --- a/controllers/utils.go +++ b/controllers/utils.go @@ -448,3 +448,25 @@ func parseMapFromString(data string) (map[string]string, error) { // Return the parsed map return result, nil } + +// Sveltos deployment in managed clusters relies on OwnerReferences to track the responsible profile. +// However, a limitation arises with namespaced Profiles. +// Kubernetes OwnerReferences lack a namespace field, assuming owners reside in the same namespace. +// For Profile resources (namespaced), Sveltos dynamically modifies the owner name to incorporate both +// namespace and name for proper identification. +func profileNameToOwnerReferenceName(profile client.Object) string { + if profile.GetObjectKind().GroupVersionKind().Kind == configv1alpha1.ProfileKind { + return fmt.Sprintf("%s/%s", profile.GetNamespace(), profile.GetName()) + } + + return profile.GetName() +} + +func getProfileNameFromOwnerReferenceName(profileName string) *types.NamespacedName { + result := strings.Split(profileName, "/") + if len(result) == 1 { + // resources deployed by Sveltos before release v0.30.0 did not have profile namespace/name + return &types.NamespacedName{Name: profileName} + } + return &types.NamespacedName{Namespace: result[0], Name: result[1]} +} diff --git a/controllers/utils_test.go b/controllers/utils_test.go index 2ac908d9..36b9210a 100644 --- a/controllers/utils_test.go +++ b/controllers/utils_test.go @@ -204,7 +204,7 @@ var _ = Describe("getClusterProfileOwner ", func() { c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(initObjects...).WithObjects(initObjects...).Build() - owner, err := configv1alpha1.GetProfileOwner(context.TODO(), c, clusterSummary) + owner, _, err := configv1alpha1.GetProfileOwnerAndTier(context.TODO(), c, clusterSummary) Expect(err).To(BeNil()) Expect(owner).ToNot(BeNil()) Expect(owner.GetName()).To(Equal(clusterProfile.Name)) @@ -264,7 +264,7 @@ var _ = Describe("getClusterProfileOwner ", func() { c := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(initObjects...).WithObjects(initObjects...).Build() - owner, err := configv1alpha1.GetProfileOwner(context.TODO(), c, clusterSummary) + owner, _, err := configv1alpha1.GetProfileOwnerAndTier(context.TODO(), c, clusterSummary) Expect(err).To(BeNil()) Expect(owner).To(BeNil()) }) diff --git a/go.mod b/go.mod index 4be75ffe..5d8303d4 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/onsi/ginkgo/v2 v2.17.3 github.com/onsi/gomega v1.33.1 github.com/pkg/errors v0.9.1 - github.com/projectsveltos/libsveltos v0.29.1-0.20240506121921-5f0f0328e94b + github.com/projectsveltos/libsveltos v0.29.2-0.20240514105451-8d864115aeb3 github.com/prometheus/client_golang v1.19.1 github.com/spf13/pflag v1.0.5 github.com/yuin/gopher-lua 
v1.1.1 diff --git a/go.sum b/go.sum index 58c0fecd..e5e2d613 100644 --- a/go.sum +++ b/go.sum @@ -368,8 +368,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/poy/onpar v1.1.2 h1:QaNrNiZx0+Nar5dLgTVp5mXkyoVFIbepjyEoGSnhbAY= github.com/poy/onpar v1.1.2/go.mod h1:6X8FLNoxyr9kkmnlqpK6LSoiOtrO6MICtWwEuWkLjzg= -github.com/projectsveltos/libsveltos v0.29.1-0.20240506121921-5f0f0328e94b h1:WVznqRebWYjZlCBEgQSW9FCxiKHfcLT2Uhtw071tNeo= -github.com/projectsveltos/libsveltos v0.29.1-0.20240506121921-5f0f0328e94b/go.mod h1:hCFMjAjY3d/h3Nw2ZBREFqDenhqYyZl2lJlybpEcuzM= +github.com/projectsveltos/libsveltos v0.29.2-0.20240514105451-8d864115aeb3 h1:ELFLrYMhMHAgkl4HrDy9iwFJkL93HRUAvYH/jfm0+R4= +github.com/projectsveltos/libsveltos v0.29.2-0.20240514105451-8d864115aeb3/go.mod h1:hCFMjAjY3d/h3Nw2ZBREFqDenhqYyZl2lJlybpEcuzM= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g= diff --git a/manifest/manifest.yaml b/manifest/manifest.yaml index 0042f71f..fe62150e 100644 --- a/manifest/manifest.yaml +++ b/manifest/manifest.yaml @@ -488,6 +488,14 @@ spec: clusterSelector: description: ClusterSelector identifies clusters to associate to. type: string + continueOnConflict: + default: false + description: |- + By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after + encountering the first conflict (e.g., another ClusterProfile already deployed the resource). + If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even + if conflicts are detected for previous resources. + type: boolean dependsOn: description: |- DependsOn specifies a list of other ClusterProfiles that this instance depends on. @@ -1046,6 +1054,21 @@ spec: - resource type: object type: array + tier: + default: 100 + description: |- + Tier controls the order of deployment for ClusterProfile or Profile resources targeting + the same cluster resources. + Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes + resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it. + Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest** + Tier value takes priority and deploys the resource. + Higher Tier values represent lower priority. The default Tier value is 100. + Using Tiers provides finer control over resource deployment within your cluster, particularly useful + when multiple configurations manage the same resources. + format: int32 + minimum: 1 + type: integer validateHealths: description: |- ValidateHealths is a slice of Lua functions to run against @@ -1798,6 +1821,14 @@ spec: description: ClusterSelector identifies clusters to associate to. type: string + continueOnConflict: + default: false + description: |- + By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after + encountering the first conflict (e.g., another ClusterProfile already deployed the resource). + If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even + if conflicts are detected for previous resources. 
+ type: boolean dependsOn: description: |- DependsOn specifies a list of other ClusterProfiles that this instance depends on. @@ -2358,6 +2389,21 @@ spec: - resource type: object type: array + tier: + default: 100 + description: |- + Tier controls the order of deployment for ClusterProfile or Profile resources targeting + the same cluster resources. + Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes + resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it. + Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest** + Tier value takes priority and deploys the resource. + Higher Tier values represent lower priority. The default Tier value is 100. + Using Tiers provides finer control over resource deployment within your cluster, particularly useful + when multiple configurations manage the same resources. + format: int32 + minimum: 1 + type: integer validateHealths: description: |- ValidateHealths is a slice of Lua functions to run against @@ -2667,6 +2713,14 @@ spec: clusterSelector: description: ClusterSelector identifies clusters to associate to. type: string + continueOnConflict: + default: false + description: |- + By default (when ContinueOnConflict is unset or set to false), Sveltos stops deployment after + encountering the first conflict (e.g., another ClusterProfile already deployed the resource). + If set to true, Sveltos will attempt to deploy remaining resources in the ClusterProfile even + if conflicts are detected for previous resources. + type: boolean dependsOn: description: |- DependsOn specifies a list of other ClusterProfiles that this instance depends on. @@ -3225,6 +3279,21 @@ spec: - resource type: object type: array + tier: + default: 100 + description: |- + Tier controls the order of deployment for ClusterProfile or Profile resources targeting + the same cluster resources. + Imagine two configurations (ClusterProfiles or Profiles) trying to deploy the same resource (a Kubernetes + resource or an helm chart). By default, the first one to reach the cluster "wins" and deploys it. + Tier allows you to override this. When conflicts arise, the ClusterProfile or Profile with the **lowest** + Tier value takes priority and deploys the resource. + Higher Tier values represent lower priority. The default Tier value is 100. + Using Tiers provides finer control over resource deployment within your cluster, particularly useful + when multiple configurations manage the same resources. 
+ format: int32 + minimum: 1 + type: integer validateHealths: description: |- ValidateHealths is a slice of Lua functions to run against diff --git a/pkg/scope/clustersummary_test.go b/pkg/scope/clustersummary_test.go index cc0b0b15..63b5be94 100644 --- a/pkg/scope/clustersummary_test.go +++ b/pkg/scope/clustersummary_test.go @@ -154,7 +154,7 @@ var _ = Describe("ClusterSummaryScope", func() { Expect(found).To(Equal(true)) }) - It("SetFeatureSummary updates ClusterSummary Status FeatureSummary when nil", func() { + It("SetFailureMessage updates ClusterSummary Status FeatureSummary when nil", func() { params := &scope.ClusterSummaryScopeParams{ Client: c, Profile: clusterProfile, @@ -230,7 +230,7 @@ var _ = Describe("ClusterSummaryScope", func() { Expect(clusterSummary.Status.FeatureSummaries[0].Hash).To(Equal(hash)) }) - It("SetFeatureSummary updates ClusterSummary Status FeatureSummary when nil", func() { + It("SetFeatureStatus updates ClusterSummary Status FeatureSummary when nil", func() { params := &scope.ClusterSummaryScopeParams{ Client: c, Profile: clusterProfile, diff --git a/test/fv/dryrun_test.go b/test/fv/dryrun_test.go index 6a60e1ef..25e3d751 100644 --- a/test/fv/dryrun_test.go +++ b/test/fv/dryrun_test.go @@ -267,11 +267,9 @@ var _ = Describe("DryRun", func() { if err != nil { return err } - // Another ClusterProfile is managing this, by referencing same ConfigMap this ClusterProfile is, so no conflict. - // Content of ConfigMap has not changed. Action is actuall NoAction as changing SyncMode will cause reconciliation - // but no update will happen since ConfigMap has not changed since deployment time. + // Another ClusterProfile is managing this, even though by referencing same ConfigMap this ClusterProfile is, so conflict. err = verifyResourceReport(currentClusterReport, "kong", "kong-serviceaccount", - "ServiceAccount", "", string(configv1alpha1.NoResourceAction)) + "ServiceAccount", "", string(configv1alpha1.ConflictResourceAction)) if err != nil { return err } diff --git a/test/fv/missing_reference_test.go b/test/fv/missing_reference_test.go index c3f4955c..78496b24 100644 --- a/test/fv/missing_reference_test.go +++ b/test/fv/missing_reference_test.go @@ -44,7 +44,7 @@ var _ = Describe("Missing Reference", func() { namePrefix = "missing-reference-" ) - It("Deploy and updates resources referenced in ResourceRefs correctly", Label("FV", "EXTENDED"), func() { + It("Deploy and updates resources referenced in ResourceRefs correctly. 
Handles missing references by reporting an error.", Label("FV", "EXTENDED"), func() { Byf("Create a ClusterProfile matching Cluster %s/%s", kindWorkloadCluster.Namespace, kindWorkloadCluster.Name) clusterProfile := getClusterProfile(namePrefix, map[string]string{key: value}) clusterProfile.Spec.SyncMode = configv1alpha1.SyncModeContinuous diff --git a/test/fv/profile_test.go b/test/fv/profile_test.go index 2896c397..0b724fe1 100644 --- a/test/fv/profile_test.go +++ b/test/fv/profile_test.go @@ -71,7 +71,7 @@ var _ = Describe("Profile", func() { ns := randomString() jobName := randomString() - Byf("Create a configMap with a job") + Byf("Create a configMap with a job %s/%s", ns, jobName) configMap := createConfigMapWithPolicy(defaultNamespace, randomString(), fmt.Sprintf(jobTemplate, ns, jobName)) Expect(k8sClient.Create(context.TODO(), configMap)).To(Succeed()) @@ -106,7 +106,7 @@ var _ = Describe("Profile", func() { Expect(err).To(BeNil()) Expect(workloadClient).ToNot(BeNil()) - Byf("Verifying proper Job is present in the workload cluster") + Byf("Verifying proper Job %s/%s is present in the workload cluster", ns, jobName) Eventually(func() bool { currentJob := &batchv1.Job{} err = workloadClient.Get(context.TODO(), diff --git a/test/fv/tier_test.go b/test/fv/tier_test.go new file mode 100644 index 00000000..89d8fec6 --- /dev/null +++ b/test/fv/tier_test.go @@ -0,0 +1,208 @@ +/* +Copyright 2024. projectsveltos.io. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package fv_test + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + appsv1 "k8s.io/api/apps/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + + configv1alpha1 "github.com/projectsveltos/addon-controller/api/v1alpha1" + "github.com/projectsveltos/addon-controller/controllers" +) + +var _ = Describe("Helm", Serial, func() { + const ( + namePrefix = "tier-" + ) + + It("Use tier to solve conflicts", Label("FV1", "EXTENDED"), func() { + Byf("Create a ClusterProfile matching Cluster %s/%s", kindWorkloadCluster.Namespace, kindWorkloadCluster.Name) + clusterProfile := getClusterProfile(namePrefix, map[string]string{key: value}) + clusterProfile.Spec.SyncMode = configv1alpha1.SyncModeContinuous + Expect(k8sClient.Create(context.TODO(), clusterProfile)).To(Succeed()) + + verifyClusterProfileMatches(clusterProfile) + + verifyClusterSummary(controllers.ClusterProfileLabelName, + clusterProfile.Name, &clusterProfile.Spec, kindWorkloadCluster.Namespace, kindWorkloadCluster.Name) + + Byf("Update ClusterProfile %s to deploy helm charts", clusterProfile.Name) + currentClusterProfile := &configv1alpha1.ClusterProfile{} + Expect(k8sClient.Get(context.TODO(), types.NamespacedName{Name: clusterProfile.Name}, currentClusterProfile)).To(Succeed()) + currentClusterProfile.Spec.HelmCharts = []configv1alpha1.HelmChart{ + { + RepositoryURL: "https://kyverno.github.io/kyverno/", + RepositoryName: "kyverno", + ChartName: "kyverno/kyverno", + ChartVersion: "v3.1.1", + ReleaseName: "kyverno-latest", + ReleaseNamespace: "kyverno", + HelmChartAction: configv1alpha1.HelmChartActionInstall, + }, + { + RepositoryURL: "https://prometheus-community.github.io/helm-charts", + RepositoryName: "prometheus-community", + ChartName: "prometheus-community/prometheus", + ChartVersion: "23.4.0", + ReleaseName: "prometheus", + ReleaseNamespace: "prometheus", + HelmChartAction: configv1alpha1.HelmChartActionInstall, + }, + { + RepositoryURL: "https://grafana.github.io/helm-charts", + RepositoryName: "grafana", + ChartName: "grafana/grafana", + ChartVersion: "6.58.9", + ReleaseName: "grafana", + ReleaseNamespace: "grafana", + HelmChartAction: configv1alpha1.HelmChartActionInstall, + }, + } + currentClusterProfile.Spec.Tier = 90 + currentClusterProfile.Spec.ContinueOnConflict = true + + Expect(k8sClient.Update(context.TODO(), currentClusterProfile)).To(Succeed()) + + clusterSummary := verifyClusterSummary(controllers.ClusterProfileLabelName, + currentClusterProfile.Name, ¤tClusterProfile.Spec, + kindWorkloadCluster.Namespace, kindWorkloadCluster.Name) + + Byf("Getting client to access the workload cluster") + workloadClient, err := getKindWorkloadClusterKubeconfig() + Expect(err).To(BeNil()) + Expect(workloadClient).ToNot(BeNil()) + + Byf("Verifying kyverno deployment is created in the workload cluster") + Eventually(func() error { + depl := &appsv1.Deployment{} + return workloadClient.Get(context.TODO(), + types.NamespacedName{Namespace: "kyverno", Name: "kyverno-admission-controller"}, depl) + }, timeout, pollingInterval).Should(BeNil()) + + Byf("Verifying ClusterSummary %s status is set to Deployed for Helm feature", clusterSummary.Name) + verifyFeatureStatusIsProvisioned(kindWorkloadCluster.Namespace, clusterSummary.Name, configv1alpha1.FeatureHelm) + + charts := []configv1alpha1.Chart{ + {ReleaseName: "kyverno-latest", ChartVersion: "3.1.1", Namespace: "kyverno"}, + {ReleaseName: "grafana", ChartVersion: "6.58.9", Namespace: "grafana"}, + {ReleaseName: "prometheus", ChartVersion: "23.4.0", Namespace: "prometheus"}, + } + + 
verifyClusterConfiguration(configv1alpha1.ClusterProfileKind, clusterProfile.Name, + clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, configv1alpha1.FeatureHelm, + nil, charts) + + Byf("Creating another ClusterProfile matching the cluster") + newClusterProfile := getClusterProfile(namePrefix, map[string]string{key: value}) + newClusterProfile.Spec.SyncMode = configv1alpha1.SyncModeContinuous + Expect(k8sClient.Create(context.TODO(), newClusterProfile)).To(Succeed()) + + Byf("configuring the new clusterProfile to deploy Kyverno") + Expect(k8sClient.Get(context.TODO(), types.NamespacedName{Name: newClusterProfile.Name}, currentClusterProfile)).To(Succeed()) + currentClusterProfile.Spec.HelmCharts = []configv1alpha1.HelmChart{ + { + RepositoryURL: "https://kyverno.github.io/kyverno/", + RepositoryName: "kyverno", + ChartName: "kyverno/kyverno", + ChartVersion: "v3.1.4", + ReleaseName: "kyverno-latest", + ReleaseNamespace: "kyverno", + HelmChartAction: configv1alpha1.HelmChartActionInstall, + }, + } + Expect(k8sClient.Update(context.TODO(), currentClusterProfile)).To(Succeed()) + + newClusterSummary := verifyClusterSummary(controllers.ClusterProfileLabelName, + currentClusterProfile.Name, &currentClusterProfile.Spec, + kindWorkloadCluster.Namespace, kindWorkloadCluster.Name) + + Byf("Verifying new ClusterSummary reports a conflict") + Eventually(func() bool { + currentClusterSummary := &configv1alpha1.ClusterSummary{} + err = k8sClient.Get(context.TODO(), + types.NamespacedName{Namespace: newClusterSummary.Namespace, Name: newClusterSummary.Name}, currentClusterSummary) + if err != nil { + return false + } + if len(currentClusterSummary.Status.HelmReleaseSummaries) != 1 { + return false + } + return currentClusterSummary.Status.HelmReleaseSummaries[0].Status == configv1alpha1.HelmChartStatusConflict + }, timeout, pollingInterval).Should(BeTrue()) + + verifyClusterConfiguration(configv1alpha1.ClusterProfileKind, clusterProfile.Name, + clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, configv1alpha1.FeatureHelm, + nil, charts) + + Byf("Changing ClusterProfile %s tier", newClusterProfile.Name) + Expect(k8sClient.Get(context.TODO(), types.NamespacedName{Name: newClusterProfile.Name}, currentClusterProfile)).To(Succeed()) + currentClusterProfile.Spec.Tier = 50 + Expect(k8sClient.Update(context.TODO(), currentClusterProfile)).To(Succeed()) + + verifyClusterSummary(controllers.ClusterProfileLabelName, + currentClusterProfile.Name, &currentClusterProfile.Spec, + kindWorkloadCluster.Namespace, kindWorkloadCluster.Name) + + Byf("Verifying new ClusterSummary does not report a conflict") + Eventually(func() bool { + currentClusterSummary := &configv1alpha1.ClusterSummary{} + err = k8sClient.Get(context.TODO(), + types.NamespacedName{Namespace: newClusterSummary.Namespace, Name: newClusterSummary.Name}, currentClusterSummary) + if err != nil { + return false + } + if len(currentClusterSummary.Status.HelmReleaseSummaries) != 1 { + return false + } + return currentClusterSummary.Status.HelmReleaseSummaries[0].Status == configv1alpha1.HelmChartStatusManaging + }, timeout, pollingInterval).Should(BeTrue()) + + charts = []configv1alpha1.Chart{ + {ReleaseName: "grafana", ChartVersion: "6.58.9", Namespace: "grafana"}, + {ReleaseName: "prometheus", ChartVersion: "23.4.0", Namespace: "prometheus"}, + } + + verifyClusterConfiguration(configv1alpha1.ClusterProfileKind, clusterProfile.Name, + clusterSummary.Spec.ClusterNamespace, clusterSummary.Spec.ClusterName, 
configv1alpha1.FeatureHelm, + nil, charts) + + charts = []configv1alpha1.Chart{ + {ReleaseName: "kyverno-latest", ChartVersion: "3.1.4", Namespace: "kyverno"}, + } + + verifyClusterConfiguration(configv1alpha1.ClusterProfileKind, newClusterProfile.Name, + newClusterSummary.Spec.ClusterNamespace, newClusterSummary.Spec.ClusterName, configv1alpha1.FeatureHelm, + nil, charts) + + deleteClusterProfile(clusterProfile) + deleteClusterProfile(newClusterProfile) + + Byf("Verifying kyverno deployment is removed from workload cluster") + Eventually(func() bool { + depl := &appsv1.Deployment{} + err = workloadClient.Get(context.TODO(), + types.NamespacedName{Namespace: "kyverno", Name: "kyverno-latest"}, depl) + return apierrors.IsNotFound(err) + }, timeout, pollingInterval).Should(BeTrue()) + }) +})
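As a usage illustration of the fields this patch adds, here is a minimal sketch that builds a ClusterProfile in Go using only API fields that appear elsewhere in the diff (Spec.Tier, Spec.ContinueOnConflict, Spec.HelmCharts). The profile name is made up, the chart coordinates are copied from the FV test above, and Tier 50 mirrors the value the test uses to win the conflict; creating the object against a management cluster would additionally require a controller-runtime client, which is omitted here.

```go
package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	configv1alpha1 "github.com/projectsveltos/addon-controller/api/v1alpha1"
)

func main() {
	// A ClusterProfile that outranks default-tier profiles (50 < 100) and keeps
	// deploying its remaining add-ons even if one of them hits a conflict.
	cp := &configv1alpha1.ClusterProfile{
		ObjectMeta: metav1.ObjectMeta{Name: "tier-example"},
		Spec: configv1alpha1.Spec{
			SyncMode:           configv1alpha1.SyncModeContinuous,
			Tier:               50,
			ContinueOnConflict: true,
			HelmCharts: []configv1alpha1.HelmChart{
				{
					RepositoryURL:    "https://kyverno.github.io/kyverno/",
					RepositoryName:   "kyverno",
					ChartName:        "kyverno/kyverno",
					ChartVersion:     "v3.1.4",
					ReleaseName:      "kyverno-latest",
					ReleaseNamespace: "kyverno",
					HelmChartAction:  configv1alpha1.HelmChartActionInstall,
				},
			},
		},
	}

	fmt.Printf("ClusterProfile %s resolves conflicts at tier %d\n", cp.Name, cp.Spec.Tier)
}
```

On a conflict, the profile with the lower Tier value deploys the resource and the previous owner's ClusterSummary is requeued, which is exactly the behavior the tier_test.go FV test exercises.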