Skip to content

Commit 4954fe3

Browse files
committed
add scheduler cluster deletion and minor fixes
1 parent 5f62323 commit 4954fe3

File tree

12 files changed

+142
-18
lines changed

12 files changed

+142
-18
lines changed

api/clusters/v1alpha1/accessrequest_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ type AccessRequestStatus struct {
3838
CommonStatus `json:",inline"`
3939

4040
// Phase is the current phase of the request.
41+
// +kubebuilder:default=Pending
42+
// +kubebuilder:validation:Enum=Pending;Granted;Denied
4143
Phase RequestPhase `json:"phase"`
4244

4345
// SecretRef holds the reference to the secret that contains the actual credentials.

api/clusters/v1alpha1/cluster_types.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
88
"k8s.io/apimachinery/pkg/runtime"
9+
"k8s.io/apimachinery/pkg/util/sets"
910
)
1011

1112
// ClusterSpec defines the desired state of Cluster
@@ -131,12 +132,19 @@ func (cs *ClusterStatus) SetProviderStatus(from any) error {
131132

132133
// GetTenancyCount returns the number of ClusterRequests currently pointing to this cluster.
133134
// This is determined by counting the finalizers that have the corresponding prefix.
135+
// Note that only unique finalizers are counted, so if there are multiple identical request finalizers
136+
// (which should not happen), this method's return value might not match the actual number of finalizers with the prefix.
134137
func (c *Cluster) GetTenancyCount() int {
135-
count := 0
138+
return c.GetRequestUIDs().Len()
139+
}
140+
141+
// GetRequestUIDs returns the UIDs of all ClusterRequests that have marked this cluster with a corresponding finalizer.
142+
func (c *Cluster) GetRequestUIDs() sets.Set[string] {
143+
res := sets.New[string]()
136144
for _, fin := range c.Finalizers {
137145
if strings.HasPrefix(fin, RequestFinalizerOnClusterPrefix) {
138-
count++
146+
res.Insert(strings.TrimPrefix(fin, RequestFinalizerOnClusterPrefix))
139147
}
140148
}
141-
return count
149+
return res
142150
}

api/clusters/v1alpha1/clusterrequest_types.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ type ClusterRequestStatus struct {
1616
CommonStatus `json:",inline"`
1717

1818
// Phase is the current phase of the request.
19+
// +kubebuilder:default=Pending
20+
// +kubebuilder:validation:Enum=Pending;Granted;Denied
1921
Phase RequestPhase `json:"phase"`
2022

2123
// Cluster is the reference to the Cluster that was returned as a result of a granted request.

api/crds/manifests/clusters.openmcp.cloud_accessrequests.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,12 @@ spec:
209209
format: int64
210210
type: integer
211211
phase:
212+
default: Pending
212213
description: Phase is the current phase of the request.
214+
enum:
215+
- Pending
216+
- Granted
217+
- Denied
213218
type: string
214219
reason:
215220
description: Reason is expected to contain a CamelCased string that

api/crds/manifests/clusters.openmcp.cloud_clusterrequests.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,12 @@ spec:
130130
format: int64
131131
type: integer
132132
phase:
133+
default: Pending
133134
description: Phase is the current phase of the request.
135+
enum:
136+
- Pending
137+
- Granted
138+
- Denied
134139
type: string
135140
reason:
136141
description: Reason is expected to contain a CamelCased string that

docs/controller/scheduler.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,17 @@ scheduler:
3333
tenancy: Exclusive
3434
platform:
3535
template:
36+
metadata:
37+
labels:
38+
clusters.openmcp.cloud/delete-without-requests: "false"
3639
spec:
3740
profile: gcp-large
3841
tenancy: Shared
3942
onboarding:
4043
template:
44+
metadata:
45+
labels:
46+
clusters.openmcp.cloud/delete-without-requests: "false"
4147
spec:
4248
profile: gcp-workerless
4349
tenancy: Shared
@@ -108,3 +114,9 @@ If the cluster template from the configuration for the requested purpose has `me
108114
For clusters with `Exclusive` tenancy, or for `Shared` ones with a limited tenancy count, the scheduler uses `metadata.generateName` from the cluster template or defaults it to `<purpose>-`, if not set.
109115

110116
For clusters with unlimited tenancy count, `metadata.generateName` takes precedence, if specified in the template, with `metadata.name` being evaluated second. If neither is specified, `<purpose>` is used as `metadata.name` (as there should be only one instance of this cluster in this namespace due to the unlimited tenancy count, there is no need to add a randomized suffix to the name).
117+
118+
## Deletion of Clusters
119+
120+
By default, the scheduler marks every `Cluster` that it has created itself with a `clusters.openmcp.cloud/delete-without-requests: "true"` label. When a `ClusterRequest` is deleted, the scheduler removes the request's finalizers from all clusters and if it was the last request finalizer on that cluster and the cluster has the aforementioned label, the scheduler will delete the cluster.
121+
122+
To prevent the scheduler from deleting a cluster that was created by it after the last request finalizer has been removed from the `Cluster` resource, add the label with any value except `"true"` to the cluster's template in the scheduler configuration.

internal/config/config_scheduler.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ type SchedulerSelectors struct {
7070
Requests *metav1.LabelSelector `json:"requests,omitempty"`
7171
}
7272
type CompletedSchedulerSelectors struct {
73-
Clusters labels.Selector
74-
ClusterRequests labels.Selector
73+
Clusters labels.Selector
74+
Requests labels.Selector
7575
}
7676

7777
func (c *SchedulerConfig) Default(_ *field.Path) error {
@@ -175,7 +175,7 @@ func (c *SchedulerConfig) Complete(fldPath *field.Path) error {
175175
}
176176
if c.Selectors.Requests != nil {
177177
var err error
178-
c.CompletedSelectors.ClusterRequests, err = metav1.LabelSelectorAsSelector(c.Selectors.Requests)
178+
c.CompletedSelectors.Requests, err = metav1.LabelSelectorAsSelector(c.Selectors.Requests)
179179
if err != nil {
180180
return field.Invalid(fldPath.Child("selectors").Child("requests"), c.Selectors.Requests, err.Error())
181181
}
@@ -184,8 +184,8 @@ func (c *SchedulerConfig) Complete(fldPath *field.Path) error {
184184
if c.CompletedSelectors.Clusters == nil {
185185
c.CompletedSelectors.Clusters = labels.Everything()
186186
}
187-
if c.CompletedSelectors.ClusterRequests == nil {
188-
c.CompletedSelectors.ClusterRequests = labels.Everything()
187+
if c.CompletedSelectors.Requests == nil {
188+
c.CompletedSelectors.Requests = labels.Everything()
189189
}
190190

191191
for purpose, definition := range c.PurposeMappings {

internal/controllers/scheduler/controller.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
8181
log.Info("Resource not found")
8282
return ReconcileResult{}
8383
}
84-
return ReconcileResult{ReconcileError: errutils.WithReason(fmt.Errorf("unable to get resource '%s' from cluster: %w", req.NamespacedName.String(), err), cconst.ReasonOnboardingClusterInteractionProblem)}
84+
return ReconcileResult{ReconcileError: errutils.WithReason(fmt.Errorf("unable to get resource '%s' from cluster: %w", req.NamespacedName.String(), err), cconst.ReasonPlatformClusterInteractionProblem)}
8585
}
8686

8787
// handle operation annotation
@@ -95,7 +95,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
9595
case clustersv1alpha1.OperationAnnotationValueReconcile:
9696
log.Debug("Removing reconcile operation annotation from resource")
9797
if err := ctrlutils.EnsureAnnotation(ctx, r.PlatformCluster.Client(), cr, clustersv1alpha1.OperationAnnotation, "", true, ctrlutils.DELETE); err != nil {
98-
return ReconcileResult{ReconcileError: errutils.WithReason(fmt.Errorf("error removing operation annotation: %w", err), cconst.ReasonOnboardingClusterInteractionProblem)}
98+
return ReconcileResult{ReconcileError: errutils.WithReason(fmt.Errorf("error removing operation annotation: %w", err), cconst.ReasonPlatformClusterInteractionProblem)}
9999
}
100100
}
101101
}
@@ -116,7 +116,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
116116
if controllerutil.AddFinalizer(cr, clustersv1alpha1.ClusterRequestFinalizer) {
117117
log.Info("Adding finalizer")
118118
if err := r.PlatformCluster.Client().Patch(ctx, cr, client.MergeFrom(rr.OldObject)); err != nil {
119-
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error patching finalizer on resource '%s': %w", req.NamespacedName.String(), err), cconst.ReasonOnboardingClusterInteractionProblem)
119+
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error patching finalizer on resource '%s': %w", req.NamespacedName.String(), err), cconst.ReasonPlatformClusterInteractionProblem)
120120
return rr
121121
}
122122
}
@@ -147,7 +147,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
147147
}
148148
clusterList := &clustersv1alpha1.ClusterList{}
149149
if err := r.PlatformCluster.Client().List(ctx, clusterList, client.InNamespace(namespace), client.MatchingLabelsSelector{Selector: cDef.CompletedSelector}); err != nil {
150-
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error listing Clusters: %w", err), cconst.ReasonOnboardingClusterInteractionProblem)
150+
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error listing Clusters: %w", err), cconst.ReasonPlatformClusterInteractionProblem)
151151
return rr
152152
}
153153
clusters := make([]*clustersv1alpha1.Cluster, len(clusterList.Items))
@@ -233,7 +233,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
233233
if controllerutil.AddFinalizer(cluster, fin) {
234234
log.Debug("Adding finalizer to cluster", "clusterName", cluster.Name, "clusterNamespace", cluster.Namespace, "finalizer", fin)
235235
if err := r.PlatformCluster.Client().Patch(ctx, cluster, client.MergeFrom(oldCluster)); err != nil {
236-
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error patching finalizer '%s' on cluster '%s/%s': %w", fin, cluster.Namespace, cluster.Name, err), cconst.ReasonOnboardingClusterInteractionProblem)
236+
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error patching finalizer '%s' on cluster '%s/%s': %w", fin, cluster.Namespace, cluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem)
237237
return rr
238238
}
239239
}
@@ -303,7 +303,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
303303
rr.ReconcileError = errutils.WithReason(fmt.Errorf("Cluster '%s/%s' already exists, this is not supposed to happen", cluster.Namespace, cluster.Name), cconst.ReasonInternalError)
304304
return rr
305305
}
306-
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error creating cluster '%s/%s': %w", cluster.Namespace, cluster.Name, err), cconst.ReasonOnboardingClusterInteractionProblem)
306+
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error creating cluster '%s/%s': %w", cluster.Namespace, cluster.Name, err), cconst.ReasonPlatformClusterInteractionProblem)
307307
return rr
308308
}
309309
}
@@ -322,7 +322,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
322322
fin := cr.FinalizerForCluster()
323323
clusterList := &clustersv1alpha1.ClusterList{}
324324
if err := r.PlatformCluster.Client().List(ctx, clusterList, client.MatchingLabelsSelector{Selector: r.Config.CompletedSelectors.Clusters}); err != nil {
325-
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error listing Clusters: %w", err), cconst.ReasonOnboardingClusterInteractionProblem)
325+
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error listing Clusters: %w", err), cconst.ReasonPlatformClusterInteractionProblem)
326326
return rr
327327
}
328328
clusters := make([]*clustersv1alpha1.Cluster, len(clusterList.Items))
@@ -344,7 +344,17 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
344344
oldCluster := c.DeepCopy()
345345
if controllerutil.RemoveFinalizer(c, fin) {
346346
if err := r.PlatformCluster.Client().Patch(ctx, c, client.MergeFrom(oldCluster)); err != nil {
347-
errs.Append(errutils.WithReason(fmt.Errorf("error patching finalizer '%s' on cluster '%s/%s': %w", fin, c.Namespace, c.Name, err), cconst.ReasonOnboardingClusterInteractionProblem))
347+
errs.Append(errutils.WithReason(fmt.Errorf("error patching finalizer '%s' on cluster '%s/%s': %w", fin, c.Namespace, c.Name, err), cconst.ReasonPlatformClusterInteractionProblem))
348+
}
349+
}
350+
if c.GetTenancyCount() == 0 && ctrlutils.HasLabelWithValue(c, clustersv1alpha1.DeleteWithoutRequestsLabel, "true") {
351+
log.Info("Deleting cluster without requests", "clusterName", c.Name, "clusterNamespace", c.Namespace)
352+
if err := r.PlatformCluster.Client().Delete(ctx, c); err != nil {
353+
if apierrors.IsNotFound(err) {
354+
log.Info("Cluster already deleted", "clusterName", c.Name, "clusterNamespace", c.Namespace)
355+
} else {
356+
errs.Append(errutils.WithReason(fmt.Errorf("error deleting cluster '%s/%s': %w", c.Namespace, c.Name, err), cconst.ReasonPlatformClusterInteractionProblem))
357+
}
348358
}
349359
}
350360
}
@@ -357,7 +367,7 @@ func (r *ClusterScheduler) reconcile(ctx context.Context, log logging.Logger, re
357367
if controllerutil.RemoveFinalizer(cr, clustersv1alpha1.ClusterRequestFinalizer) {
358368
log.Info("Removing finalizer")
359369
if err := r.PlatformCluster.Client().Patch(ctx, cr, client.MergeFrom(rr.OldObject)); err != nil {
360-
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error patching finalizer on resource '%s': %w", req.NamespacedName.String(), err), cconst.ReasonOnboardingClusterInteractionProblem)
370+
rr.ReconcileError = errutils.WithReason(fmt.Errorf("error patching finalizer on resource '%s': %w", req.NamespacedName.String(), err), cconst.ReasonPlatformClusterInteractionProblem)
361371
return rr
362372
}
363373
}
@@ -374,9 +384,10 @@ func (r *ClusterScheduler) SetupWithManager(mgr ctrl.Manager) error {
374384
// watch ClusterRequest resources
375385
For(&clustersv1alpha1.ClusterRequest{}).
376386
WithEventFilter(predicate.And(
377-
ctrlutils.LabelSelectorPredicate(r.Config.CompletedSelectors.ClusterRequests),
387+
ctrlutils.LabelSelectorPredicate(r.Config.CompletedSelectors.Requests),
378388
predicate.Or(
379389
predicate.GenerationChangedPredicate{},
390+
ctrlutils.DeletionTimestampChangedPredicate{},
380391
ctrlutils.GotAnnotationPredicate(clustersv1alpha1.OperationAnnotation, clustersv1alpha1.OperationAnnotationValueReconcile),
381392
ctrlutils.LostAnnotationPredicate(clustersv1alpha1.OperationAnnotation, clustersv1alpha1.OperationAnnotationValueIgnore),
382393
),

internal/controllers/scheduler/controller_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
. "github.com/onsi/gomega"
99
. "github.com/onsi/gomega/gstruct"
1010

11+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1112
"k8s.io/apimachinery/pkg/runtime"
1213
"k8s.io/apimachinery/pkg/util/uuid"
1314
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -399,4 +400,50 @@ var _ = Describe("Scheduler", func() {
399400
Expect(req3.Status.Cluster.Namespace).To(Equal("foo"))
400401
})
401402

403+
FIt("should handle the delete-without-requests label correctly", func() {
404+
_, env := defaultTestSetup("testdata", "test-05")
405+
406+
// should create a new cluster
407+
req := &clustersv1alpha1.ClusterRequest{}
408+
Expect(env.Client().Get(env.Ctx, ctrlutils.ObjectKey("delete", "foo"), req)).To(Succeed())
409+
Expect(req.Status.Cluster).To(BeNil())
410+
411+
env.ShouldReconcile(testutils.RequestFromObject(req))
412+
Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req), req)).To(Succeed())
413+
Expect(req.Status.Cluster).ToNot(BeNil())
414+
cluster := &clustersv1alpha1.Cluster{}
415+
Expect(env.Client().Get(env.Ctx, ctrlutils.ObjectKey(req.Status.Cluster.Name, req.Status.Cluster.Namespace), cluster)).To(Succeed())
416+
Expect(cluster.Labels).To(HaveKeyWithValue(clustersv1alpha1.DeleteWithoutRequestsLabel, "true"))
417+
418+
// should delete the cluster
419+
Expect(env.Client().Delete(env.Ctx, req)).To(Succeed())
420+
env.ShouldReconcile(testutils.RequestFromObject(req))
421+
Eventually(func() bool {
422+
return apierrors.IsNotFound(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req), req))
423+
}, 3).Should(BeTrue(), "Request should be deleted")
424+
Eventually(func() bool {
425+
return apierrors.IsNotFound(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(cluster), cluster))
426+
}, 3).Should(BeTrue(), "Cluster should be deleted")
427+
428+
// should create a new cluster
429+
req2 := &clustersv1alpha1.ClusterRequest{}
430+
Expect(env.Client().Get(env.Ctx, ctrlutils.ObjectKey("no-delete", "foo"), req2)).To(Succeed())
431+
Expect(req2.Status.Cluster).To(BeNil())
432+
433+
env.ShouldReconcile(testutils.RequestFromObject(req2))
434+
Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req2), req2)).To(Succeed())
435+
Expect(req2.Status.Cluster).ToNot(BeNil())
436+
Expect(env.Client().Get(env.Ctx, ctrlutils.ObjectKey(req2.Status.Cluster.Name, req2.Status.Cluster.Namespace), cluster)).To(Succeed())
437+
Expect(cluster.Labels).To(HaveKeyWithValue(clustersv1alpha1.DeleteWithoutRequestsLabel, "false"))
438+
439+
// should not delete the cluster
440+
Expect(env.Client().Delete(env.Ctx, req2)).To(Succeed())
441+
env.ShouldReconcile(testutils.RequestFromObject(req2))
442+
Eventually(func() bool {
443+
return apierrors.IsNotFound(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(req2), req2))
444+
}, 3).Should(BeTrue(), "Request should be deleted")
445+
Expect(env.Client().Get(env.Ctx, client.ObjectKeyFromObject(cluster), cluster)).To(Succeed(), "Cluster should not be deleted")
446+
Expect(cluster.DeletionTimestamp).To(BeZero(), "Cluster should not be marked for deletion")
447+
})
448+
402449
})
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: clusters.openmcp.cloud/v1alpha1
2+
kind: ClusterRequest
3+
metadata:
4+
name: delete
5+
namespace: foo
6+
uid: 4d6aa495-54c0-4df2-bc7b-05b103f02e69
7+
spec:
8+
purpose: delete

0 commit comments

Comments
 (0)