From 3041cd624d5809ec444910ecdee8ff2b582100ec Mon Sep 17 00:00:00 2001 From: Balazs Nadasdi Date: Fri, 20 Oct 2023 12:15:18 +0200 Subject: [PATCH 1/8] feat: add remediation retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If `.Spec.Remediation` is not defined, it will retry indefinitely. If `.Spec.Remediation.Retries` is defined, it will stop requeuing after it reached the limit. ``` ❯ date && \ kubectl logs -n flux-system \ --selector='app.kubernetes.io/name=tf-controller' \ --tail=5000 --since-time="2023-10-20T12:40:00.000Z" \ | grep -E '(Reconciliation failed, retry)|(reached maximum number)' \ | jq --raw-output '{"ts": .ts, "msg": .msg}' Fri Oct 20 02:50:27 PM CEST 2023 { "ts": "2023-10-20T12:41:30.591Z", "msg": "Reconciliation failed, retry (1/3) after 15s. Generation: 1" } { "ts": "2023-10-20T12:42:07.218Z", "msg": "Reconciliation failed, retry (2/3) after 15s. Generation: 1" } { "ts": "2023-10-20T12:42:43.926Z", "msg": "Reconciliation failed, retry (3/3) after 15s. Generation: 1" } { "ts": "2023-10-20T12:43:20.842Z", "msg": "Resource reached maximum number of retries. Generation: 1" } ``` Resolves #870 Signed-off-by: Balazs Nadasdi --- api/v1alpha2/terraform_types.go | 46 +++++++++++++++++++ charts/tf-controller/templates/crds.yaml | 18 ++++++++ .../infra.contrib.fluxcd.io_terraforms.yaml | 16 +++++++ config/tilt/test/tf-dev-subject.yaml | 2 + controllers/tf_controller.go | 37 ++++++++++++++- 5 files changed, 118 insertions(+), 1 deletion(-) diff --git a/api/v1alpha2/terraform_types.go b/api/v1alpha2/terraform_types.go index f1e8e90e..acd1af5b 100644 --- a/api/v1alpha2/terraform_types.go +++ b/api/v1alpha2/terraform_types.go @@ -261,6 +261,11 @@ type TerraformSpec struct { // BranchPlanner configuration. // +optional BranchPlanner *BranchPlanner `json:"branchPlanner,omitempty"` + + // Remediation holds the remediation configuration for when the reconciliation + // fails. The default is to not perform any action. + // +optional + Remediation *Remediation `json:"remediation,omitempty"` } type BranchPlanner struct { @@ -271,6 +276,14 @@ type BranchPlanner struct { EnablePathScope bool `json:"enablePathScope"` } +type Remediation struct { + // Retries is the number of retries that should be attempted on failures + // before bailing. Defaults to '0', a negative integer equals to unlimited + // retries. + // +optional + Retries int64 `json:"retries,omitempty"` +} + type CloudSpec struct { // +required Organization string `json:"organization"` @@ -386,6 +399,11 @@ type TerraformStatus struct { // +optional Lock LockStatus `json:"lock,omitempty"` + + // ReconciliationFailures is the counter to track the number of reconciliation + // failures. + // +optional + ReconciliationFailures int64 `json:"reconciliationFailures,omitempty"` } // LockStatus defines the observed state of a Terraform State Lock @@ -900,6 +918,34 @@ func (in *Terraform) GetRunnerHostname(target string, clusterDomain string) stri } } +func (in *Terraform) GetRetries() int64 { + if in.Spec.Remediation == nil { + return 0 + } + + return in.Spec.Remediation.Retries +} + +func (in *Terraform) GetReconciliationFailures() int64 { + return in.Status.ReconciliationFailures +} + +func (in *Terraform) IncrementReconciliationFailures() { + in.Status.ReconciliationFailures++ +} + +func (in *Terraform) ResetReconciliationFailures() { + in.Status.ReconciliationFailures = 0 +} + +func (in *Terraform) ShouldRetry() bool { + if in.Spec.Remediation == nil { + return true + } + + return in.GetReconciliationFailures() <= in.Spec.Remediation.Retries +} + func (in *TerraformSpec) GetAlwaysCleanupRunnerPod() bool { if in.AlwaysCleanupRunnerPod == nil { return true diff --git a/charts/tf-controller/templates/crds.yaml b/charts/tf-controller/templates/crds.yaml index 09c20817..68313876 100644 --- a/charts/tf-controller/templates/crds.yaml +++ b/charts/tf-controller/templates/crds.yaml @@ -5269,6 +5269,17 @@ spec: description: RefreshBeforeApply forces refreshing of the state before the apply step. type: boolean + remediation: + description: Remediation holds the remediation configuration for when + the reconciliation fails. The default is to not perform any action. + properties: + retries: + description: Retries is the number of retries that should be attempted + on failures before bailing. Defaults to '0', a negative integer + equals to unlimited retries. + format: int64 + type: integer + type: object retryInterval: description: The interval at which to retry a previously failed reconciliation. The default value is 15 when not specified. @@ -10035,6 +10046,8 @@ spec: - sourceRef type: object status: + default: + observedGeneration: -1 description: TerraformStatus defines the observed state of Terraform properties: availableOutputs: @@ -10197,6 +10210,11 @@ spec: pending: type: string type: object + reconciliationFailures: + description: ReconciliationFailures is the counter to track the number + of reconciliation failures. + format: int64 + type: integer type: object type: object served: true diff --git a/config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml b/config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml index 42a59790..d2604f73 100644 --- a/config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml +++ b/config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml @@ -5267,6 +5267,17 @@ spec: description: RefreshBeforeApply forces refreshing of the state before the apply step. type: boolean + remediation: + description: Remediation holds the remediation configuration for when + the reconciliation fails. The default is to not perform any action. + properties: + retries: + description: Retries is the number of retries that should be attempted + on failures before bailing. Defaults to '0', a negative integer + equals to unlimited retries. + format: int64 + type: integer + type: object retryInterval: description: The interval at which to retry a previously failed reconciliation. The default value is 15 when not specified. @@ -10206,6 +10217,11 @@ spec: pending: type: string type: object + reconciliationFailures: + description: ReconciliationFailures is the counter to track the number + of reconciliation failures. + format: int64 + type: integer type: object type: object served: true diff --git a/config/tilt/test/tf-dev-subject.yaml b/config/tilt/test/tf-dev-subject.yaml index 388c81ea..47c10657 100644 --- a/config/tilt/test/tf-dev-subject.yaml +++ b/config/tilt/test/tf-dev-subject.yaml @@ -27,6 +27,8 @@ spec: path: ./ interval: 20s approvePlan: auto + remediation: + retries: 3 sourceRef: kind: GitRepository name: helloworld diff --git a/controllers/tf_controller.go b/controllers/tf_controller.go index dfcff7e6..62de3555 100644 --- a/controllers/tf_controller.go +++ b/controllers/tf_controller.go @@ -429,6 +429,14 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // reconcile Terraform by applying the latest revision traceLog.Info("Run reconcile for the Terraform resource") reconciledTerraform, reconcileErr := r.reconcile(ctx, runnerClient, *terraform.DeepCopy(), sourceObj, reconciliationLoopID) + + // Check remediation. + if reconcileErr == nil { + reconciledTerraform.ResetReconciliationFailures() + } else { + reconciledTerraform.IncrementReconciliationFailures() + } + traceLog.Info("Patch the status of the Terraform resource") if err := r.patchStatus(ctx, req.NamespacedName, reconciledTerraform.Status); err != nil { log.Error(err, "unable to update status after the reconciliation is complete") @@ -445,6 +453,11 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( terraform.GetRetryInterval().String()), "revision", sourceObj.GetArtifact().Revision) + + if !terraform.ShouldRetry() { + return ctrl.Result{Requeue: false}, nil + } + return ctrl.Result{RequeueAfter: terraform.GetRetryInterval()}, nil } else if reconcileErr != nil { // broadcast the reconciliation failure and requeue at the specified retry interval @@ -455,7 +468,29 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( sourceObj.GetArtifact().Revision) traceLog.Info("Record an event for the failure") r.event(ctx, *reconciledTerraform, sourceObj.GetArtifact().Revision, eventv1.EventSeverityError, reconcileErr.Error(), nil) - return ctrl.Result{RequeueAfter: terraform.GetRetryInterval()}, nil + + if !reconciledTerraform.ShouldRetry() { + log.Info(fmt.Sprintf("Resource reached maximum number of retries. Generation: %d", reconciledTerraform.GetGeneration())) + return ctrl.Result{Requeue: false}, nil + } + + if reconciledTerraform.Spec.Remediation != nil { + log.Info(fmt.Sprintf( + "Reconciliation failed, retry (%d/%d) after %s. Generation: %d", + reconciledTerraform.GetReconciliationFailures(), + reconciledTerraform.Spec.Remediation.Retries, + reconciledTerraform.GetRetryInterval(), + reconciledTerraform.GetGeneration(), + )) + } else { + log.Info(fmt.Sprintf( + "Reconciliation failed, retry after %s. Generation: %d", + reconciledTerraform.GetRetryInterval(), + reconciledTerraform.GetGeneration(), + )) + } + + return ctrl.Result{RequeueAfter: reconciledTerraform.GetRetryInterval()}, nil } log.Info(fmt.Sprintf("Reconciliation completed. Generation: %d", reconciledTerraform.GetGeneration())) From 8934278c82b62e44b5cdb0b0716df263136578d5 Mon Sep 17 00:00:00 2001 From: Balazs Nadasdi Date: Fri, 20 Oct 2023 14:54:29 +0200 Subject: [PATCH 2/8] testing: add retry tests Retry test surfaced an issue, fixed that too. * Did not reset retry counter. * Retried more than it should (wrong check). References: * https://github.com/weaveworks/tf-controller/issues/870 Signed-off-by: Balazs Nadasdi --- api/v1alpha2/terraform_types.go | 2 +- .../tc000243_remediation_retry_test.go | 210 ++++++++++++++++++ controllers/tf_controller.go | 28 ++- docs/References/terraform.md | 77 +++++++ 4 files changed, 311 insertions(+), 6 deletions(-) create mode 100644 controllers/tc000243_remediation_retry_test.go diff --git a/api/v1alpha2/terraform_types.go b/api/v1alpha2/terraform_types.go index acd1af5b..354102ef 100644 --- a/api/v1alpha2/terraform_types.go +++ b/api/v1alpha2/terraform_types.go @@ -943,7 +943,7 @@ func (in *Terraform) ShouldRetry() bool { return true } - return in.GetReconciliationFailures() <= in.Spec.Remediation.Retries + return in.GetReconciliationFailures() < in.Spec.Remediation.Retries } func (in *TerraformSpec) GetAlwaysCleanupRunnerPod() bool { diff --git a/controllers/tc000243_remediation_retry_test.go b/controllers/tc000243_remediation_retry_test.go new file mode 100644 index 00000000..19183e97 --- /dev/null +++ b/controllers/tc000243_remediation_retry_test.go @@ -0,0 +1,210 @@ +package controllers + +import ( + "context" + "fmt" + "testing" + "time" + + . "github.com/onsi/gomega" + + sourcev1 "github.com/fluxcd/source-controller/api/v1" + infrav1 "github.com/weaveworks/tf-controller/api/v1alpha2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +// +kubebuilder:docs-gen:collapse=Imports + +func Test_000243_remediation_retry_test(t *testing.T) { + Spec("This spec describes the behaviour of a Terraform resource that obtains a bad tar.gz file blob from its Source reference.") + It("should report the error and stop reconcile.") + + const ( + sourceName = "test-tf-controller-retry" + terraformName = "retry" + ) + g := NewWithT(t) + ctx := context.Background() + + Given("a GitRepository") + By("defining a new GitRepository resource") + testRepo := sourcev1.GitRepository{ + ObjectMeta: metav1.ObjectMeta{ + Name: sourceName, + Namespace: "flux-system", + }, + Spec: sourcev1.GitRepositorySpec{ + URL: "https://github.com/openshift-fluxv2-poc/podinfo", + Reference: &sourcev1.GitRepositoryRef{ + Branch: "master", + }, + Interval: metav1.Duration{Duration: time.Second * 30}, + }, + } + + By("creating the GitRepository resource in the cluster.") + It("should be created successfully.") + g.Expect(k8sClient.Create(ctx, &testRepo)).Should(Succeed()) + + Given("the GitRepository's reconciled status.") + By("setting the GitRepository's status, with the downloadable *bad* BLOB's URL, with the correct checksum.") + updatedTime := time.Now() + testRepo.Status = sourcev1.GitRepositoryStatus{ + ObservedGeneration: int64(1), + Conditions: []metav1.Condition{ + { + Type: "Ready", + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{Time: updatedTime}, + Reason: "GitOperationSucceed", + Message: "Fetched revision: master/b8e362c206e3d0cbb7ed22ced771a0056455a2fb", + }, + }, + Artifact: &sourcev1.Artifact{ + Path: "gitrepository/flux-system/test-tf-controller/b8e362c206e3d0cbb7ed22ced771a0056455a2fb.tar.gz", + URL: server.URL() + "/bad.tar.gz", + Revision: "master/b8e362c206e3d0cbb7ed22ced771a0056455a2fb", + Digest: "sha256:196d115c43583ccd10107d631d8a594be542a75911f9832a5ec2c1e22b65387b", + LastUpdateTime: metav1.Time{Time: updatedTime}, + }, + } + It("should be updated successfully.") + g.Expect(k8sClient.Status().Update(ctx, &testRepo)).Should(Succeed()) + defer waitResourceToBeDelete(g, &testRepo) + + Given("a Terraform object with auto approve, and attaching it to the bad GitRepository resource.") + By("creating a new TF resource and attaching to the bad repo via `sourceRef`.") + var helloWorldTF infrav1.Terraform + err := helloWorldTF.FromBytes([]byte(fmt.Sprintf(` +apiVersion: infra.contrib.fluxcd.io/v1alpha2 +kind: Terraform +metadata: + name: %s + namespace: flux-system +spec: + approvePlan: auto + path: ./terraform-hello-world-example + remediation: + retries: 3 + retryInterval: 5s + sourceRef: + kind: GitRepository + name: %s + namespace: flux-system + interval: 10s +`, terraformName, sourceName)), runnerServer.Scheme) + g.Expect(err).ToNot(HaveOccurred()) + + It("should be created and attached successfully.") + g.Expect(k8sClient.Create(ctx, &helloWorldTF)).Should(Succeed()) + defer waitResourceToBeDelete(g, &helloWorldTF) + + By("checking that the TF resource existed inside the cluster.") + helloWorldTFKey := types.NamespacedName{Namespace: "flux-system", Name: terraformName} + createdHelloWorldTF := infrav1.Terraform{} + g.Eventually(func() bool { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + It("should be reconciled and contain some status conditions.") + By("checking that the TF resource's status conditions has 1 element.") + g.Eventually(func() int { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return -1 + } + return len(createdHelloWorldTF.Status.Conditions) + }, timeout, interval).Should(Equal(1)) + + It("should be an error") + By("checking that the Ready's reason of the TF resource become `ArtifactFailed`.") + type failedStatusCheckResult struct { + Type string + Reason string + Message string + Status metav1.ConditionStatus + Retries int64 + } + expected := failedStatusCheckResult{ + Type: "Ready", + Reason: "ArtifactFailed", + Message: "rpc error: code = Unknown desc = failed to untar artifact, error: requires gzip-compressed body: gzip: invalid header", + Status: metav1.ConditionFalse, + Retries: 3, + } + g.Eventually(func() interface{} { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return nil + } + for _, c := range createdHelloWorldTF.Status.Conditions { + if c.Type == "Ready" { + return failedStatusCheckResult{ + Type: c.Type, + Reason: c.Reason, + Message: c.Message, + Status: c.Status, + Retries: createdHelloWorldTF.Status.ReconciliationFailures, + } + } + } + return createdHelloWorldTF.Status + }, timeout, interval).Should(Equal(expected)) + + // It should never reach 4 when Retry is set to 3. + expected = failedStatusCheckResult{ + Retries: 4, + } + time.Sleep(15 * time.Second) + g.Eventually(func() interface{} { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return nil + } + for _, c := range createdHelloWorldTF.Status.Conditions { + if c.Type == "Ready" { + return failedStatusCheckResult{ + Retries: createdHelloWorldTF.Status.ReconciliationFailures, + } + } + } + return createdHelloWorldTF.Status + }, timeout, interval).Should(Not(Equal(expected))) + + // After changing the resource, retry count should be set back to 0. + // With setting Retries lower than the previous one, we can check if it was + // reset to 0 as it would never reach 2 from 3. + createdHelloWorldTF.Spec.Remediation.Retries = 2 + g.Expect(k8sClient.Update(ctx, &createdHelloWorldTF)).Should(Succeed()) + + expected = failedStatusCheckResult{ + Type: "Ready", + Reason: "ArtifactFailed", + Message: "rpc error: code = Unknown desc = failed to untar artifact, error: requires gzip-compressed body: gzip: invalid header", + Status: metav1.ConditionFalse, + Retries: 2, + } + g.Eventually(func() interface{} { + err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) + if err != nil { + return nil + } + for _, c := range createdHelloWorldTF.Status.Conditions { + if c.Type == "Ready" { + return failedStatusCheckResult{ + Type: c.Type, + Reason: c.Reason, + Message: c.Message, + Status: c.Status, + Retries: createdHelloWorldTF.Status.ReconciliationFailures, + } + } + } + return createdHelloWorldTF.Status + }, timeout, interval).Should(Equal(expected)) +} diff --git a/controllers/tf_controller.go b/controllers/tf_controller.go index 62de3555..62f56146 100644 --- a/controllers/tf_controller.go +++ b/controllers/tf_controller.go @@ -295,6 +295,28 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( r.RecordReadiness(ctx, &terraform) } + // Reset retry count if necessary. + revisionChanged := sourceObj.GetArtifact().Revision != terraform.Status.LastAttemptedRevision + generationChanges := terraform.Generation != terraform.Status.ObservedGeneration + if revisionChanged || generationChanges { + log.Info("Reset reconciliation failures count. Reason: resource changed") + terraform.ResetReconciliationFailures() + if err := r.patchStatus(ctx, req.NamespacedName, terraform.Status); err != nil { + log.Error(err, "unable to update status after planing") + return ctrl.Result{Requeue: true}, err + } + } + + if !terraform.ShouldRetry() { + log.Info(fmt.Sprintf( + "Resource reached maximum number of retries (%d/%d). Generation: %d", + terraform.GetReconciliationFailures(), + terraform.Spec.Remediation.Retries, + terraform.GetGeneration(), + )) + return ctrl.Result{Requeue: false}, nil + } + if !isBeingDeleted(terraform) { // case 1: // If revision is changed, and there's no intend to apply, @@ -432,6 +454,7 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Check remediation. if reconcileErr == nil { + log.Info("Reset reconciliation failures count. Reason: successful reconciliation") reconciledTerraform.ResetReconciliationFailures() } else { reconciledTerraform.IncrementReconciliationFailures() @@ -469,11 +492,6 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( traceLog.Info("Record an event for the failure") r.event(ctx, *reconciledTerraform, sourceObj.GetArtifact().Revision, eventv1.EventSeverityError, reconcileErr.Error(), nil) - if !reconciledTerraform.ShouldRetry() { - log.Info(fmt.Sprintf("Resource reached maximum number of retries. Generation: %d", reconciledTerraform.GetGeneration())) - return ctrl.Result{Requeue: false}, nil - } - if reconciledTerraform.Spec.Remediation != nil { log.Info(fmt.Sprintf( "Reconciliation failed, retry (%d/%d) after %s. Generation: %d", diff --git a/docs/References/terraform.md b/docs/References/terraform.md index 997387e5..e464b28d 100644 --- a/docs/References/terraform.md +++ b/docs/References/terraform.md @@ -672,6 +672,40 @@ string +

Remediation +

+

+(Appears on: +TerraformSpec) +

+
+
+ + + + + + + + + + + + + +
FieldDescription
+retries
+ +int64 + +
+(Optional) +

Retries is the number of retries that should be attempted on failures +before bailing. Defaults to ‘0’, a negative integer equals to unlimited +retries.

+
+
+

ResourceInventory

@@ -1857,6 +1891,21 @@ BranchPlanner

BranchPlanner configuration.

+ + +remediation
+ + +Remediation + + + + +(Optional) +

Remediation holds the remediation configuration for when the reconciliation +fails. The default is to not perform any action.

+ + @@ -2393,6 +2442,21 @@ BranchPlanner

BranchPlanner configuration.

+ + +remediation
+ + +Remediation + + + + +(Optional) +

Remediation holds the remediation configuration for when the reconciliation +fails. The default is to not perform any action.

+ + @@ -2586,6 +2650,19 @@ LockStatus (Optional) + + +reconciliationFailures
+ +int64 + + + +(Optional) +

ReconciliationFailures is the counter to track the number of reconciliation +failures.

+ + From e6789b345ea490679ec7c126b1f8101b5a7cfeb7 Mon Sep 17 00:00:00 2001 From: Balazs Nadasdi Date: Mon, 6 Nov 2023 11:33:18 +0100 Subject: [PATCH 3/8] Apply suggestions from code review Co-authored-by: Michael Bridgen Signed-off-by: Balazs Nadasdi --- api/v1alpha2/terraform_types.go | 8 ++++---- controllers/tc000243_remediation_retry_test.go | 5 +---- controllers/tf_controller.go | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/api/v1alpha2/terraform_types.go b/api/v1alpha2/terraform_types.go index 354102ef..f81717f6 100644 --- a/api/v1alpha2/terraform_types.go +++ b/api/v1alpha2/terraform_types.go @@ -262,7 +262,7 @@ type TerraformSpec struct { // +optional BranchPlanner *BranchPlanner `json:"branchPlanner,omitempty"` - // Remediation holds the remediation configuration for when the reconciliation + // Remediation specifies what the controller should do when reconciliation // fails. The default is to not perform any action. // +optional Remediation *Remediation `json:"remediation,omitempty"` @@ -278,7 +278,7 @@ type BranchPlanner struct { type Remediation struct { // Retries is the number of retries that should be attempted on failures - // before bailing. Defaults to '0', a negative integer equals to unlimited + // before bailing. Defaults to '0', a negative integer denotes unlimited // retries. // +optional Retries int64 `json:"retries,omitempty"` @@ -400,8 +400,8 @@ type TerraformStatus struct { // +optional Lock LockStatus `json:"lock,omitempty"` - // ReconciliationFailures is the counter to track the number of reconciliation - // failures. + // ReconciliationFailures is the number of reconciliation + // failures since the last success or update. // +optional ReconciliationFailures int64 `json:"reconciliationFailures,omitempty"` } diff --git a/controllers/tc000243_remediation_retry_test.go b/controllers/tc000243_remediation_retry_test.go index 19183e97..4d9886ab 100644 --- a/controllers/tc000243_remediation_retry_test.go +++ b/controllers/tc000243_remediation_retry_test.go @@ -105,10 +105,7 @@ spec: createdHelloWorldTF := infrav1.Terraform{} g.Eventually(func() bool { err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) - if err != nil { - return false - } - return true + return err == nil }, timeout, interval).Should(BeTrue()) It("should be reconciled and contain some status conditions.") diff --git a/controllers/tf_controller.go b/controllers/tf_controller.go index 62f56146..3d264191 100644 --- a/controllers/tf_controller.go +++ b/controllers/tf_controller.go @@ -302,7 +302,7 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( log.Info("Reset reconciliation failures count. Reason: resource changed") terraform.ResetReconciliationFailures() if err := r.patchStatus(ctx, req.NamespacedName, terraform.Status); err != nil { - log.Error(err, "unable to update status after planing") + log.Error(err, "unable to update status after planning") return ctrl.Result{Requeue: true}, err } } From 632a54bc55ffd609b0f55e7aa6320560117f37c3 Mon Sep 17 00:00:00 2001 From: Balazs Nadasdi Date: Mon, 6 Nov 2023 11:33:27 +0100 Subject: [PATCH 4/8] fix: ShouldRetry return value and remove double check - ShouldRetry return value should be true when Retries is less than 0 - We have to check only once if we hit limit or not. Easier to do that at the beginning. - add new status condition + use that in tests Signed-off-by: Balazs Nadasdi --- api/v1alpha2/terraform_types.go | 37 ++++++++++++++--- .../infra.contrib.fluxcd.io_terraforms.yaml | 10 ++--- .../tc000243_remediation_retry_test.go | 40 +++++++++---------- controllers/tf_controller.go | 40 +++++++++++-------- docs/References/terraform.md | 10 ++--- 5 files changed, 83 insertions(+), 54 deletions(-) diff --git a/api/v1alpha2/terraform_types.go b/api/v1alpha2/terraform_types.go index f81717f6..d59ea79f 100644 --- a/api/v1alpha2/terraform_types.go +++ b/api/v1alpha2/terraform_types.go @@ -556,11 +556,12 @@ const ( // These constants are the Condition Types that the Terraform Resource works with const ( - ConditionTypeApply = "Apply" - ConditionTypeHealthCheck = "HealthCheck" - ConditionTypeOutput = "Output" - ConditionTypePlan = "Plan" - ConditionTypeStateLocked = "StateLocked" + ConditionTypeApply = "Apply" + ConditionTypeHealthCheck = "HealthCheck" + ConditionTypeOutput = "Output" + ConditionTypePlan = "Plan" + ConditionTypeStateLocked = "StateLocked" + ConditionRetryLimitReached = "RetryLimitReached" ) // Webhook stages @@ -844,6 +845,30 @@ func TerraformStateLocked(terraform Terraform, lockID, message string) Terraform return terraform } +// TerraformReachedLimit will set a new condition on the Terraform resource +// indicating that the resource has reached its retry limit. +func TerraformReachedLimit(terraform Terraform) Terraform { + newCondition := metav1.Condition{ + Type: ConditionRetryLimitReached, + Status: metav1.ConditionTrue, + Reason: "ReachedHitRetryLimit", + Message: "Resource reached maximum number of retries.", + } + apimeta.SetStatusCondition(terraform.GetStatusConditions(), newCondition) + SetTerraformReadiness(&terraform, metav1.ConditionFalse, newCondition.Reason, newCondition.Message, "") + + return terraform +} + +// TerraformResetRetry will set a new condition on the Terraform resource +// indicating that the resource retry count has been reset. +func TerraformResetRetry(terraform Terraform) Terraform { + apimeta.RemoveStatusCondition(terraform.GetStatusConditions(), ConditionRetryLimitReached) + terraform.ResetReconciliationFailures() + + return terraform +} + // HasDrift returns true if drift has been detected since the last successful apply func (in Terraform) HasDrift() bool { for _, condition := range in.Status.Conditions { @@ -939,7 +964,7 @@ func (in *Terraform) ResetReconciliationFailures() { } func (in *Terraform) ShouldRetry() bool { - if in.Spec.Remediation == nil { + if in.Spec.Remediation == nil || in.Spec.Remediation.Retries < 0 { return true } diff --git a/config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml b/config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml index d2604f73..ad8237e9 100644 --- a/config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml +++ b/config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml @@ -5268,13 +5268,13 @@ spec: the apply step. type: boolean remediation: - description: Remediation holds the remediation configuration for when - the reconciliation fails. The default is to not perform any action. + description: Remediation specifies what the controller should do when + reconciliation fails. The default is to not perform any action. properties: retries: description: Retries is the number of retries that should be attempted on failures before bailing. Defaults to '0', a negative integer - equals to unlimited retries. + denotes unlimited retries. format: int64 type: integer type: object @@ -10218,8 +10218,8 @@ spec: type: string type: object reconciliationFailures: - description: ReconciliationFailures is the counter to track the number - of reconciliation failures. + description: ReconciliationFailures is the number of reconciliation + failures since the last success or update. format: int64 type: integer type: object diff --git a/controllers/tc000243_remediation_retry_test.go b/controllers/tc000243_remediation_retry_test.go index 4d9886ab..d03e5b3a 100644 --- a/controllers/tc000243_remediation_retry_test.go +++ b/controllers/tc000243_remediation_retry_test.go @@ -118,8 +118,8 @@ spec: return len(createdHelloWorldTF.Status.Conditions) }, timeout, interval).Should(Equal(1)) - It("should be an error") - By("checking that the Ready's reason of the TF resource become `ArtifactFailed`.") + It("should stop retry") + By("when reached retry limit specified in .Spec.Remediation.Retries") type failedStatusCheckResult struct { Type string Reason string @@ -128,11 +128,10 @@ spec: Retries int64 } expected := failedStatusCheckResult{ - Type: "Ready", - Reason: "ArtifactFailed", - Message: "rpc error: code = Unknown desc = failed to untar artifact, error: requires gzip-compressed body: gzip: invalid header", - Status: metav1.ConditionFalse, - Retries: 3, + Type: infrav1.ConditionRetryLimitReached, + Reason: "ReachedHitRetryLimit", + Message: "Resource reached maximum number of retries.", + Status: metav1.ConditionTrue, } g.Eventually(func() interface{} { err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) @@ -140,23 +139,20 @@ spec: return nil } for _, c := range createdHelloWorldTF.Status.Conditions { - if c.Type == "Ready" { + if c.Type == expected.Type { return failedStatusCheckResult{ Type: c.Type, Reason: c.Reason, Message: c.Message, Status: c.Status, - Retries: createdHelloWorldTF.Status.ReconciliationFailures, } } } return createdHelloWorldTF.Status }, timeout, interval).Should(Equal(expected)) + g.Expect(createdHelloWorldTF.Status.ReconciliationFailures).To(Equal(int64(3))) - // It should never reach 4 when Retry is set to 3. - expected = failedStatusCheckResult{ - Retries: 4, - } + // After 15s, it's still 3 and didn't go higher. time.Sleep(15 * time.Second) g.Eventually(func() interface{} { err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) @@ -165,14 +161,14 @@ spec: } for _, c := range createdHelloWorldTF.Status.Conditions { if c.Type == "Ready" { - return failedStatusCheckResult{ - Retries: createdHelloWorldTF.Status.ReconciliationFailures, - } + return createdHelloWorldTF.Status.ReconciliationFailures } } return createdHelloWorldTF.Status - }, timeout, interval).Should(Not(Equal(expected))) + }, timeout, interval).Should(Not(BeNumerically(">", createdHelloWorldTF.Spec.Remediation.Retries))) + It("should restart retry count") + By("when resource was updated") // After changing the resource, retry count should be set back to 0. // With setting Retries lower than the previous one, we can check if it was // reset to 0 as it would never reach 2 from 3. @@ -180,10 +176,10 @@ spec: g.Expect(k8sClient.Update(ctx, &createdHelloWorldTF)).Should(Succeed()) expected = failedStatusCheckResult{ - Type: "Ready", - Reason: "ArtifactFailed", - Message: "rpc error: code = Unknown desc = failed to untar artifact, error: requires gzip-compressed body: gzip: invalid header", - Status: metav1.ConditionFalse, + Type: infrav1.ConditionRetryLimitReached, + Reason: "ReachedHitRetryLimit", + Message: "Resource reached maximum number of retries.", + Status: metav1.ConditionTrue, Retries: 2, } g.Eventually(func() interface{} { @@ -192,7 +188,7 @@ spec: return nil } for _, c := range createdHelloWorldTF.Status.Conditions { - if c.Type == "Ready" { + if c.Type == expected.Type { return failedStatusCheckResult{ Type: c.Type, Reason: c.Reason, diff --git a/controllers/tf_controller.go b/controllers/tf_controller.go index 3d264191..d307e1d3 100644 --- a/controllers/tf_controller.go +++ b/controllers/tf_controller.go @@ -300,23 +300,13 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( generationChanges := terraform.Generation != terraform.Status.ObservedGeneration if revisionChanged || generationChanges { log.Info("Reset reconciliation failures count. Reason: resource changed") - terraform.ResetReconciliationFailures() + terraform = infrav1.TerraformResetRetry(terraform) if err := r.patchStatus(ctx, req.NamespacedName, terraform.Status); err != nil { log.Error(err, "unable to update status after planning") return ctrl.Result{Requeue: true}, err } } - if !terraform.ShouldRetry() { - log.Info(fmt.Sprintf( - "Resource reached maximum number of retries (%d/%d). Generation: %d", - terraform.GetReconciliationFailures(), - terraform.Spec.Remediation.Retries, - terraform.GetGeneration(), - )) - return ctrl.Result{Requeue: false}, nil - } - if !isBeingDeleted(terraform) { // case 1: // If revision is changed, and there's no intend to apply, @@ -448,6 +438,27 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } } + if !terraform.ShouldRetry() { + // `ShouldRetry` will return true if .Spec.Remediation is nil. + // The code doesn't reach this block if .Spec.Remediation is nil. + log.Info(fmt.Sprintf( + "Resource reached maximum number of retries (%d/%d). Generation: %d", + terraform.GetReconciliationFailures(), + terraform.Spec.Remediation.Retries, + terraform.GetGeneration(), + )) + + terraform = infrav1.TerraformReachedLimit(terraform) + + traceLog.Info("Patch the status of the Terraform resource") + if err := r.patchStatus(ctx, req.NamespacedName, terraform.Status); err != nil { + log.Error(err, "unable to update status after the reconciliation is complete") + return ctrl.Result{Requeue: true}, err + } + + return ctrl.Result{Requeue: false}, nil + } + // reconcile Terraform by applying the latest revision traceLog.Info("Run reconcile for the Terraform resource") reconciledTerraform, reconcileErr := r.reconcile(ctx, runnerClient, *terraform.DeepCopy(), sourceObj, reconciliationLoopID) @@ -455,7 +466,8 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Check remediation. if reconcileErr == nil { log.Info("Reset reconciliation failures count. Reason: successful reconciliation") - reconciledTerraform.ResetReconciliationFailures() + terraform := infrav1.TerraformResetRetry(*reconciledTerraform) + reconciledTerraform = &terraform } else { reconciledTerraform.IncrementReconciliationFailures() } @@ -477,10 +489,6 @@ func (r *TerraformReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( "revision", sourceObj.GetArtifact().Revision) - if !terraform.ShouldRetry() { - return ctrl.Result{Requeue: false}, nil - } - return ctrl.Result{RequeueAfter: terraform.GetRetryInterval()}, nil } else if reconcileErr != nil { // broadcast the reconciliation failure and requeue at the specified retry interval diff --git a/docs/References/terraform.md b/docs/References/terraform.md index e464b28d..f38ee62c 100644 --- a/docs/References/terraform.md +++ b/docs/References/terraform.md @@ -698,7 +698,7 @@ int64 (Optional)

Retries is the number of retries that should be attempted on failures -before bailing. Defaults to ‘0’, a negative integer equals to unlimited +before bailing. Defaults to ‘0’, a negative integer denotes unlimited retries.

@@ -1902,7 +1902,7 @@ Remediation (Optional) -

Remediation holds the remediation configuration for when the reconciliation +

Remediation specifies what the controller should do when reconciliation fails. The default is to not perform any action.

@@ -2453,7 +2453,7 @@ Remediation (Optional) -

Remediation holds the remediation configuration for when the reconciliation +

Remediation specifies what the controller should do when reconciliation fails. The default is to not perform any action.

@@ -2659,8 +2659,8 @@ int64 (Optional) -

ReconciliationFailures is the counter to track the number of reconciliation -failures.

+

ReconciliationFailures is the number of reconciliation +failures since the last success or update.

From 3edf39c657779df5174aa50bfd47ae4e5ad618cb Mon Sep 17 00:00:00 2001 From: Balazs Nadasdi Date: Thu, 16 Nov 2023 15:04:07 +0100 Subject: [PATCH 5/8] patch: use StalledCondition instead of custom condition type Additional changes: - resetReconciliationFailures is not exported anymore to prevent external sources calling it causing an inconsistent state of the resource. - test: compare the ObservedGeneration of the condition after a wait. - add revision check to test - remove SetTerraformReadiness from TerraformReachedLimit References: - https://github.com/weaveworks/tf-controller/pull/1061#discussion_r1394218753 - https://github.com/weaveworks/tf-controller/pull/1061#discussion_r1394296954 - https://github.com/weaveworks/tf-controller/pull/1061#discussion_r1394497060 Signed-off-by: Balazs Nadasdi --- api/v1alpha2/terraform_types.go | 25 ++++++----- .../tc000243_remediation_retry_test.go | 43 +++++++++++++------ 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/api/v1alpha2/terraform_types.go b/api/v1alpha2/terraform_types.go index d59ea79f..68286647 100644 --- a/api/v1alpha2/terraform_types.go +++ b/api/v1alpha2/terraform_types.go @@ -529,8 +529,9 @@ const ( // The potential reasons that are associated with condition types const ( - ArtifactFailedReason = "ArtifactFailed" AccessDeniedReason = "AccessDenied" + ArtifactFailedReason = "ArtifactFailed" + RetryLimitReachedReason = "RetryLimitReached" DeletionBlockedByDependants = "DeletionBlockedByDependantsReason" DependencyNotReadyReason = "DependencyNotReady" DriftDetectedReason = "DriftDetected" @@ -556,12 +557,11 @@ const ( // These constants are the Condition Types that the Terraform Resource works with const ( - ConditionTypeApply = "Apply" - ConditionTypeHealthCheck = "HealthCheck" - ConditionTypeOutput = "Output" - ConditionTypePlan = "Plan" - ConditionTypeStateLocked = "StateLocked" - ConditionRetryLimitReached = "RetryLimitReached" + ConditionTypeApply = "Apply" + ConditionTypeHealthCheck = "HealthCheck" + ConditionTypeOutput = "Output" + ConditionTypePlan = "Plan" + ConditionTypeStateLocked = "StateLocked" ) // Webhook stages @@ -849,13 +849,12 @@ func TerraformStateLocked(terraform Terraform, lockID, message string) Terraform // indicating that the resource has reached its retry limit. func TerraformReachedLimit(terraform Terraform) Terraform { newCondition := metav1.Condition{ - Type: ConditionRetryLimitReached, + Type: meta.StalledCondition, Status: metav1.ConditionTrue, - Reason: "ReachedHitRetryLimit", + Reason: RetryLimitReachedReason, Message: "Resource reached maximum number of retries.", } apimeta.SetStatusCondition(terraform.GetStatusConditions(), newCondition) - SetTerraformReadiness(&terraform, metav1.ConditionFalse, newCondition.Reason, newCondition.Message, "") return terraform } @@ -863,8 +862,8 @@ func TerraformReachedLimit(terraform Terraform) Terraform { // TerraformResetRetry will set a new condition on the Terraform resource // indicating that the resource retry count has been reset. func TerraformResetRetry(terraform Terraform) Terraform { - apimeta.RemoveStatusCondition(terraform.GetStatusConditions(), ConditionRetryLimitReached) - terraform.ResetReconciliationFailures() + apimeta.RemoveStatusCondition(terraform.GetStatusConditions(), meta.StalledCondition) + terraform.resetReconciliationFailures() return terraform } @@ -959,7 +958,7 @@ func (in *Terraform) IncrementReconciliationFailures() { in.Status.ReconciliationFailures++ } -func (in *Terraform) ResetReconciliationFailures() { +func (in *Terraform) resetReconciliationFailures() { in.Status.ReconciliationFailures = 0 } diff --git a/controllers/tc000243_remediation_retry_test.go b/controllers/tc000243_remediation_retry_test.go index d03e5b3a..b0b9d3fd 100644 --- a/controllers/tc000243_remediation_retry_test.go +++ b/controllers/tc000243_remediation_retry_test.go @@ -8,6 +8,7 @@ import ( . "github.com/onsi/gomega" + "github.com/fluxcd/pkg/apis/meta" sourcev1 "github.com/fluxcd/source-controller/api/v1" infrav1 "github.com/weaveworks/tf-controller/api/v1alpha2" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -128,8 +129,8 @@ spec: Retries int64 } expected := failedStatusCheckResult{ - Type: infrav1.ConditionRetryLimitReached, - Reason: "ReachedHitRetryLimit", + Type: meta.StalledCondition, + Reason: infrav1.RetryLimitReachedReason, Message: "Resource reached maximum number of retries.", Status: metav1.ConditionTrue, } @@ -154,18 +155,32 @@ spec: // After 15s, it's still 3 and didn't go higher. time.Sleep(15 * time.Second) + + recheckHelloWorldTF := infrav1.Terraform{} g.Eventually(func() interface{} { - err := k8sClient.Get(ctx, helloWorldTFKey, &createdHelloWorldTF) - if err != nil { - return nil + return k8sClient.Get(ctx, helloWorldTFKey, &recheckHelloWorldTF) + }, timeout, interval).Should(Succeed()) + + var retryCondition *metav1.Condition + for _, cond := range recheckHelloWorldTF.Status.Conditions { + if cond.Type == meta.StalledCondition && cond.Reason == infrav1.RetryLimitReachedReason { + retryCondition = &cond + break } - for _, c := range createdHelloWorldTF.Status.Conditions { - if c.Type == "Ready" { - return createdHelloWorldTF.Status.ReconciliationFailures - } + } + var originalRetryCondition *metav1.Condition + for _, cond := range createdHelloWorldTF.Status.Conditions { + if cond.Type == meta.StalledCondition && cond.Reason == infrav1.RetryLimitReachedReason { + originalRetryCondition = &cond + break } - return createdHelloWorldTF.Status - }, timeout, interval).Should(Not(BeNumerically(">", createdHelloWorldTF.Spec.Remediation.Retries))) + } + + g.Expect(retryCondition).ToNot(BeNil(), "Terraform resource should have retry limit reached status condition") + g.Expect(originalRetryCondition).ToNot(BeNil(), "Terraform resource should have retry limit reached status condition") + g.Expect(recheckHelloWorldTF.Status.ReconciliationFailures).To(Equal(createdHelloWorldTF.Status.ReconciliationFailures)) + g.Expect(retryCondition.ObservedGeneration).To(Equal(originalRetryCondition.ObservedGeneration)) + g.Expect(recheckHelloWorldTF.Status.LastAttemptedRevision).To(Equal(testRepo.Status.Artifact.Revision)) It("should restart retry count") By("when resource was updated") @@ -176,8 +191,8 @@ spec: g.Expect(k8sClient.Update(ctx, &createdHelloWorldTF)).Should(Succeed()) expected = failedStatusCheckResult{ - Type: infrav1.ConditionRetryLimitReached, - Reason: "ReachedHitRetryLimit", + Type: meta.StalledCondition, + Reason: infrav1.RetryLimitReachedReason, Message: "Resource reached maximum number of retries.", Status: metav1.ConditionTrue, Retries: 2, @@ -188,7 +203,7 @@ spec: return nil } for _, c := range createdHelloWorldTF.Status.Conditions { - if c.Type == expected.Type { + if c.Type == expected.Type && c.Reason == expected.Reason { return failedStatusCheckResult{ Type: c.Type, Reason: c.Reason, From 2c4e7d54cfffb08d3c2c4268eb426ce399667b46 Mon Sep 17 00:00:00 2001 From: Balazs Nadasdi Date: Thu, 23 Nov 2023 17:00:22 +0100 Subject: [PATCH 6/8] remove sleep and call Reconcile manually Signed-off-by: Balazs Nadasdi --- controllers/tc000243_remediation_retry_test.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/controllers/tc000243_remediation_retry_test.go b/controllers/tc000243_remediation_retry_test.go index b0b9d3fd..dc9c9318 100644 --- a/controllers/tc000243_remediation_retry_test.go +++ b/controllers/tc000243_remediation_retry_test.go @@ -7,6 +7,7 @@ import ( "time" . "github.com/onsi/gomega" + controllerruntime "sigs.k8s.io/controller-runtime" "github.com/fluxcd/pkg/apis/meta" sourcev1 "github.com/fluxcd/source-controller/api/v1" @@ -153,8 +154,14 @@ spec: }, timeout, interval).Should(Equal(expected)) g.Expect(createdHelloWorldTF.Status.ReconciliationFailures).To(Equal(int64(3))) - // After 15s, it's still 3 and didn't go higher. - time.Sleep(15 * time.Second) + reconcileResult, err := reconciler.Reconcile(ctx, controllerruntime.Request{ + NamespacedName: types.NamespacedName{ + Namespace: createdHelloWorldTF.GetNamespace(), + Name: createdHelloWorldTF.GetName(), + }, + }) + g.Expect(err).To(Succeed()) + g.Expect(reconcileResult.Requeue).To(BeFalse()) recheckHelloWorldTF := infrav1.Terraform{} g.Eventually(func() interface{} { From 8c4ef451bf40af6cb22afcef1fc0e215e3e7d431 Mon Sep 17 00:00:00 2001 From: Balazs Nadasdi Date: Thu, 23 Nov 2023 21:30:48 +0100 Subject: [PATCH 7/8] Apply suggestions from code review Co-authored-by: Michael Bridgen --- controllers/tc000243_remediation_retry_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/controllers/tc000243_remediation_retry_test.go b/controllers/tc000243_remediation_retry_test.go index dc9c9318..48ded787 100644 --- a/controllers/tc000243_remediation_retry_test.go +++ b/controllers/tc000243_remediation_retry_test.go @@ -19,8 +19,8 @@ import ( // +kubebuilder:docs-gen:collapse=Imports func Test_000243_remediation_retry_test(t *testing.T) { - Spec("This spec describes the behaviour of a Terraform resource that obtains a bad tar.gz file blob from its Source reference.") - It("should report the error and stop reconcile.") + Spec("This spec describes the retry behaviour of a Terraform resource that repeatedly fails.") + It("should retry up to the limit given in .spec.remediation.retries, and start again only when the spec changes.") const ( sourceName = "test-tf-controller-retry" From 88eff5a7f354a8d954b27d7157de0d52c54f862c Mon Sep 17 00:00:00 2001 From: Balazs Nadasdi Date: Fri, 24 Nov 2023 09:44:14 +0100 Subject: [PATCH 8/8] refactor: use FindStatusCondition from apimachinery meta in test Changes: - Instead of two loops, use `apimeta.FindStatusCondition` to check if the stalled condition is has the same properties after a reconciliation loop. - regenerate `zz_generated.deepcopy.go`. - return `nil` in test where the value is not used. Signed-off-by: Balazs Nadasdi --- api/v1alpha2/zz_generated.deepcopy.go | 20 ++++++++++++++ .../tc000243_remediation_retry_test.go | 27 +++++++------------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/api/v1alpha2/zz_generated.deepcopy.go b/api/v1alpha2/zz_generated.deepcopy.go index b980d713..cdfd87b4 100644 --- a/api/v1alpha2/zz_generated.deepcopy.go +++ b/api/v1alpha2/zz_generated.deepcopy.go @@ -222,6 +222,21 @@ func (in *ReadInputsFromSecretSpec) DeepCopy() *ReadInputsFromSecretSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Remediation) DeepCopyInto(out *Remediation) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Remediation. +func (in *Remediation) DeepCopy() *Remediation { + if in == nil { + return nil + } + out := new(Remediation) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ResourceInventory) DeepCopyInto(out *ResourceInventory) { *out = *in @@ -580,6 +595,11 @@ func (in *TerraformSpec) DeepCopyInto(out *TerraformSpec) { *out = new(BranchPlanner) **out = **in } + if in.Remediation != nil { + in, out := &in.Remediation, &out.Remediation + *out = new(Remediation) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TerraformSpec. diff --git a/controllers/tc000243_remediation_retry_test.go b/controllers/tc000243_remediation_retry_test.go index 48ded787..b60edb89 100644 --- a/controllers/tc000243_remediation_retry_test.go +++ b/controllers/tc000243_remediation_retry_test.go @@ -12,6 +12,7 @@ import ( "github.com/fluxcd/pkg/apis/meta" sourcev1 "github.com/fluxcd/source-controller/api/v1" infrav1 "github.com/weaveworks/tf-controller/api/v1alpha2" + apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ) @@ -168,25 +169,15 @@ spec: return k8sClient.Get(ctx, helloWorldTFKey, &recheckHelloWorldTF) }, timeout, interval).Should(Succeed()) - var retryCondition *metav1.Condition - for _, cond := range recheckHelloWorldTF.Status.Conditions { - if cond.Type == meta.StalledCondition && cond.Reason == infrav1.RetryLimitReachedReason { - retryCondition = &cond - break - } - } - var originalRetryCondition *metav1.Condition - for _, cond := range createdHelloWorldTF.Status.Conditions { - if cond.Type == meta.StalledCondition && cond.Reason == infrav1.RetryLimitReachedReason { - originalRetryCondition = &cond - break - } - } + originalCondition := apimeta.FindStatusCondition(createdHelloWorldTF.Status.Conditions, meta.StalledCondition) + recheckCondition := apimeta.FindStatusCondition(recheckHelloWorldTF.Status.Conditions, meta.StalledCondition) - g.Expect(retryCondition).ToNot(BeNil(), "Terraform resource should have retry limit reached status condition") - g.Expect(originalRetryCondition).ToNot(BeNil(), "Terraform resource should have retry limit reached status condition") + g.Expect(recheckCondition).ToNot(BeNil(), "Terraform resource should have retry limit reached status condition") + g.Expect(originalCondition).ToNot(BeNil(), "Terraform resource should have retry limit reached status condition") + g.Expect(originalCondition.Reason).To(Equal(infrav1.RetryLimitReachedReason)) + g.Expect(originalCondition.Reason).To(Equal(recheckCondition.Reason)) g.Expect(recheckHelloWorldTF.Status.ReconciliationFailures).To(Equal(createdHelloWorldTF.Status.ReconciliationFailures)) - g.Expect(retryCondition.ObservedGeneration).To(Equal(originalRetryCondition.ObservedGeneration)) + g.Expect(recheckCondition.ObservedGeneration).To(Equal(originalCondition.ObservedGeneration)) g.Expect(recheckHelloWorldTF.Status.LastAttemptedRevision).To(Equal(testRepo.Status.Artifact.Revision)) It("should restart retry count") @@ -220,6 +211,6 @@ spec: } } } - return createdHelloWorldTF.Status + return nil }, timeout, interval).Should(Equal(expected)) }