Skip to content

Commit

Permalink
Merge pull request #1061 from weaveworks/870-retry
Browse files Browse the repository at this point in the history
feat: add remediation retry
  • Loading branch information
yitsushi committed Nov 27, 2023
2 parents 6dd08bc + 88eff5a commit 7389304
Show file tree
Hide file tree
Showing 8 changed files with 482 additions and 2 deletions.
72 changes: 71 additions & 1 deletion api/v1alpha2/terraform_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ type TerraformSpec struct {
// BranchPlanner configuration.
// +optional
BranchPlanner *BranchPlanner `json:"branchPlanner,omitempty"`

// Remediation specifies what the controller should do when reconciliation
// fails. The default is to not perform any action.
// +optional
Remediation *Remediation `json:"remediation,omitempty"`
}

type BranchPlanner struct {
Expand All @@ -271,6 +276,14 @@ type BranchPlanner struct {
EnablePathScope bool `json:"enablePathScope"`
}

// Remediation configures what the controller should do when reconciliation
// fails. The only setting it currently carries is the retry limit.
type Remediation struct {
// Retries is the number of retries that should be attempted on failures
// before bailing. Defaults to '0', a negative integer denotes unlimited
// retries.
// +optional
Retries int64 `json:"retries,omitempty"`
}

type CloudSpec struct {
// +required
Organization string `json:"organization"`
Expand Down Expand Up @@ -386,6 +399,11 @@ type TerraformStatus struct {

// +optional
Lock LockStatus `json:"lock,omitempty"`

// ReconciliationFailures is the number of reconciliation
// failures since the last success or update.
// +optional
ReconciliationFailures int64 `json:"reconciliationFailures,omitempty"`
}

// LockStatus defines the observed state of a Terraform State Lock
Expand Down Expand Up @@ -511,8 +529,9 @@ const (

// The potential reasons that are associated with condition types
const (
ArtifactFailedReason = "ArtifactFailed"
AccessDeniedReason = "AccessDenied"
ArtifactFailedReason = "ArtifactFailed"
RetryLimitReachedReason = "RetryLimitReached"
DeletionBlockedByDependants = "DeletionBlockedByDependantsReason"
DependencyNotReadyReason = "DependencyNotReady"
DriftDetectedReason = "DriftDetected"
Expand Down Expand Up @@ -826,6 +845,29 @@ func TerraformStateLocked(terraform Terraform, lockID, message string) Terraform
return terraform
}

// TerraformReachedLimit flags the Terraform resource as having exhausted its
// retries. It sets the Stalled condition to true with the RetryLimitReached
// reason on the resource's status conditions and returns the resource.
//
// The resource is received by value; the condition is written through the
// pointer returned by GetStatusConditions and the same value is handed back to
// the caller.
func TerraformReachedLimit(terraform Terraform) Terraform {
	apimeta.SetStatusCondition(terraform.GetStatusConditions(), metav1.Condition{
		Type:    meta.StalledCondition,
		Status:  metav1.ConditionTrue,
		Reason:  RetryLimitReachedReason,
		Message: "Resource reached maximum number of retries.",
	})

	return terraform
}

// TerraformResetRetry clears the retry state of the Terraform resource: it
// removes the Stalled condition from the resource's status conditions and
// resets the reconciliation failure counter to zero. No new condition is set.
// The resource is received by value and the modified value is returned.
func TerraformResetRetry(terraform Terraform) Terraform {
apimeta.RemoveStatusCondition(terraform.GetStatusConditions(), meta.StalledCondition)
terraform.resetReconciliationFailures()

return terraform
}

// HasDrift returns true if drift has been detected since the last successful apply
func (in Terraform) HasDrift() bool {
for _, condition := range in.Status.Conditions {
Expand Down Expand Up @@ -900,6 +942,34 @@ func (in *Terraform) GetRunnerHostname(target string, clusterDomain string) stri
}
}

// GetRetries returns the retry limit configured in Spec.Remediation. When no
// remediation is configured it returns 0.
func (in *Terraform) GetRetries() int64 {
	if remediation := in.Spec.Remediation; remediation != nil {
		return remediation.Retries
	}

	return 0
}

// GetReconciliationFailures returns the reconciliation failure count recorded
// in the resource's status.
func (in *Terraform) GetReconciliationFailures() int64 {
return in.Status.ReconciliationFailures
}

// IncrementReconciliationFailures adds one to the reconciliation failure count
// stored in the resource's status.
func (in *Terraform) IncrementReconciliationFailures() {
in.Status.ReconciliationFailures++
}

// resetReconciliationFailures sets the reconciliation failure count stored in
// the resource's status back to zero.
func (in *Terraform) resetReconciliationFailures() {
in.Status.ReconciliationFailures = 0
}

// ShouldRetry reports whether another reconciliation attempt is allowed after
// a failure.
//
// It returns true when no remediation is configured or when the configured
// retry limit is negative (unlimited). Otherwise it returns true only while
// the recorded number of reconciliation failures is below the limit, so a
// limit of 0 never permits a retry.
func (in *Terraform) ShouldRetry() bool {
	remediation := in.Spec.Remediation

	switch {
	case remediation == nil:
		// No remediation configured: retrying is not limited.
		return true
	case remediation.Retries < 0:
		// Negative limit means unlimited retries.
		return true
	default:
		return in.GetReconciliationFailures() < remediation.Retries
	}
}

func (in *TerraformSpec) GetAlwaysCleanupRunnerPod() bool {
if in.AlwaysCleanupRunnerPod == nil {
return true
Expand Down
20 changes: 20 additions & 0 deletions api/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions charts/tf-controller/templates/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5269,6 +5269,17 @@ spec:
description: RefreshBeforeApply forces refreshing of the state before
the apply step.
type: boolean
remediation:
description: Remediation specifies what the controller should do when
reconciliation fails. The default is to not perform any action.
properties:
retries:
description: Retries is the number of retries that should be attempted
on failures before bailing. Defaults to '0', a negative integer
denotes unlimited retries.
format: int64
type: integer
type: object
retryInterval:
description: The interval at which to retry a previously failed reconciliation.
The default value is 15 when not specified.
Expand Down Expand Up @@ -10035,6 +10046,8 @@ spec:
- sourceRef
type: object
status:
default:
observedGeneration: -1
description: TerraformStatus defines the observed state of Terraform
properties:
availableOutputs:
Expand Down Expand Up @@ -10197,6 +10210,11 @@ spec:
pending:
type: string
type: object
reconciliationFailures:
description: ReconciliationFailures is the number of reconciliation
failures since the last success or update.
format: int64
type: integer
type: object
type: object
served: true
Expand Down
16 changes: 16 additions & 0 deletions config/crd/bases/infra.contrib.fluxcd.io_terraforms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5267,6 +5267,17 @@ spec:
description: RefreshBeforeApply forces refreshing of the state before
the apply step.
type: boolean
remediation:
description: Remediation specifies what the controller should do when
reconciliation fails. The default is to not perform any action.
properties:
retries:
description: Retries is the number of retries that should be attempted
on failures before bailing. Defaults to '0', a negative integer
denotes unlimited retries.
format: int64
type: integer
type: object
retryInterval:
description: The interval at which to retry a previously failed reconciliation.
The default value is 15 when not specified.
Expand Down Expand Up @@ -10206,6 +10217,11 @@ spec:
pending:
type: string
type: object
reconciliationFailures:
description: ReconciliationFailures is the number of reconciliation
failures since the last success or update.
format: int64
type: integer
type: object
type: object
served: true
Expand Down
2 changes: 2 additions & 0 deletions config/tilt/test/tf-dev-subject.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ spec:
path: ./
interval: 20s
approvePlan: auto
remediation:
retries: 3
sourceRef:
kind: GitRepository
name: helloworld
Expand Down
Loading

0 comments on commit 7389304

Please sign in to comment.