diff --git a/artifacts/flagger/crd.yaml b/artifacts/flagger/crd.yaml
index ebde3b211..57921ede3 100644
--- a/artifacts/flagger/crd.yaml
+++ b/artifacts/flagger/crd.yaml
@@ -826,6 +826,9 @@ spec:
                     mirrorWeight:
                       description: Weight of traffic to be mirrored
                       type: number
+                    primaryReadyThreshold:
+                      description: Percentage of pods that need to be available to consider primary as ready
+                      type: number
                     match:
                       description: A/B testing match conditions
                       type: array
diff --git a/charts/flagger/crds/crd.yaml b/charts/flagger/crds/crd.yaml
index ebde3b211..57921ede3 100644
--- a/charts/flagger/crds/crd.yaml
+++ b/charts/flagger/crds/crd.yaml
@@ -826,6 +826,9 @@ spec:
                     mirrorWeight:
                       description: Weight of traffic to be mirrored
                       type: number
+                    primaryReadyThreshold:
+                      description: Percentage of pods that need to be available to consider primary as ready
+                      type: number
                     match:
                       description: A/B testing match conditions
                       type: array
diff --git a/docs/gitbook/usage/how-it-works.md b/docs/gitbook/usage/how-it-works.md
index d2a77c525..88f7baec9 100644
--- a/docs/gitbook/usage/how-it-works.md
+++ b/docs/gitbook/usage/how-it-works.md
@@ -335,6 +335,10 @@ Spec:
     # total number of iterations
     # used for A/B Testing and Blue/Green
     iterations:
+    # threshold of primary pods that need to be available to consider it ready
+    # before starting the rollout; this is optional and defaults to 100
+    # percentage (0-100)
+    primaryReadyThreshold: 100
     # canary match conditions
     # used for A/B Testing
     match:
diff --git a/kustomize/base/flagger/crd.yaml b/kustomize/base/flagger/crd.yaml
index ebde3b211..57921ede3 100644
--- a/kustomize/base/flagger/crd.yaml
+++ b/kustomize/base/flagger/crd.yaml
@@ -826,6 +826,9 @@ spec:
                     mirrorWeight:
                       description: Weight of traffic to be mirrored
                       type: number
+                    primaryReadyThreshold:
+                      description: Percentage of pods that need to be available to consider primary as ready
+                      type: number
                     match:
                       description: A/B testing match conditions
                       type: array
diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go
index 8c8c2be2e..3d0a6c9cc 100644
--- a/pkg/apis/flagger/v1beta1/canary.go
+++ b/pkg/apis/flagger/v1beta1/canary.go
@@ -29,6 +29,7 @@ const (
 	CanaryKind              = "Canary"
 	ProgressDeadlineSeconds = 600
 	AnalysisInterval        = 60 * time.Second
+	PrimaryReadyThreshold   = 100
 	MetricInterval          = "1m"
 )
 
@@ -229,6 +230,9 @@ type CanaryAnalysis struct {
 	// Max number of failed checks before the canary is terminated
 	Threshold int `json:"threshold"`
 
+	// Percentage of pods that need to be available to consider primary as ready
+	PrimaryReadyThreshold *int `json:"primaryReadyThreshold,omitempty"`
+
 	// Alert list for this canary analysis
 	Alerts []CanaryAlert `json:"alerts,omitempty"`
 
@@ -440,6 +444,14 @@ func (c *Canary) GetAnalysisThreshold() int {
 	return 1
 }
 
+// GetAnalysisPrimaryReadyThreshold returns the canary primaryReadyThreshold (default 100)
+func (c *Canary) GetAnalysisPrimaryReadyThreshold() int {
+	if c.GetAnalysis().PrimaryReadyThreshold != nil {
+		return *c.GetAnalysis().PrimaryReadyThreshold
+	}
+	return PrimaryReadyThreshold
+}
+
 // GetMetricInterval returns the metric interval default value (1m)
 func (c *Canary) GetMetricInterval() string {
 	return MetricInterval
diff --git a/pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go b/pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go
index 28fa6465b..8bd1a4c30 100644
--- a/pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go
+++ b/pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go
@@ -202,6 +202,11 @@ func (in *CanaryAnalysis) DeepCopyInto(out *CanaryAnalysis) {
 		*out = make([]int, len(*in))
 		copy(*out, *in)
 	}
+	if in.PrimaryReadyThreshold != nil {
+		in, out := &in.PrimaryReadyThreshold, &out.PrimaryReadyThreshold
+		*out = new(int)
+		**out = **in
+	}
 	if in.Alerts != nil {
 		in, out := &in.Alerts, &out.Alerts
 		*out = make([]CanaryAlert, len(*in))
diff --git a/pkg/canary/daemonset_ready.go b/pkg/canary/daemonset_ready.go
index 39890967a..d31625785 100644
--- a/pkg/canary/daemonset_ready.go
+++ b/pkg/canary/daemonset_ready.go
@@ -36,7 +36,7 @@ func (c *DaemonSetController) IsPrimaryReady(cd *flaggerv1.Canary) error {
 		return fmt.Errorf("daemonset %s.%s get query error: %w", primaryName, cd.Namespace, err)
 	}
 
-	_, err = c.isDaemonSetReady(cd, primary)
+	_, err = c.isDaemonSetReady(cd, primary, cd.GetAnalysisPrimaryReadyThreshold())
 	if err != nil {
 		return fmt.Errorf("primary daemonset %s.%s not ready: %w", primaryName, cd.Namespace, err)
 	}
@@ -52,7 +52,7 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
 		return true, fmt.Errorf("daemonset %s.%s get query error: %w", targetName, cd.Namespace, err)
 	}
 
-	retryable, err := c.isDaemonSetReady(cd, canary)
+	retryable, err := c.isDaemonSetReady(cd, canary, 100)
 	if err != nil {
 		return retryable, fmt.Errorf("canary damonset %s.%s not ready with retryable %v: %w",
 			targetName, cd.Namespace, retryable, err)
@@ -62,11 +62,14 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
 
 // isDaemonSetReady determines if a daemonset is ready by checking the number of old version daemons
 // reference: https://github.com/kubernetes/kubernetes/blob/5232ad4a00ec93942d0b2c6359ee6cd1201b46bc/pkg/kubectl/rollout_status.go#L110
-func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet) (bool, error) {
+func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet, readyThreshold int) (bool, error) {
 	if daemonSet.Generation <= daemonSet.Status.ObservedGeneration {
+		readyThresholdRatio := float32(readyThreshold) / float32(100)
+
 		// calculate conditions
 		newCond := daemonSet.Status.UpdatedNumberScheduled < daemonSet.Status.DesiredNumberScheduled
-		availableCond := daemonSet.Status.NumberAvailable < daemonSet.Status.DesiredNumberScheduled
+		readyThresholdDesiredReplicas := int32(float32(daemonSet.Status.DesiredNumberScheduled) * readyThresholdRatio)
+		availableCond := daemonSet.Status.NumberAvailable < readyThresholdDesiredReplicas
 		if !newCond && !availableCond {
 			return true, nil
 		}
@@ -83,8 +86,8 @@ func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *
 			return true, fmt.Errorf("waiting for rollout to finish: %d out of %d new pods have been updated",
 				daemonSet.Status.UpdatedNumberScheduled, daemonSet.Status.DesiredNumberScheduled)
 		} else if availableCond {
-			return true, fmt.Errorf("waiting for rollout to finish: %d of %d updated pods are available",
-				daemonSet.Status.NumberAvailable, daemonSet.Status.DesiredNumberScheduled)
+			return true, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated pods are available",
+				daemonSet.Status.NumberAvailable, readyThresholdDesiredReplicas, readyThreshold)
 		}
 	}
 	return true, fmt.Errorf("waiting for rollout to finish: observed daemonset generation less than desired generation")
diff --git a/pkg/canary/daemonset_ready_test.go b/pkg/canary/daemonset_ready_test.go
index 5d214852c..6864486db 100644
--- a/pkg/canary/daemonset_ready_test.go
+++ b/pkg/canary/daemonset_ready_test.go
@@ -48,7 +48,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
 	// observed generation is less than desired generation
 	ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{}}
 	ds.Status.ObservedGeneration--
-	retryable, err := mocks.controller.isDaemonSetReady(cd, ds)
+	retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 100)
 	require.Error(t, err)
 	require.True(t, retryable)
 
@@ -58,7 +58,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
 		DesiredNumberScheduled: 1,
 		NumberAvailable:        1,
 	}}
-	retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
+	retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
 	require.NoError(t, err)
 	require.True(t, retryable)
 
@@ -69,7 +69,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
 	}}
 	cd.Status.LastTransitionTime = metav1.Now()
 	cd.Spec.ProgressDeadlineSeconds = int32p(-1e6)
-	retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
+	retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
 	require.Error(t, err)
 	require.False(t, retryable)
 
@@ -80,7 +80,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
 		NumberAvailable:        1,
 	}}
 	cd.Spec.ProgressDeadlineSeconds = int32p(1e6)
-	retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
+	retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
 	require.Error(t, err)
 	require.True(t, retryable)
 	require.True(t, strings.Contains(err.Error(), "new pods"))
@@ -91,7 +91,51 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
 		DesiredNumberScheduled: 1,
 		NumberAvailable:        0,
 	}}
-	retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
+	retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
+	require.Error(t, err)
+	require.True(t, retryable)
+	require.True(t, strings.Contains(err.Error(), "available"))
+}
+
+func TestDaemonSetController_isDaemonSetReady_readyThreshold(t *testing.T) {
+	dc := daemonsetConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
+	mocks := newDaemonSetFixture(dc)
+	cd := &flaggerv1.Canary{}
+
+	// observed generation is less than desired generation
+	ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{}}
+	ds.Status.ObservedGeneration--
+	retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 50)
+	require.Error(t, err)
+	require.True(t, retryable)
+
+	// succeeded
+	ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
+		UpdatedNumberScheduled: 1,
+		DesiredNumberScheduled: 1,
+		NumberAvailable:        1,
+	}}
+	retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
+	require.NoError(t, err)
+	require.True(t, retryable)
+
+	// waiting for updated ones to be available, 50% available, ok
+	ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
+		UpdatedNumberScheduled: 1,
+		DesiredNumberScheduled: 4,
+		NumberAvailable:        2,
+	}}
+	retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
+	require.NoError(t, err)
+	require.True(t, retryable)
+
+	// waiting for updated ones to be available, less than 50% available
+	ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
+		UpdatedNumberScheduled: 1,
+		DesiredNumberScheduled: 4,
+		NumberAvailable:        1,
+	}}
+	retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
 	require.Error(t, err)
 	require.True(t, retryable)
 	require.True(t, strings.Contains(err.Error(), "available"))
diff --git a/pkg/canary/deployment_ready.go b/pkg/canary/deployment_ready.go
index 45a87c4d7..b3dc78f2b 100644
--- a/pkg/canary/deployment_ready.go
+++ b/pkg/canary/deployment_ready.go
@@ -37,7 +37,7 @@ func (c *DeploymentController) IsPrimaryReady(cd *flaggerv1.Canary) error {
 		return fmt.Errorf("deployment %s.%s get query error: %w", primaryName, cd.Namespace, err)
 	}
 
-	_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds())
+	_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds(), cd.GetAnalysisPrimaryReadyThreshold())
 	if err != nil {
 		return fmt.Errorf("%s.%s not ready: %w", primaryName, cd.Namespace, err)
 	}
@@ -59,7 +59,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
 		return true, fmt.Errorf("deployment %s.%s get query error: %w", targetName, cd.Namespace, err)
 	}
 
-	retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds())
+	retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds(), 100)
 	if err != nil {
 		return retryable, fmt.Errorf(
 			"canary deployment %s.%s not ready: %w",
@@ -71,7 +71,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
 
 // isDeploymentReady determines if a deployment is ready by checking the status conditions
 // if a deployment has exceeded the progress deadline it returns a non retriable error
-func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int) (bool, error) {
+func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int, readyThreshold int) (bool, error) {
 	retriable := true
 	if deployment.Generation <= deployment.Status.ObservedGeneration {
 		progress := c.getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
@@ -86,6 +86,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
 			}
 		}
 
+		readyThresholdRatio := float32(readyThreshold) / float32(100)
+		readyThresholdUpdatedReplicas := int32(float32(deployment.Status.UpdatedReplicas) * readyThresholdRatio)
+
 		if progress != nil && progress.Reason == "ProgressDeadlineExceeded" {
 			return false, fmt.Errorf("deployment %q exceeded its progress deadline", deployment.GetName())
 		} else if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
@@ -94,9 +97,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
 		} else if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
 			return retriable, fmt.Errorf("waiting for rollout to finish: %d old replicas are pending termination",
 				deployment.Status.Replicas-deployment.Status.UpdatedReplicas)
-		} else if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
-			return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d updated replicas are available",
-				deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas)
+		} else if deployment.Status.AvailableReplicas < readyThresholdUpdatedReplicas {
+			return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated replicas are available",
+				deployment.Status.AvailableReplicas, readyThresholdUpdatedReplicas, readyThreshold)
 		}
 	} else {
 		return true, fmt.Errorf(
diff --git a/pkg/canary/deployment_ready_test.go b/pkg/canary/deployment_ready_test.go
index 9dfbc47fb..8719ee113 100644
--- a/pkg/canary/deployment_ready_test.go
+++ b/pkg/canary/deployment_ready_test.go
@@ -45,7 +45,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
 
 	// observed generation is less than desired generation
 	dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
-	retryable, err := mocks.controller.isDeploymentReady(dp, 0)
+	retryable, err := mocks.controller.isDeploymentReady(dp, 0, 100)
 	assert.Error(t, err)
 	assert.True(t, retryable)
 	assert.True(t, strings.Contains(err.Error(), "generation"))
@@ -57,7 +57,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
 		ReadyReplicas:     1,
 		AvailableReplicas: 1,
 	}}
-	retryable, err = mocks.controller.isDeploymentReady(dp, 0)
+	retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
 	assert.NoError(t, err)
 	assert.True(t, retryable)
 
@@ -67,7 +67,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
 	}, Spec: appsv1.DeploymentSpec{
 		Replicas: int32p(2),
 	}}
-	_, err = mocks.controller.isDeploymentReady(dp, 0)
+	_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
 	assert.Error(t, err)
 	assert.True(t, strings.Contains(err.Error(), "new replicas"))
 
@@ -76,7 +76,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
 		UpdatedReplicas: 1,
 		Replicas:        2,
 	}}
-	_, err = mocks.controller.isDeploymentReady(dp, 0)
+	_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
 	assert.Error(t, err)
 	assert.True(t, strings.Contains(err.Error(), "termination"))
 
@@ -85,7 +85,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
 		UpdatedReplicas:   2,
 		AvailableReplicas: 1,
 	}}
-	_, err = mocks.controller.isDeploymentReady(dp, 0)
+	_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
 	assert.Error(t, err)
 	assert.True(t, strings.Contains(err.Error(), "available"))
 
@@ -94,7 +94,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
 		Conditions: []appsv1.DeploymentCondition{
 			{Type: appsv1.DeploymentProgressing, Reason: "ProgressDeadlineExceeded"}},
 	}}
-	retryable, err = mocks.controller.isDeploymentReady(dp, 0)
+	retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
 	assert.Error(t, err)
 	assert.False(t, retryable)
 	assert.True(t, strings.Contains(err.Error(), "exceeded"))
@@ -113,7 +113,48 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
 		UpdatedReplicas:   2,
 		AvailableReplicas: 1,
 	}}
-	retryable, err = mocks.controller.isDeploymentReady(dp, 5)
+	retryable, err = mocks.controller.isDeploymentReady(dp, 5, 100)
 	assert.Error(t, err)
 	assert.False(t, retryable)
 }
+
+func TestDeploymentController_isDeploymentReady_readyThreshold(t *testing.T) {
+	dc := deploymentConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
+	mocks := newDeploymentFixture(dc)
+
+	// observed generation is less than desired generation
+	dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
+	retryable, err := mocks.controller.isDeploymentReady(dp, 0, 50)
+	assert.Error(t, err)
+	assert.True(t, retryable)
+	assert.True(t, strings.Contains(err.Error(), "generation"))
+
+	// ok
+	dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
+		Replicas:          1,
+		UpdatedReplicas:   1,
+		ReadyReplicas:     1,
+		AvailableReplicas: 1,
+	}}
+	retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
+	assert.NoError(t, err)
+	assert.True(t, retryable)
+
+	// waiting for updated ones to be available, 50% is available, ok
+	dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
+		UpdatedReplicas:   4,
+		AvailableReplicas: 2,
+	}}
+	retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
+	assert.NoError(t, err)
+	assert.True(t, retryable)
+
+	// waiting for updated ones to be available, less than 50% available
+	dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
+		UpdatedReplicas:   4,
+		AvailableReplicas: 1,
+	}}
+	retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
+	assert.Error(t, err)
+	assert.True(t, strings.Contains(err.Error(), "available"))
+}
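
For reference, a minimal sketch of how the new setting could be used in a Canary analysis spec; apart from primaryReadyThreshold (the field introduced by this change), the surrounding values are illustrative:

    analysis:
      # schedule interval (default 60s)
      interval: 1m
      # max number of failed checks before rollback
      threshold: 5
      # consider the primary ready once 80% of its pods are available
      # instead of the default 100%
      primaryReadyThreshold: 80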