Skip to content

Commit

Permalink
[feat] primaryReadyThreshold: allow configuring threshold for primary
Browse files Browse the repository at this point in the history
see fluxcd#639

Signed-off-by: Mahdi Dibaiee <mdibaiee@pm.me>
  • Loading branch information
mdibaiee committed Nov 11, 2021
1 parent 9c7db58 commit c9eebe8
Show file tree
Hide file tree
Showing 10 changed files with 145 additions and 24 deletions.
3 changes: 3 additions & 0 deletions artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array
Expand Down
3 changes: 3 additions & 0 deletions charts/flagger/crds/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array
Expand Down
4 changes: 4 additions & 0 deletions docs/gitbook/usage/how-it-works.md
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,10 @@ Spec:
# total number of iterations
# used for A/B Testing and Blue/Green
iterations:
# threshold of primary pods that need to be available to consider it ready
# before starting rollout. this is optional and the default is 100
# percentage (0-100)
primaryReadyThreshold: 100
# canary match conditions
# used for A/B Testing
match:
Expand Down
3 changes: 3 additions & 0 deletions kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array
Expand Down
12 changes: 12 additions & 0 deletions pkg/apis/flagger/v1beta1/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const (
CanaryKind = "Canary"
ProgressDeadlineSeconds = 600
AnalysisInterval = 60 * time.Second
PrimaryReadyThreshold = 100
MetricInterval = "1m"
)

Expand Down Expand Up @@ -229,6 +230,9 @@ type CanaryAnalysis struct {
// Max number of failed checks before the canary is terminated
Threshold int `json:"threshold"`

// Percentage of pods that need to be available to consider primary as ready
PrimaryReadyThreshold *int `json:"primaryReadyThreshold,omitempty"`

// Alert list for this canary analysis
Alerts []CanaryAlert `json:"alerts,omitempty"`

Expand Down Expand Up @@ -440,6 +444,14 @@ func (c *Canary) GetAnalysisThreshold() int {
return 1
}

// GetAnalysisPrimaryReadyThreshold returns the canary primaryReadyThreshold (default 100)
func (c *Canary) GetAnalysisPrimaryReadyThreshold() int {
if c.GetAnalysis().PrimaryReadyThreshold != nil {
return *c.GetAnalysis().PrimaryReadyThreshold
}
return PrimaryReadyThreshold
}

// GetMetricInterval returns the metric interval default value (1m)
func (c *Canary) GetMetricInterval() string {
return MetricInterval
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 9 additions & 6 deletions pkg/canary/daemonset_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func (c *DaemonSetController) IsPrimaryReady(cd *flaggerv1.Canary) error {
return fmt.Errorf("daemonset %s.%s get query error: %w", primaryName, cd.Namespace, err)
}

_, err = c.isDaemonSetReady(cd, primary)
_, err = c.isDaemonSetReady(cd, primary, cd.GetAnalysisPrimaryReadyThreshold())
if err != nil {
return fmt.Errorf("primary daemonset %s.%s not ready: %w", primaryName, cd.Namespace, err)
}
Expand All @@ -52,7 +52,7 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
return true, fmt.Errorf("daemonset %s.%s get query error: %w", targetName, cd.Namespace, err)
}

retryable, err := c.isDaemonSetReady(cd, canary)
retryable, err := c.isDaemonSetReady(cd, canary, 100)
if err != nil {
return retryable, fmt.Errorf("canary damonset %s.%s not ready with retryable %v: %w",
targetName, cd.Namespace, retryable, err)
Expand All @@ -62,11 +62,14 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)

// isDaemonSetReady determines if a daemonset is ready by checking the number of old version daemons
// reference: https://github.com/kubernetes/kubernetes/blob/5232ad4a00ec93942d0b2c6359ee6cd1201b46bc/pkg/kubectl/rollout_status.go#L110
func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet) (bool, error) {
func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet, readyThreshold int) (bool, error) {
if daemonSet.Generation <= daemonSet.Status.ObservedGeneration {
readyThresholdRatio := float32(readyThreshold) / float32(100)

// calculate conditions
newCond := daemonSet.Status.UpdatedNumberScheduled < daemonSet.Status.DesiredNumberScheduled
availableCond := daemonSet.Status.NumberAvailable < daemonSet.Status.DesiredNumberScheduled
readyThresholdDesiredReplicas := int32(float32(daemonSet.Status.DesiredNumberScheduled) * readyThresholdRatio)
availableCond := daemonSet.Status.NumberAvailable < readyThresholdDesiredReplicas
if !newCond && !availableCond {
return true, nil
}
Expand All @@ -83,8 +86,8 @@ func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *
return true, fmt.Errorf("waiting for rollout to finish: %d out of %d new pods have been updated",
daemonSet.Status.UpdatedNumberScheduled, daemonSet.Status.DesiredNumberScheduled)
} else if availableCond {
return true, fmt.Errorf("waiting for rollout to finish: %d of %d updated pods are available",
daemonSet.Status.NumberAvailable, daemonSet.Status.DesiredNumberScheduled)
return true, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated pods are available",
daemonSet.Status.NumberAvailable, readyThresholdDesiredReplicas, readyThreshold)
}
}
return true, fmt.Errorf("waiting for rollout to finish: observed daemonset generation less than desired generation")
Expand Down
54 changes: 49 additions & 5 deletions pkg/canary/daemonset_ready_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
// observed generation is less than desired generation
ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{}}
ds.Status.ObservedGeneration--
retryable, err := mocks.controller.isDaemonSetReady(cd, ds)
retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)

Expand All @@ -58,7 +58,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
DesiredNumberScheduled: 1,
NumberAvailable: 1,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.NoError(t, err)
require.True(t, retryable)

Expand All @@ -69,7 +69,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
}}
cd.Status.LastTransitionTime = metav1.Now()
cd.Spec.ProgressDeadlineSeconds = int32p(-1e6)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.False(t, retryable)

Expand All @@ -80,7 +80,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
NumberAvailable: 1,
}}
cd.Spec.ProgressDeadlineSeconds = int32p(1e6)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "new pods"))
Expand All @@ -91,7 +91,51 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
DesiredNumberScheduled: 1,
NumberAvailable: 0,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "available"))
}

func TestDaemonSetController_isDaemonSetReady_readyThreshold(t *testing.T) {
dc := daemonsetConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
mocks := newDaemonSetFixture(dc)
cd := &flaggerv1.Canary{}

// observed generation is less than desired generation
ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{}}
ds.Status.ObservedGeneration--
retryable, err := mocks.controller.isDaemonSetReady(cd, ds)
require.Error(t, err)
require.True(t, retryable)

// succeeded
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 1,
DesiredNumberScheduled: 1,
NumberAvailable: 1,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
require.NoError(t, err)
require.True(t, retryable)

// waiting for updated ones to be available, 50% available, ok
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 1,
DesiredNumberScheduled: 4,
NumberAvailable: 2,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
require.NoError(t, err)
require.True(t, retryable)

// waiting for updated ones to be available, less than 50% available
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 1,
DesiredNumberScheduled: 4,
NumberAvailable: 1,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "available"))
Expand Down
15 changes: 9 additions & 6 deletions pkg/canary/deployment_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func (c *DeploymentController) IsPrimaryReady(cd *flaggerv1.Canary) error {
return fmt.Errorf("deployment %s.%s get query error: %w", primaryName, cd.Namespace, err)
}

_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds())
_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds(), cd.GetAnalysisPrimaryReadyThreshold())
if err != nil {
return fmt.Errorf("%s.%s not ready: %w", primaryName, cd.Namespace, err)
}
Expand All @@ -59,7 +59,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
return true, fmt.Errorf("deployment %s.%s get query error: %w", targetName, cd.Namespace, err)
}

retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds())
retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds(), 100)
if err != nil {
return retryable, fmt.Errorf(
"canary deployment %s.%s not ready: %w",
Expand All @@ -71,7 +71,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)

// isDeploymentReady determines if a deployment is ready by checking the status conditions
// if a deployment has exceeded the progress deadline it returns a non retriable error
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int) (bool, error) {
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int, readyThreshold int) (bool, error) {
retriable := true
if deployment.Generation <= deployment.Status.ObservedGeneration {
progress := c.getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
Expand All @@ -86,6 +86,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
}
}

readyThresholdRatio := float32(readyThreshold) / float32(100)
readyThresholdUpdatedReplicas = int32(float32(deployment.Status.UpdatedReplicas) * readyThresholdRatio)

if progress != nil && progress.Reason == "ProgressDeadlineExceeded" {
return false, fmt.Errorf("deployment %q exceeded its progress deadline", deployment.GetName())
} else if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
Expand All @@ -94,9 +97,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
} else if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d old replicas are pending termination",
deployment.Status.Replicas-deployment.Status.UpdatedReplicas)
} else if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d updated replicas are available",
deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas)
} else if deployment.Status.AvailableReplicas < readyThresholdUpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated replicas are available",
deployment.Status.AvailableReplicas, readyThresholdUpdatedReplicas, readyThreshold)
}
} else {
return true, fmt.Errorf(
Expand Down
55 changes: 48 additions & 7 deletions pkg/canary/deployment_ready_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {

// observed generation is less than desired generation
dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
retryable, err := mocks.controller.isDeploymentReady(dp, 0)
retryable, err := mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, retryable)
assert.True(t, strings.Contains(err.Error(), "generation"))
Expand All @@ -57,7 +57,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
ReadyReplicas: 1,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0)
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.NoError(t, err)
assert.True(t, retryable)

Expand All @@ -67,7 +67,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
}, Spec: appsv1.DeploymentSpec{
Replicas: int32p(2),
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "new replicas"))

Expand All @@ -76,7 +76,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 1,
Replicas: 2,
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "termination"))

Expand All @@ -85,7 +85,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 2,
AvailableReplicas: 1,
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "available"))

Expand All @@ -94,7 +94,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
Conditions: []appsv1.DeploymentCondition{
{Type: appsv1.DeploymentProgressing, Reason: "ProgressDeadlineExceeded"}},
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0)
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.False(t, retryable)
assert.True(t, strings.Contains(err.Error(), "exceeded"))
Expand All @@ -113,7 +113,48 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 2,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 5)
retryable, err = mocks.controller.isDeploymentReady(dp, 5, 100)
assert.Error(t, err)
assert.False(t, retryable)
}

func TestDeploymentController_isDeploymentReady_readyThreshold(t *testing.T) {
dc := deploymentConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
mocks := newDeploymentFixture(dc)

// observed generation is less than desired generation
dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
retryable, err := mocks.controller.isDeploymentReady(dp, 0, 50)
assert.Error(t, err)
assert.True(t, retryable)
assert.True(t, strings.Contains(err.Error(), "generation"))

// ok
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
Replicas: 1,
UpdatedReplicas: 1,
ReadyReplicas: 1,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.NoError(t, err)
assert.True(t, retryable)

// waiting for updated ones to be available, 50% is available, ok
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
UpdatedReplicas: 4,
AvailableReplicas: 2,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.NoError(t, err)
assert.True(t, retryable)

// waiting for updated ones to be available, less than 50% available
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
UpdatedReplicas: 4,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "available"))
}

0 comments on commit c9eebe8

Please sign in to comment.