Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement progressive promotion #593

Merged
merged 4 commits into from
May 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ spec:
description: Max traffic percentage routed to canary
type: number
stepWeight:
description: Incremental traffic percentage step
description: Incremental traffic percentage step for the analysis phase
type: number
stepWeightPromotion:
description: Incremental traffic percentage step for the promotion phase
type: number
mirror:
description: Mirror traffic to canary
Expand Down
5 changes: 4 additions & 1 deletion charts/flagger/crds/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ spec:
description: Max traffic percentage routed to canary
type: number
stepWeight:
description: Incremental traffic percentage step
description: Incremental traffic percentage step for the analysis phase
type: number
stepWeightPromotion:
description: Incremental traffic percentage step for the promotion phase
type: number
mirror:
description: Mirror traffic to canary
Expand Down
9 changes: 8 additions & 1 deletion docs/gitbook/usage/deployment-strategies.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,16 @@ Spec:
# canary increment step
# percentage (0-100)
stepWeight: 2
# promotion increment step (default 100)
# percentage (0-100)
stepWeightPromotion: 100
# deploy straight to production without
# the metrics and webhook checks
skipAnalysis: false
```
The above analysis, if it succeeds, will run for 25 minutes while validating the HTTP metrics and webhooks every minute.
You can determine the minimum time that it takes to validate and promote a canary deployment using this formula:
You can determine the minimum time it takes to validate and promote a canary deployment using this formula:
```
interval * (maxWeight / stepWeight)
Expand All @@ -61,6 +64,10 @@ And the time it takes for a canary to be rollback when the metrics or webhook ch
interval * threshold
```

When `stepWeightPromotion` is specified, the promotion phase happens in stages:
traffic is routed back to the primary pods progressively, and the primary weight
is increased by `stepWeightPromotion` at each step until it reaches 100%.

In emergency cases, you may want to skip the analysis phase and ship changes directly to production.
At any time you can set `spec.skipAnalysis: true`.
When skip analysis is enabled, Flagger checks if the canary deployment is healthy and
Expand Down
3 changes: 3 additions & 0 deletions docs/gitbook/usage/how-it-works.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@ Spec:
# canary increment step
# percentage (0-100)
stepWeight:
# promotion increment step
# percentage (0-100)
stepWeightPromotion:
# total number of iterations
# used for A/B Testing and Blue/Green
iterations:
Expand Down
5 changes: 4 additions & 1 deletion kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ spec:
description: Max traffic percentage routed to canary
type: number
stepWeight:
description: Incremental traffic percentage step
description: Incremental traffic percentage step for the analysis phase
type: number
stepWeightPromotion:
description: Incremental traffic percentage step for the promotion phase
type: number
mirror:
description: Mirror traffic to canary
Expand Down
6 changes: 5 additions & 1 deletion pkg/apis/flagger/v1beta1/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,14 @@ type CanaryAnalysis struct {
// +optional
MaxWeight int `json:"maxWeight,omitempty"`

// Incremental traffic percentage step
// Incremental traffic percentage step for analysis phase
// +optional
StepWeight int `json:"stepWeight,omitempty"`

// Incremental traffic percentage step for promotion phase
// +optional
StepWeightPromotion int `json:"stepWeightPromotion,omitempty"`

// Max number of failed checks before the canary is terminated
Threshold int `json:"threshold"`

Expand Down
77 changes: 60 additions & 17 deletions pkg/controller/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,23 +251,9 @@ func (c *Controller) advanceCanary(name string, namespace string) {
}
}

// route all traffic to primary if analysis has succeeded
// route traffic back to primary if analysis has succeeded
if cd.Status.Phase == flaggerv1.CanaryPhasePromoting {
if provider != "kubernetes" {
c.recordEventInfof(cd, "Routing all traffic to primary")
if err := meshRouter.SetRoutes(cd, 100, 0, false); err != nil {
c.recordEventWarningf(cd, "%v", err)
return
}
c.recorder.SetWeight(cd, 100, 0)
}

// update status phase
if err := canaryController.SetStatusPhase(cd, flaggerv1.CanaryPhaseFinalising); err != nil {
c.recordEventWarningf(cd, "%v", err)
return
}

c.runPromotionTrafficShift(cd, canaryController, meshRouter, provider, canaryWeight, primaryWeight)
return
}

Expand Down Expand Up @@ -332,7 +318,7 @@ func (c *Controller) advanceCanary(name string, namespace string) {
}

// use blue/green strategy for kubernetes provider
if provider == "kubernetes" {
if provider == flaggerv1.KubernetesProvider {
if len(cd.GetAnalysis().Match) > 0 {
c.recordEventWarningf(cd, "A/B testing is not supported when using the kubernetes provider")
cd.GetAnalysis().Match = nil
Expand Down Expand Up @@ -363,6 +349,63 @@ func (c *Controller) advanceCanary(name string, namespace string) {

}

// runPromotionTrafficShift routes traffic from the canary back to the primary
// for a canary that is in the promoting phase.
//
// For the Kubernetes provider no traffic shifting is performed and the canary
// is moved straight to the finalising phase. For every other provider:
//
//   - when the promotion step weight is not set (zero or negative), or the
//     canary no longer receives any weighted traffic, all traffic is routed
//     to the primary in one go and the canary is moved to the finalising phase;
//   - otherwise the primary weight is raised (and the canary weight lowered)
//     by StepWeightPromotion, clamped to the 0-100 range. Once the primary
//     weight reaches 100 the canary is moved to the finalising phase, else
//     the new canary weight is stored in the canary status.
//
// canaryWeight and primaryWeight are the weights currently routed to the
// canary and to the primary. Failures are recorded as warning events and the
// function returns without changing the phase, so a later invocation can
// retry.
func (c *Controller) runPromotionTrafficShift(canary *flaggerv1.Canary, canaryController canary.Controller,
	meshRouter router.Interface, provider string, canaryWeight int, primaryWeight int) {
	// finalize promotion since no traffic shifting is possible for Kubernetes CNI
	if provider == flaggerv1.KubernetesProvider {
		if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseFinalising); err != nil {
			c.recordEventWarningf(canary, "%v", err)
		}
		return
	}

	// read the step through GetAnalysis, the same accessor used for every
	// other analysis setting in this file
	stepWeight := canary.GetAnalysis().StepWeightPromotion

	// route all traffic to primary in one go when the promotion step weight is
	// not set. The same path is taken when the canary weight is already zero:
	// there is nothing left to shift, and skipping the phase update here would
	// leave the canary in the promoting phase on every subsequent call (for
	// example after a failed status update on the last step).
	if stepWeight <= 0 || canaryWeight <= 0 {
		c.recordEventInfof(canary, "Routing all traffic to primary")
		if err := meshRouter.SetRoutes(canary, 100, 0, false); err != nil {
			c.recordEventWarningf(canary, "%v", err)
			return
		}
		c.recorder.SetWeight(canary, 100, 0)
		if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseFinalising); err != nil {
			c.recordEventWarningf(canary, "%v", err)
		}
		return
	}

	// increment the primary traffic weight until it reaches 100%
	primaryWeight += stepWeight
	if primaryWeight > 100 {
		primaryWeight = 100
	}
	canaryWeight -= stepWeight
	if canaryWeight < 0 {
		canaryWeight = 0
	}
	if err := meshRouter.SetRoutes(canary, primaryWeight, canaryWeight, false); err != nil {
		c.recordEventWarningf(canary, "%v", err)
		return
	}
	c.recorder.SetWeight(canary, primaryWeight, canaryWeight)
	c.recordEventInfof(canary, "Advance %s.%s primary weight %v", canary.Name, canary.Namespace, primaryWeight)

	// finalize promotion
	if primaryWeight == 100 {
		if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseFinalising); err != nil {
			c.recordEventWarningf(canary, "%v", err)
		}
		return
	}

	// promotion still in progress: persist the canary weight reached so far
	if err := canaryController.SetStatusWeight(canary, canaryWeight); err != nil {
		c.recordEventWarningf(canary, "%v", err)
	}
}

func (c *Controller) runCanary(canary *flaggerv1.Canary, canaryController canary.Controller,
meshRouter router.Interface, mirrored bool, canaryWeight int, primaryWeight int, maxWeight int) {
primaryName := fmt.Sprintf("%s-primary", canary.Spec.TargetRef.Name)
Expand Down
11 changes: 8 additions & 3 deletions pkg/controller/scheduler_deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,9 @@ func TestScheduler_DeploymentSkipAnalysis(t *testing.T) {
func TestScheduler_DeploymentAnalysisPhases(t *testing.T) {
cd := newDeploymentTestCanary()
cd.Spec.Analysis = &flaggerv1.CanaryAnalysis{
Interval: "1m",
StepWeight: 100,
Interval: "1m",
StepWeight: 100,
StepWeightPromotion: 50,
}
mocks := newDeploymentFixture(cd)

Expand Down Expand Up @@ -163,7 +164,11 @@ func TestScheduler_DeploymentAnalysisPhases(t *testing.T) {
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhaseProgressing))

// promoting
// start promotion
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhasePromoting))

// end promotion
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhasePromoting))

Expand Down