Skip to content

Commit

Permalink
Merge pull request #593 from weaveworks/progressive-promotion
Browse files Browse the repository at this point in the history
Implement progressive promotion
  • Loading branch information
stefanprodan authored May 18, 2020
2 parents 0056b99 + be96a11 commit f5a3b9d
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 25 deletions.
5 changes: 4 additions & 1 deletion artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ spec:
description: Max traffic percentage routed to canary
type: number
stepWeight:
description: Incremental traffic percentage step
description: Incremental traffic percentage step for the analysis phase
type: number
stepWeightPromotion:
description: Incremental traffic percentage step for the promotion phase
type: number
mirror:
description: Mirror traffic to canary
Expand Down
5 changes: 4 additions & 1 deletion charts/flagger/crds/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ spec:
description: Max traffic percentage routed to canary
type: number
stepWeight:
description: Incremental traffic percentage step
description: Incremental traffic percentage step for the analysis phase
type: number
stepWeightPromotion:
description: Incremental traffic percentage step for the promotion phase
type: number
mirror:
description: Mirror traffic to canary
Expand Down
9 changes: 8 additions & 1 deletion docs/gitbook/usage/deployment-strategies.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,16 @@ Spec:
# canary increment step
# percentage (0-100)
stepWeight: 2
# promotion increment step (default 100)
# percentage (0-100)
stepWeightPromotion: 100
# deploy straight to production without
# the metrics and webhook checks
skipAnalysis: false
```
The above analysis, if it succeeds, will run for 25 minutes while validating the HTTP metrics and webhooks every minute.
You can determine the minimum time that it takes to validate and promote a canary deployment using this formula:
You can determine the minimum time it takes to validate and promote a canary deployment using this formula:
```
interval * (maxWeight / stepWeight)
Expand All @@ -61,6 +64,10 @@ And the time it takes for a canary to be rollback when the metrics or webhook ch
interval * threshold
```

When `stepWeightPromotion` is specified, the promotion phase happens in stages:
traffic is routed back to the primary pods progressively, and on every interval
the primary weight is increased by `stepWeightPromotion` until it reaches 100%.

In emergency cases, you may want to skip the analysis phase and ship changes directly to production.
At any time you can set the `spec.skipAnalysis: true`.
When skip analysis is enabled, Flagger checks if the canary deployment is healthy and
Expand Down
3 changes: 3 additions & 0 deletions docs/gitbook/usage/how-it-works.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@ Spec:
# canary increment step
# percentage (0-100)
stepWeight:
# promotion increment step
# percentage (0-100)
stepWeightPromotion:
# total number of iterations
# used for A/B Testing and Blue/Green
iterations:
Expand Down
5 changes: 4 additions & 1 deletion kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ spec:
description: Max traffic percentage routed to canary
type: number
stepWeight:
description: Incremental traffic percentage step
description: Incremental traffic percentage step for the analysis phase
type: number
stepWeightPromotion:
description: Incremental traffic percentage step for the promotion phase
type: number
mirror:
description: Mirror traffic to canary
Expand Down
6 changes: 5 additions & 1 deletion pkg/apis/flagger/v1beta1/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,14 @@ type CanaryAnalysis struct {
// +optional
MaxWeight int `json:"maxWeight,omitempty"`

// Incremental traffic percentage step
// Incremental traffic percentage step for analysis phase
// +optional
StepWeight int `json:"stepWeight,omitempty"`

// Incremental traffic percentage step for promotion phase
// +optional
StepWeightPromotion int `json:"stepWeightPromotion,omitempty"`

// Max number of failed checks before the canary is terminated
Threshold int `json:"threshold"`

Expand Down
77 changes: 60 additions & 17 deletions pkg/controller/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,23 +251,9 @@ func (c *Controller) advanceCanary(name string, namespace string) {
}
}

// route all traffic to primary if analysis has succeeded
// route traffic back to primary if analysis has succeeded
if cd.Status.Phase == flaggerv1.CanaryPhasePromoting {
if provider != "kubernetes" {
c.recordEventInfof(cd, "Routing all traffic to primary")
if err := meshRouter.SetRoutes(cd, 100, 0, false); err != nil {
c.recordEventWarningf(cd, "%v", err)
return
}
c.recorder.SetWeight(cd, 100, 0)
}

// update status phase
if err := canaryController.SetStatusPhase(cd, flaggerv1.CanaryPhaseFinalising); err != nil {
c.recordEventWarningf(cd, "%v", err)
return
}

c.runPromotionTrafficShift(cd, canaryController, meshRouter, provider, canaryWeight, primaryWeight)
return
}

Expand Down Expand Up @@ -332,7 +318,7 @@ func (c *Controller) advanceCanary(name string, namespace string) {
}

// use blue/green strategy for kubernetes provider
if provider == "kubernetes" {
if provider == flaggerv1.KubernetesProvider {
if len(cd.GetAnalysis().Match) > 0 {
c.recordEventWarningf(cd, "A/B testing is not supported when using the kubernetes provider")
cd.GetAnalysis().Match = nil
Expand Down Expand Up @@ -363,6 +349,63 @@ func (c *Controller) advanceCanary(name string, namespace string) {

}

// runPromotionTrafficShift routes traffic back to the primary while the canary
// is in the promoting phase and moves the canary to the finalising phase once
// the primary receives all the traffic.
//
// Behaviour per case:
//   - Kubernetes provider: no traffic shifting is attempted, the phase is set
//     to finalising right away.
//   - StepWeightPromotion not set (zero or negative): all traffic is routed to
//     the primary in a single step, then the phase is set to finalising.
//   - StepWeightPromotion set: every call raises the primary weight and lowers
//     the canary weight by that step, clamped to the 0-100 range. The phase is
//     set to finalising when the primary weight reaches 100, otherwise the new
//     canary weight is stored in the status.
//
// canaryWeight and primaryWeight are the current route weights supplied by the
// caller. Failures are reported as warning events on the canary; the function
// returns nothing.
func (c *Controller) runPromotionTrafficShift(canary *flaggerv1.Canary, canaryController canary.Controller,
	meshRouter router.Interface, provider string, canaryWeight int, primaryWeight int) {
	// finalise switches the canary to the finalising phase and reports a
	// failed status update as a warning event.
	finalise := func() {
		if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseFinalising); err != nil {
			c.recordEventWarningf(canary, "%v", err)
		}
	}

	// finalize promotion since no traffic shifting is possible for Kubernetes CNI
	if provider == flaggerv1.KubernetesProvider {
		finalise()
		return
	}

	// Read the step through the GetAnalysis accessor, like the rest of the
	// controller does, instead of dereferencing Spec.Analysis directly.
	stepWeight := canary.GetAnalysis().StepWeightPromotion

	// route all traffic to primary in one go when promotion step weight is not set;
	// a negative step is treated as not set, since it would shift traffic away
	// from the primary and the promotion would never complete
	if stepWeight <= 0 {
		c.recordEventInfof(canary, "Routing all traffic to primary")
		if err := meshRouter.SetRoutes(canary, 100, 0, false); err != nil {
			c.recordEventWarningf(canary, "%v", err)
			return
		}
		c.recorder.SetWeight(canary, 100, 0)
		finalise()
		return
	}

	// The canary no longer receives traffic, so there is nothing left to shift.
	// This is reached when a previous run moved all traffic to the primary but
	// failed to update the phase; retry the phase change so the canary does not
	// stay in the promoting phase forever.
	if canaryWeight <= 0 {
		finalise()
		return
	}

	// increment the primary traffic weight until it reaches 100%
	primaryWeight += stepWeight
	if primaryWeight > 100 {
		primaryWeight = 100
	}
	canaryWeight -= stepWeight
	if canaryWeight < 0 {
		canaryWeight = 0
	}
	if err := meshRouter.SetRoutes(canary, primaryWeight, canaryWeight, false); err != nil {
		c.recordEventWarningf(canary, "%v", err)
		return
	}
	c.recorder.SetWeight(canary, primaryWeight, canaryWeight)
	c.recordEventInfof(canary, "Advance %s.%s primary weight %v", canary.Name, canary.Namespace, primaryWeight)

	// finalize promotion
	if primaryWeight == 100 {
		finalise()
		return
	}

	// promotion still in progress: persist the canary weight reached so far
	if err := canaryController.SetStatusWeight(canary, canaryWeight); err != nil {
		c.recordEventWarningf(canary, "%v", err)
	}
}

func (c *Controller) runCanary(canary *flaggerv1.Canary, canaryController canary.Controller,
meshRouter router.Interface, mirrored bool, canaryWeight int, primaryWeight int, maxWeight int) {
primaryName := fmt.Sprintf("%s-primary", canary.Spec.TargetRef.Name)
Expand Down
11 changes: 8 additions & 3 deletions pkg/controller/scheduler_deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,9 @@ func TestScheduler_DeploymentSkipAnalysis(t *testing.T) {
func TestScheduler_DeploymentAnalysisPhases(t *testing.T) {
cd := newDeploymentTestCanary()
cd.Spec.Analysis = &flaggerv1.CanaryAnalysis{
Interval: "1m",
StepWeight: 100,
Interval: "1m",
StepWeight: 100,
StepWeightPromotion: 50,
}
mocks := newDeploymentFixture(cd)

Expand Down Expand Up @@ -163,7 +164,11 @@ func TestScheduler_DeploymentAnalysisPhases(t *testing.T) {
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhaseProgressing))

// promoting
// start promotion
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhasePromoting))

// end promotion
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhasePromoting))

Expand Down

0 comments on commit f5a3b9d

Please sign in to comment.