diff --git a/lib/kubernetes-deploy/kubernetes_resource/deployment.rb b/lib/kubernetes-deploy/kubernetes_resource/deployment.rb index 31d42d97f..fcdf34774 100644 --- a/lib/kubernetes-deploy/kubernetes_resource/deployment.rb +++ b/lib/kubernetes-deploy/kubernetes_resource/deployment.rb @@ -96,6 +96,16 @@ def validate_definition(_) private + def current_generation + return -2 unless exists? # different default than observed + @instance_data.dig('metadata', 'generation') + end + + def observed_generation + return -1 unless exists? # different default than current + @instance_data.dig('status', 'observedGeneration') + end + def desired_replicas return -1 unless exists? @instance_data["spec"]["replicas"].to_i @@ -133,12 +143,13 @@ def deploy_failing_to_progress? # Deployments were being updated prematurely with incorrect progress information # https://github.com/kubernetes/kubernetes/issues/49637 return false unless Time.now.utc - @deploy_started_at >= progress_deadline.to_i - else - return false unless deploy_started? end - progress_condition["status"] == 'False' && - Time.parse(progress_condition["lastUpdateTime"]).to_i >= (@deploy_started_at - 5.seconds).to_i + # This assumes that when the controller bumps the observed generation, it also updates/clears all the status + # conditions. Specifically, it assumes the progress condition is immediately set to True if a rollout is starting. + deploy_started? && + current_generation == observed_generation && + progress_condition["status"] == 'False' end def find_latest_rs(mediator) diff --git a/test/fixtures/for_unit_tests/deployment_test.yml b/test/fixtures/for_unit_tests/deployment_test.yml index 05eec0dc6..115202557 100644 --- a/test/fixtures/for_unit_tests/deployment_test.yml +++ b/test/fixtures/for_unit_tests/deployment_test.yml @@ -4,6 +4,7 @@ kind: Deployment metadata: name: web uid: foobar + generation: 2 annotations: "deployment.kubernetes.io/revision": "1" labels: diff --git a/test/unit/kubernetes-deploy/kubernetes_resource/deployment_test.rb b/test/unit/kubernetes-deploy/kubernetes_resource/deployment_test.rb index a9bc306b4..3b9eb34ea 100644 --- a/test/unit/kubernetes-deploy/kubernetes_resource/deployment_test.rb +++ b/test/unit/kubernetes-deploy/kubernetes_resource/deployment_test.rb @@ -267,33 +267,36 @@ def test_deploy_timed_out_based_on_progress_deadline Timecop.freeze do deployment_status = { "replicas" => 3, + "observedGeneration" => 2, "conditions" => [{ "type" => "Progressing", "status" => 'False', "lastUpdateTime" => Time.now.utc - 10.seconds, - "reason" => "Failed to progress" + "reason" => "ProgressDeadlineExceeded" }] } deploy = build_synced_deployment( template: build_deployment_template(status: deployment_status), replica_sets: [build_rs_template(status: { "replica" => 1 })] ) - deploy.deploy_started_at = Time.now.utc - 3.minutes + refute deploy.deploy_timed_out?, "Deploy not started shouldn't have timed out" + deploy.deploy_started_at = Time.now.utc - 3.minutes assert deploy.deploy_timed_out? - assert_equal "Timeout reason: Failed to progress\nLatest ReplicaSet: web-1", deploy.timeout_message.strip + assert_equal "Timeout reason: ProgressDeadlineExceeded\nLatest ReplicaSet: web-1", deploy.timeout_message.strip end end - def test_deploy_timed_out_based_on_progress_deadline_ignores_conditions_older_than_the_deploy + def test_deploy_timed_out_based_on_progress_deadline_ignores_statuses_for_older_generations Timecop.freeze do deployment_status = { "replicas" => 3, + "observedGeneration" => 1, # current generation is 2 "conditions" => [{ "type" => "Progressing", "status" => 'False', "lastUpdateTime" => Time.now.utc - 10.seconds, - "reason" => "Failed to progress" + "reason" => "ProgressDeadlineExceeded" }] } deploy = build_synced_deployment( @@ -301,14 +304,8 @@ def test_deploy_timed_out_based_on_progress_deadline_ignores_conditions_older_th replica_sets: [build_rs_template(status: { "replica" => 1 })] ) - deploy.deploy_started_at = nil # not started yet + deploy.deploy_started_at = Time.now.utc - 20.seconds refute deploy.deploy_timed_out? - - deploy.deploy_started_at = Time.now.utc - 4.seconds # 10s ago is before deploy started - refute deploy.deploy_timed_out? - - deploy.deploy_started_at = Time.now.utc - 5.seconds # 10s ago is "equal" to deploy time (fudge for clock skew) - assert deploy.deploy_timed_out? end end