From c8051109fb67a3c4074e0fa216aa5f45dd1f1d4a Mon Sep 17 00:00:00 2001 From: narasux Date: Fri, 19 Jul 2024 11:33:50 +0800 Subject: [PATCH 1/4] fix: hook pod gone away may block new deployment --- .../pkg/controllers/bkapp/deploy_action.go | 5 +-- operator/pkg/controllers/bkapp/hooks/hooks.go | 38 +++++++++++++------ .../pkg/controllers/bkapp/hooks/hooks_test.go | 4 +- .../bkapp/processes/resources/deployment.go | 2 +- 4 files changed, 32 insertions(+), 17 deletions(-) diff --git a/operator/pkg/controllers/bkapp/deploy_action.go b/operator/pkg/controllers/bkapp/deploy_action.go index af9b79713f..7c2144a772 100644 --- a/operator/pkg/controllers/bkapp/deploy_action.go +++ b/operator/pkg/controllers/bkapp/deploy_action.go @@ -77,9 +77,8 @@ func (r *DeployActionReconciler) Reconcile(ctx context.Context, bkapp *paasv1alp // If this is not the initial deploy action, check if there is any preceding running hooks, // wait for these hooks by return an error to delay for another reconcile cycle. // - // TODO: Should we remove this logic and allow every new deploy action to start even the - // hook triggered by older deploy is not finished yet? - if bkapp.Status.DeployId != "" { + // If hook already turned off, validateNoRunningHooks will not execute (disregarding previous hooks). + if bkapp.Spec.Hooks != nil && bkapp.Status.DeployId != "" { if err = r.validateNoRunningHooks(ctx, bkapp); err != nil { return r.Result.WithError(err) } diff --git a/operator/pkg/controllers/bkapp/hooks/hooks.go b/operator/pkg/controllers/bkapp/hooks/hooks.go index 9e54e0c9c9..b463e504bd 100644 --- a/operator/pkg/controllers/bkapp/hooks/hooks.go +++ b/operator/pkg/controllers/bkapp/hooks/hooks.go @@ -65,7 +65,7 @@ func (r *HookReconciler) Reconcile(ctx context.Context, bkapp *paasv1alpha2.BkAp log.V(1).Info("handling pre-release-hook reconciliation") if current.Pod != nil { - if err := r.UpdateStatus(ctx, bkapp, current, hookres.HookExecuteTimeoutThreshold); err != nil { + if err := r.UpdateStatus(bkapp, current, hookres.HookExecuteTimeoutThreshold); err != nil { return r.Result.WithError(err) } @@ -87,7 +87,6 @@ func (r *HookReconciler) Reconcile(ctx context.Context, bkapp *paasv1alpha2.BkAp return r.Result.End() case current.Progressing(): // 当 Hook 执行成功或失败时会由 owned pod 触发新的调和循环, 因此只需要通过 Requeue 处理超时事件即可 - return r.Result.Requeue(hookres.HookExecuteTimeoutThreshold) case current.Succeeded(): return r.Result @@ -139,11 +138,18 @@ func (r *HookReconciler) Reconcile(ctx context.Context, bkapp *paasv1alpha2.BkAp // 获取应用当前在集群中的状态 func (r *HookReconciler) getCurrentState(ctx context.Context, bkapp *paasv1alpha2.BkApp) hookres.HookInstance { pod := corev1.Pod{} - err := r.Client.Get(ctx, types.NamespacedName{Name: names.PreReleaseHook(bkapp), Namespace: bkapp.Namespace}, &pod) - if err != nil { + key := types.NamespacedName{Namespace: bkapp.Namespace, Name: names.PreReleaseHook(bkapp)} + if err := r.Client.Get(ctx, key, &pod); err != nil { return hookres.HookInstance{ - Pod: nil, - Status: nil, + Pod: nil, + Status: &paasv1alpha2.HookStatus{ + Type: paasv1alpha2.HookPreRelease, + Started: lo.ToPtr(false), + StartTime: nil, + Phase: paasv1alpha2.HealthUnknown, + Reason: "Failed", + Message: "PreReleaseHook not found, consider it is failed", + }, } } @@ -213,7 +219,6 @@ func (r *HookReconciler) ExecuteHook( // UpdateStatus will update bkapp hook status from the given instance status func (r *HookReconciler) UpdateStatus( - ctx context.Context, bkapp *paasv1alpha2.BkApp, instance hookres.HookInstance, timeoutThreshold time.Duration, @@ -228,6 +233,19 @@ func (r *HookReconciler) UpdateStatus( observedGeneration = bkapp.Generation } + // 若 Hook Pod 不存在,则应该判定 Hook 执行失败 + if instance.Pod == nil { + apimeta.SetStatusCondition(&bkapp.Status.Conditions, metav1.Condition{ + Type: paasv1alpha2.HooksFinished, + Status: metav1.ConditionFalse, + Reason: instance.Status.Reason, + Message: instance.Status.Message, + ObservedGeneration: observedGeneration, + }) + r.updateAppProgressingStatus(bkapp, metav1.ConditionFalse) + return nil + } + switch { case instance.TimeoutExceededProgressing(timeoutThreshold): bkapp.Status.Phase = paasv1alpha2.AppFailed @@ -325,11 +343,7 @@ func CheckAndUpdatePreReleaseHookStatus( r := NewHookReconciler(cli) instance := r.getCurrentState(ctx, bkapp) - if instance.Pod == nil { - return false, errors.New("pre-release-hook not found") - } - - if err = r.UpdateStatus(ctx, bkapp, instance, timeout); err != nil { + if err = r.UpdateStatus(bkapp, instance, timeout); err != nil { return false, err } diff --git a/operator/pkg/controllers/bkapp/hooks/hooks_test.go b/operator/pkg/controllers/bkapp/hooks/hooks_test.go index 314c26f846..29ed4be79e 100644 --- a/operator/pkg/controllers/bkapp/hooks/hooks_test.go +++ b/operator/pkg/controllers/bkapp/hooks/hooks_test.go @@ -173,7 +173,9 @@ var _ = Describe("Test HookReconciler", func() { ) Expect(finished).To(BeFalse()) - Expect(err.Error()).To(Equal("pre-release-hook not found")) + Expect( + err.Error(), + ).To(Equal("hook failed with: PreReleaseHook not found, consider it is failed: pod ends unsuccessfully")) }) It("Pod Execute Failed", func() { diff --git a/operator/pkg/controllers/bkapp/processes/resources/deployment.go b/operator/pkg/controllers/bkapp/processes/resources/deployment.go index d4f0f0efb6..e75ff802f8 100644 --- a/operator/pkg/controllers/bkapp/processes/resources/deployment.go +++ b/operator/pkg/controllers/bkapp/processes/resources/deployment.go @@ -104,7 +104,7 @@ func BuildProcDeployment(app *paasv1alpha2.BkApp, procName string) (*appsv1.Depl } annotations := map[string]string{ paasv1alpha2.DeployIDAnnoKey: deployID, - paasv1alpha2.LastSyncedSerializedBkAppAnnoKey: string(bkAppJson), + paasv1alpha2.LastSyncedSerializedBkAppAnnoKey: bkAppJson, } deployment := &appsv1.Deployment{ From 21936ac6c45b9af2f109fbd9aa9e808fcf9c3766 Mon Sep 17 00:00:00 2001 From: narasux Date: Wed, 24 Jul 2024 10:27:39 +0800 Subject: [PATCH 2/4] minor: resolve #1482 conversations --- operator/pkg/controllers/bkapp/hooks/hooks.go | 6 ++++-- operator/pkg/controllers/bkapp/hooks/hooks_test.go | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/operator/pkg/controllers/bkapp/hooks/hooks.go b/operator/pkg/controllers/bkapp/hooks/hooks.go index b463e504bd..f6167c19a1 100644 --- a/operator/pkg/controllers/bkapp/hooks/hooks.go +++ b/operator/pkg/controllers/bkapp/hooks/hooks.go @@ -148,7 +148,7 @@ func (r *HookReconciler) getCurrentState(ctx context.Context, bkapp *paasv1alpha StartTime: nil, Phase: paasv1alpha2.HealthUnknown, Reason: "Failed", - Message: "PreReleaseHook not found, consider it is failed", + Message: "PreReleaseHook not found", }, } } @@ -235,6 +235,7 @@ func (r *HookReconciler) UpdateStatus( // 若 Hook Pod 不存在,则应该判定 Hook 执行失败 if instance.Pod == nil { + bkapp.Status.Phase = paasv1alpha2.AppFailed apimeta.SetStatusCondition(&bkapp.Status.Conditions, metav1.Condition{ Type: paasv1alpha2.HooksFinished, Status: metav1.ConditionFalse, @@ -348,12 +349,13 @@ func CheckAndUpdatePreReleaseHookStatus( } switch { - // 删除超时的 pod + // 删除超时的 Pod case instance.TimeoutExceededProgressing(timeout): if err = cli.Delete(ctx, instance.Pod); err != nil { return false, err } return false, errors.WithStack(hookres.ErrExecuteTimeout) + // 若 instance.Pod 为 nil,会判定为失败 case instance.Failed(): return false, errors.Wrapf( hookres.ErrPodEndsUnsuccessfully, diff --git a/operator/pkg/controllers/bkapp/hooks/hooks_test.go b/operator/pkg/controllers/bkapp/hooks/hooks_test.go index 29ed4be79e..fa165939d6 100644 --- a/operator/pkg/controllers/bkapp/hooks/hooks_test.go +++ b/operator/pkg/controllers/bkapp/hooks/hooks_test.go @@ -175,7 +175,7 @@ var _ = Describe("Test HookReconciler", func() { Expect(finished).To(BeFalse()) Expect( err.Error(), - ).To(Equal("hook failed with: PreReleaseHook not found, consider it is failed: pod ends unsuccessfully")) + ).To(Equal("hook failed with: PreReleaseHook not found: pod ends unsuccessfully")) }) It("Pod Execute Failed", func() { From c7492d3c96a586aa176ed74064f24d909dab2396 Mon Sep 17 00:00:00 2001 From: narasux Date: Wed, 24 Jul 2024 14:29:54 +0800 Subject: [PATCH 3/4] minor: resolve #1482 conversations --- operator/pkg/controllers/bkapp/hooks/hooks.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/operator/pkg/controllers/bkapp/hooks/hooks.go b/operator/pkg/controllers/bkapp/hooks/hooks.go index f6167c19a1..f30d580f56 100644 --- a/operator/pkg/controllers/bkapp/hooks/hooks.go +++ b/operator/pkg/controllers/bkapp/hooks/hooks.go @@ -233,9 +233,8 @@ func (r *HookReconciler) UpdateStatus( observedGeneration = bkapp.Generation } - // 若 Hook Pod 不存在,则应该判定 Hook 执行失败 + // 若 Hook Pod 不存在,则应该判定 Hook 执行失败,但不认为 bkapp 失败,因为可以通过后续调和循环重新创建 Hook Pod if instance.Pod == nil { - bkapp.Status.Phase = paasv1alpha2.AppFailed apimeta.SetStatusCondition(&bkapp.Status.Conditions, metav1.Condition{ Type: paasv1alpha2.HooksFinished, Status: metav1.ConditionFalse, From 634ef609eecb445edd50ec5228bcb54608ac82e8 Mon Sep 17 00:00:00 2001 From: narasux Date: Tue, 30 Jul 2024 16:31:51 +0800 Subject: [PATCH 4/4] minor: resolve #1482 conversations --- operator/pkg/controllers/bkapp/deploy_action.go | 2 +- operator/pkg/controllers/bkapp/hooks/hooks.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/operator/pkg/controllers/bkapp/deploy_action.go b/operator/pkg/controllers/bkapp/deploy_action.go index 7c2144a772..ae39db6c01 100644 --- a/operator/pkg/controllers/bkapp/deploy_action.go +++ b/operator/pkg/controllers/bkapp/deploy_action.go @@ -77,7 +77,7 @@ func (r *DeployActionReconciler) Reconcile(ctx context.Context, bkapp *paasv1alp // If this is not the initial deploy action, check if there is any preceding running hooks, // wait for these hooks by return an error to delay for another reconcile cycle. // - // If hook already turned off, validateNoRunningHooks will not execute (disregarding previous hooks). + // If hook already turned off, validateNoRunningHooks should not execute (disregarding previous hooks). if bkapp.Spec.Hooks != nil && bkapp.Status.DeployId != "" { if err = r.validateNoRunningHooks(ctx, bkapp); err != nil { return r.Result.WithError(err) diff --git a/operator/pkg/controllers/bkapp/hooks/hooks.go b/operator/pkg/controllers/bkapp/hooks/hooks.go index f30d580f56..0becce5419 100644 --- a/operator/pkg/controllers/bkapp/hooks/hooks.go +++ b/operator/pkg/controllers/bkapp/hooks/hooks.go @@ -148,7 +148,7 @@ func (r *HookReconciler) getCurrentState(ctx context.Context, bkapp *paasv1alpha StartTime: nil, Phase: paasv1alpha2.HealthUnknown, Reason: "Failed", - Message: "PreReleaseHook not found", + Message: lo.Ternary(apierrors.IsNotFound(err), "PreReleaseHook not found", err.Error()), }, } }