From eb8a8a703c6d421f2616fe5a8de3d3a9c92fe033 Mon Sep 17 00:00:00 2001 From: qingliu Date: Fri, 2 Aug 2024 12:58:45 +0800 Subject: [PATCH] fix(taskrun): handle TaskRun failure when Pod OOM occurs fix #8170 When an OOM occurs in a Pod related to a TaskRun, the TaskRun should be marked as failed immediately instead of waiting for it to time out. --- pkg/pod/status.go | 4 +++- pkg/pod/status_test.go | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/pod/status.go b/pkg/pod/status.go index d823f229f51..6354f9aaec1 100644 --- a/pkg/pod/status.go +++ b/pkg/pod/status.go @@ -125,7 +125,9 @@ func MakeTaskRunStatus(ctx context.Context, logger *zap.SugaredLogger, tr v1.Tas sortPodContainerStatuses(pod.Status.ContainerStatuses, pod.Spec.Containers) - complete := areContainersCompleted(ctx, pod) || pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed + complete := areContainersCompleted(ctx, pod) || + pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed || + DidTaskRunFail(pod) if complete { onError, ok := tr.Annotations[v1.PipelineTaskOnErrorAnnotation] diff --git a/pkg/pod/status_test.go b/pkg/pod/status_test.go index e11b9946208..48af5fc344d 100644 --- a/pkg/pod/status_test.go +++ b/pkg/pod/status_test.go @@ -832,7 +832,7 @@ func TestMakeTaskRunStatus(t *testing.T) { }}, }, want: v1.TaskRunStatus{ - Status: statusRunning(), + Status: statusFailure(v1.TaskRunReasonFailed.String(), "\"step-state-name\" exited with code 123"), TaskRunStatusFields: v1.TaskRunStatusFields{ Steps: []v1.StepState{{ ContainerState: corev1.ContainerState{ @@ -843,6 +843,8 @@ func TestMakeTaskRunStatus(t *testing.T) { Container: "step-state-name", }}, Sidecars: []v1.SidecarState{}, + // We don't actually care about the time, just that it's not nil + CompletionTime: &metav1.Time{Time: time.Now()}, }, }, }, { @@ -866,7 +868,7 @@ func TestMakeTaskRunStatus(t *testing.T) { }}, }, want: v1.TaskRunStatus{ - Status: 
statusRunning(), + Status: statusFailure(v1.TaskRunReasonFailed.String(), "\"step-state-name\" exited with code 123"), TaskRunStatusFields: v1.TaskRunStatusFields{ Steps: []v1.StepState{{ ContainerState: corev1.ContainerState{ @@ -878,6 +880,8 @@ func TestMakeTaskRunStatus(t *testing.T) { ImageID: "image-id", }}, Sidecars: []v1.SidecarState{}, + // We don't actually care about the time, just that it's not nil + CompletionTime: &metav1.Time{Time: time.Now()}, }, }, }, {