diff --git a/controllers/runner_controller.go b/controllers/runner_controller.go index 6cb9a9cbd1..90dda03a68 100644 --- a/controllers/runner_controller.go +++ b/controllers/runner_controller.go @@ -207,6 +207,24 @@ func runnerPodOrContainerIsStopped(pod *corev1.Pod) bool { return stopped } +func ephemeralRunnerContainerStatus(pod *corev1.Pod) *corev1.ContainerStatus { + if getRunnerEnv(pod, "RUNNER_EPHEMERAL") != "true" { + return nil + } + + for _, status := range pod.Status.ContainerStatuses { + if status.Name != containerName { + continue + } + + status := status + + return &status + } + + return nil +} + func (r *RunnerReconciler) processRunnerDeletion(runner v1alpha1.Runner, ctx context.Context, log logr.Logger, pod *corev1.Pod) (reconcile.Result, error) { finalizers, removed := removeFinalizer(runner.ObjectMeta.Finalizers, finalizerName) diff --git a/controllers/runner_graceful_stop.go b/controllers/runner_graceful_stop.go index 86de46bbf4..82fa0322d0 100644 --- a/controllers/runner_graceful_stop.go +++ b/controllers/runner_graceful_stop.go @@ -113,9 +113,27 @@ func ensureRunnerUnregistration(ctx context.Context, retryDelay time.Duration, l // Happens e.g. 
when dind is in runner and run completes log.Info("Runner pod has been stopped with a successful status.") } else if pod != nil && pod.Annotations[AnnotationKeyRunnerCompletionWaitStartTimestamp] != "" { - log.Info("Runner pod is annotated to wait for completion") + ct := ephemeralRunnerContainerStatus(pod) + if ct == nil { + log.Info("Runner pod is annotated to wait for completion, and the runner container is not ephemeral") - return &ctrl.Result{RequeueAfter: retryDelay}, nil + return &ctrl.Result{RequeueAfter: retryDelay}, nil + } + + lts := ct.LastTerminationState.Terminated + if lts == nil { + log.Info("Runner pod is annotated to wait for completion, and the runner container is not restarting") + + return &ctrl.Result{RequeueAfter: retryDelay}, nil + } + + // Prevent runner pod from getting stuck in Terminating. + // See https://github.com/actions-runner-controller/actions-runner-controller/issues/1369 + log.Info("Deleting runner pod anyway because it has stopped prematurely. This may leave a dangling runner resource in GitHub Actions", + "lastState.exitCode", lts.ExitCode, + "lastState.message", lts.Message, + "pod.phase", pod.Status.Phase, + ) } else if ok, err := unregisterRunner(ctx, ghClient, enterprise, organization, repository, runner, *runnerID); err != nil { if errors.Is(err, &gogithub.RateLimitError{}) { // We log the underlying error when we failed calling GitHub API to list or unregisters,