Skip to content

Commit

Permalink
Introduce count thresholds for unrecoverable pod events
Browse files Browse the repository at this point in the history
Enable more fine-grained management of unrecoverable pod events during
workspace startup by having different thresholds for how many times an
event can be seen before it is considered fatal.

Signed-off-by: Angel Misevski <amisevsk@redhat.com>
  • Loading branch information
amisevsk committed Mar 17, 2022
1 parent 56d187b commit 21fe7f2
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions pkg/provision/workspace/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,16 @@ var containerFailureStateReasons = []string{
"RunContainerError",
}

var unrecoverablePodEventReasons = []string{
"FailedPostStartHook",
"FailedMount",
"FailedScheduling",
"FailedCreate",
"ReplicaSetCreateError",
// unrecoverablePodEventReasons lists the Kubernetes event reasons that fail workspace startup
// when they are reported for a workspace pod. Keys are event reasons (matched against ev.Reason);
// values are count thresholds: an event is treated as unrecoverable once its reported count
// (ev.Count) is greater than or equal to the threshold. A threshold of 1 therefore fails startup
// on the first occurrence, while a threshold of 3 tolerates two occurrences before failing.
// A reason listed here is still skipped when checkIfUnrecoverableEventIgnored reports it as ignored.
var unrecoverablePodEventReasons = map[string]int32{
"FailedPostStartHook": 1,
"FailedMount": 3,
"FailedScheduling": 1,
"FailedCreate": 1,
"ReplicaSetCreateError": 1,
}

var unrecoverableDeploymentConditionReasons = []string{
Expand Down Expand Up @@ -473,9 +477,15 @@ func checkPodEvents(pod *corev1.Pod, workspaceID string, clusterAPI sync.Cluster
continue
}

for _, fatalEv := range unrecoverablePodEventReasons {
if ev.Reason == fatalEv && !checkIfUnrecoverableEventIgnored(ev.Reason) {
return fmt.Sprintf("Detected unrecoverable event %s: %s", ev.Reason, ev.Message), nil
if maxCount, isUnrecoverableEvent := unrecoverablePodEventReasons[ev.Reason]; isUnrecoverableEvent {
if !checkIfUnrecoverableEventIgnored(ev.Reason) && ev.Count >= maxCount {
var msg string
if ev.Count > 1 {
msg = fmt.Sprintf("Detected unrecoverable event %s %d times: %s", ev.Reason, ev.Count, ev.Message)
} else {
msg = fmt.Sprintf("Detected unrecoverable event %s: %s", ev.Reason, ev.Message)
}
return msg, nil
}
}
}
Expand Down

0 comments on commit 21fe7f2

Please sign in to comment.