Skip to content

Commit 2a9ed03

Browse files
committed
[RayCluster] Containers not ready status reflects structured reason
Signed-off-by: Spencer Peterson <spencerjp@google.com>
1 parent 3e88985 commit 2a9ed03

File tree

2 files changed

+42
-29
lines changed

2 files changed

+42
-29
lines changed

ray-operator/controllers/ray/utils/util.go

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ const (
3737
ServeName = "serve"
3838
ClusterDomainEnvKey = "CLUSTER_DOMAIN"
3939
DefaultDomainName = "cluster.local"
40+
ContainersNotReady = "ContainersNotReady"
4041
)
4142

4243
// TODO (kevin85421): Define CRDType here rather than constant.go to avoid circular dependency.
@@ -91,16 +92,6 @@ func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition {
9192
headPodReadyCondition.Status = metav1.ConditionStatus(cond.Status)
9293
headPodReadyCondition.Message = cond.Message
9394

94-
// Add details from failed or waiting container statuses if available.
95-
details := containerStatusDetails(headPod)
96-
if details != "" {
97-
if headPodReadyCondition.Message == "" {
98-
headPodReadyCondition.Message = details
99-
} else {
100-
headPodReadyCondition.Message += "; " + details
101-
}
102-
}
103-
10495
// Determine the reason; default to HeadPodRunningAndReady if the headPod is ready but no specific reason is provided
10596
reason := cond.Reason
10697
if cond.Status == corev1.ConditionTrue && reason == "" {
@@ -112,22 +103,34 @@ func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition {
112103
headPodReadyCondition.Reason = reason
113104
}
114105

106+
// If reason is ContainersNotReady, then replace it with an available
107+
// container status that may illuminate why the container is not ready.
108+
if reason == ContainersNotReady {
109+
reason, message, ok := firstNotReadyContainerStatus(headPod)
110+
if ok {
111+
if headPodReadyCondition.Message != "" {
112+
headPodReadyCondition.Message += "; "
113+
}
114+
headPodReadyCondition.Message += message
115+
headPodReadyCondition.Reason = reason
116+
}
117+
}
118+
115119
// Since we're only interested in the PodReady condition, break after processing it
116120
break
117121
}
118122
return headPodReadyCondition
119123
}
120124

121-
func containerStatusDetails(pod *corev1.Pod) string {
122-
var details []string
125+
func firstNotReadyContainerStatus(pod *corev1.Pod) (reason string, message string, ok bool) {
123126
for _, status := range pod.Status.ContainerStatuses {
124127
if status.State.Waiting != nil {
125-
details = append(details, fmt.Sprintf("%s: %s: %s", status.Name, status.State.Waiting.Reason, status.State.Waiting.Message))
128+
return status.State.Waiting.Reason, status.Name + ": " + status.State.Waiting.Message, true
126129
} else if status.State.Terminated != nil {
127-
details = append(details, fmt.Sprintf("%s: %s: %s", status.Name, status.State.Terminated.Reason, status.State.Terminated.Message))
130+
return status.State.Terminated.Reason, status.Name + ": " + status.State.Terminated.Message, true
128131
}
129132
}
130-
return strings.Join(details, ", ")
133+
return "", "", false
131134
}
132135

133136
// FindRayClusterSuspendStatus returns the current suspend status from two conditions:

ray-operator/controllers/ray/utils/util_test.go

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ func createRayHeadPodWithPhaseAndCondition(phase corev1.PodPhase, status corev1.
347347
{
348348
Type: corev1.PodReady,
349349
Status: status,
350+
Reason: ContainersNotReady,
350351
},
351352
},
352353
},
@@ -979,16 +980,19 @@ func TestFindHeadPodReadyCondition(t *testing.T) {
979980

980981
func TestFindHeadPodReadyMessage(t *testing.T) {
981982
tests := []struct {
982-
name string
983-
message string
984-
expectedMessage string
985-
status []corev1.ContainerStatus
983+
name string
984+
message string
985+
wantMessage string
986+
wantReason string
987+
status []corev1.ContainerStatus
986988
}{{
987-
name: "no message no status want nothing",
989+
name: "no message no status want original reason",
990+
wantReason: ContainersNotReady,
988991
}, {
989-
name: "only reason",
990-
message: "TooEarlyInTheMorning",
991-
expectedMessage: "TooEarlyInTheMorning",
992+
name: "no container status want original reason",
993+
message: "TooEarlyInTheMorning",
994+
wantMessage: "TooEarlyInTheMorning",
995+
wantReason: ContainersNotReady,
992996
}, {
993997
name: "one reason one status",
994998
message: "containers not ready",
@@ -1001,9 +1005,10 @@ func TestFindHeadPodReadyMessage(t *testing.T) {
10011005
},
10021006
},
10031007
}},
1004-
expectedMessage: `containers not ready; ray: ImagePullBackOff: Back-off pulling image royproject/roy:latest: ErrImagePull: rpc error: code = NotFound`,
1008+
wantReason: "ImagePullBackOff",
1009+
wantMessage: `containers not ready; ray: Back-off pulling image royproject/roy:latest: ErrImagePull: rpc error: code = NotFound`,
10051010
}, {
1006-
name: "one reason two statuses",
1011+
name: "one reason two statuses only copy first",
10071012
message: "aesthetic problems",
10081013
status: []corev1.ContainerStatus{{
10091014
Name: "indigo",
@@ -1022,7 +1027,8 @@ func TestFindHeadPodReadyMessage(t *testing.T) {
10221027
},
10231028
},
10241029
}},
1025-
expectedMessage: "aesthetic problems; indigo: BadColor: too blue, circle: BadGeometry: too round",
1030+
wantReason: "BadColor",
1031+
wantMessage: "aesthetic problems; indigo: too blue",
10261032
}, {
10271033
name: "no reason one status",
10281034
status: []corev1.ContainerStatus{{
@@ -1034,7 +1040,8 @@ func TestFindHeadPodReadyMessage(t *testing.T) {
10341040
},
10351041
},
10361042
}},
1037-
expectedMessage: "my-image: Crashed: bash not found",
1043+
wantReason: "Crashed",
1044+
wantMessage: "my-image: bash not found",
10381045
}}
10391046

10401047
for _, tc := range tests {
@@ -1043,8 +1050,11 @@ func TestFindHeadPodReadyMessage(t *testing.T) {
10431050
pod.Status.Conditions[0].Message = tc.message
10441051
pod.Status.ContainerStatuses = tc.status
10451052
cond := FindHeadPodReadyCondition(pod)
1046-
if cond.Message != tc.expectedMessage {
1047-
t.Errorf("FindHeadPodReadyCondition(...) returned condition with message %q, but wanted %q", cond.Message, tc.expectedMessage)
1053+
if cond.Message != tc.wantMessage {
1054+
t.Errorf("FindHeadPodReadyCondition(...) returned condition with message %q, but wanted %q", cond.Message, tc.wantMessage)
1055+
}
1056+
if cond.Reason != tc.wantReason {
1057+
t.Errorf("FindHeadPodReadyCondition(...) returned condition with reason %q, but wanted %q", cond.Reason, tc.wantReason)
10481058
}
10491059
})
10501060
}

0 commit comments

Comments
 (0)