Skip to content

Commit

Permalink
Merge pull request grafana#269 from xperimental/status-ready-5.8
Browse files Browse the repository at this point in the history
LOG-5171: Extend status to show difference between running and ready
  • Loading branch information
openshift-merge-bot[bot] authored Mar 6, 2024
2 parents 5c85836 + 5664436 commit 62b51ec
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 88 deletions.
4 changes: 4 additions & 0 deletions operator/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## Main

## Release 5.8.5

- [11968](https://github.com/grafana/loki/pull/11968) **xperimental**: Extend status to show difference between running and ready

## Release 5.8.4

- [11824](https://github.com/grafana/loki/pull/11824) **xperimental**: Improve messages for errors in storage secret
Expand Down
22 changes: 21 additions & 1 deletion operator/apis/loki/v1/lokistack_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1006,8 +1006,28 @@ const (
ReasonZoneAwareEmptyLabel LokiStackConditionReason = "ReasonZoneAwareEmptyLabel"
)

// PodStatus is a short description of the status a Pod can be in.
//
// The values are used as keys of PodStatusMap. PodReady is derived from the readiness
// of the pod's containers, so that a pod that is merely running can be told apart from
// one that is running and ready.
type PodStatus string

const (
// PodPending means the pod has been accepted by the system, but one or more of the containers
// has not been started. This includes time before being bound to a node, as well as time spent
// pulling images onto the host.
PodPending PodStatus = "Pending"
// PodRunning means the pod has been bound to a node and all of the containers have been started.
// At least one container is still running or is in the process of being restarted.
// In the operator's component status this value is reported for pods in the Running phase
// that still have at least one container which is not ready.
PodRunning PodStatus = "Running"
// PodReady means the pod has been started and the readiness probe reports a successful status.
// The operator reports it for pods in the Running phase whose container statuses are all ready.
PodReady PodStatus = "Ready"
// PodFailed means that all containers in the pod have terminated, and at least one container has
// terminated in a failure (exited with a non-zero exit code or was stopped by the system).
PodFailed PodStatus = "Failed"
// PodStatusUnknown is used when none of the other statuses apply or the information is not ready yet.
// This includes any pod phase other than Pending, Running or Failed.
PodStatusUnknown PodStatus = "Unknown"
)

// PodStatusMap defines the type for mapping a pod status to the names of the pods in that status.
type PodStatusMap map[corev1.PodPhase][]string
type PodStatusMap map[PodStatus][]string

// LokiStackComponentStatus defines the map of per pod status per LokiStack component.
// Each component is represented by a separate map of PodStatus to a list of pod names.
Expand Down
56 changes: 40 additions & 16 deletions operator/apis/loki/v1beta1/lokistack_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -829,21 +829,45 @@ func init() {
SchemeBuilder.Register(&LokiStack{}, &LokiStackList{})
}

// convertStatusV1 converts a v1beta1 PodStatusMap into a v1.PodStatusMap by re-typing
// every key as a v1.PodStatus with the same string value.
//
// A nil src yields a nil result, so an unset component status stays unset after
// conversion. The pod-name slices are not copied; the returned map shares them with src.
func convertStatusV1(src PodStatusMap) v1.PodStatusMap {
	if src == nil {
		return nil
	}

	// The number of entries is known up front, so size the map once.
	dst := make(v1.PodStatusMap, len(src))
	for phase, pods := range src {
		dst[v1.PodStatus(phase)] = pods
	}
	return dst
}

// convertStatusBeta converts a v1.PodStatusMap into a v1beta1 PodStatusMap by re-typing
// every key as a corev1.PodPhase with the same string value.
//
// The conversion is purely textual: statuses that only exist in v1 (for example "Ready")
// are carried over as a PodPhase holding that string, even though it is not a phase
// defined by Kubernetes.
//
// A nil src yields a nil result. The pod-name slices are not copied; the returned map
// shares them with src.
func convertStatusBeta(src v1.PodStatusMap) PodStatusMap {
	if src == nil {
		return nil
	}

	// The number of entries is known up front, so size the map once.
	dst := make(PodStatusMap, len(src))
	for status, pods := range src {
		dst[corev1.PodPhase(status)] = pods
	}
	return dst
}

// ConvertTo converts this LokiStack (v1beta1) to the Hub version (v1).
func (src *LokiStack) ConvertTo(dstRaw conversion.Hub) error {
dst := dstRaw.(*v1.LokiStack)

dst.ObjectMeta = src.ObjectMeta
dst.Status.Conditions = src.Status.Conditions
dst.Status.Components = v1.LokiStackComponentStatus{
Compactor: v1.PodStatusMap(src.Status.Components.Compactor),
Distributor: v1.PodStatusMap(src.Status.Components.Distributor),
Ingester: v1.PodStatusMap(src.Status.Components.Ingester),
Querier: v1.PodStatusMap(src.Status.Components.Querier),
QueryFrontend: v1.PodStatusMap(src.Status.Components.QueryFrontend),
IndexGateway: v1.PodStatusMap(src.Status.Components.IndexGateway),
Ruler: v1.PodStatusMap(src.Status.Components.Ruler),
Gateway: v1.PodStatusMap(src.Status.Components.Gateway),
Compactor: convertStatusV1(src.Status.Components.Compactor),
Distributor: convertStatusV1(src.Status.Components.Distributor),
Ingester: convertStatusV1(src.Status.Components.Ingester),
Querier: convertStatusV1(src.Status.Components.Querier),
QueryFrontend: convertStatusV1(src.Status.Components.QueryFrontend),
IndexGateway: convertStatusV1(src.Status.Components.IndexGateway),
Ruler: convertStatusV1(src.Status.Components.Ruler),
Gateway: convertStatusV1(src.Status.Components.Gateway),
}

var statusSchemas []v1.ObjectStorageSchema
Expand Down Expand Up @@ -1104,14 +1128,14 @@ func (dst *LokiStack) ConvertFrom(srcRaw conversion.Hub) error {
dst.ObjectMeta = src.ObjectMeta
dst.Status.Conditions = src.Status.Conditions
dst.Status.Components = LokiStackComponentStatus{
Compactor: PodStatusMap(src.Status.Components.Compactor),
Distributor: PodStatusMap(src.Status.Components.Distributor),
Ingester: PodStatusMap(src.Status.Components.Ingester),
Querier: PodStatusMap(src.Status.Components.Querier),
QueryFrontend: PodStatusMap(src.Status.Components.QueryFrontend),
IndexGateway: PodStatusMap(src.Status.Components.IndexGateway),
Ruler: PodStatusMap(src.Status.Components.Ruler),
Gateway: PodStatusMap(src.Status.Components.Gateway),
Compactor: convertStatusBeta(src.Status.Components.Compactor),
Distributor: convertStatusBeta(src.Status.Components.Distributor),
Ingester: convertStatusBeta(src.Status.Components.Ingester),
Querier: convertStatusBeta(src.Status.Components.Querier),
QueryFrontend: convertStatusBeta(src.Status.Components.QueryFrontend),
IndexGateway: convertStatusBeta(src.Status.Components.IndexGateway),
Ruler: convertStatusBeta(src.Status.Components.Ruler),
Gateway: convertStatusBeta(src.Status.Components.Gateway),
}

var statusSchemas []ObjectStorageSchema
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ metadata:
categories: OpenShift Optional, Logging & Tracing
certified: "false"
containerImage: docker.io/grafana/loki-operator:0.4.0
createdAt: "2024-01-26T13:17:45Z"
createdAt: "2024-03-04T17:34:37Z"
description: The Community Loki Operator provides Kubernetes native deployment
and management of Loki and related logging components.
features.operators.openshift.io/disconnected: "true"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ metadata:
categories: OpenShift Optional, Logging & Tracing
certified: "false"
containerImage: docker.io/grafana/loki-operator:0.4.0
createdAt: "2024-01-26T13:17:43Z"
createdAt: "2024-03-04T17:34:34Z"
description: The Community Loki Operator provides Kubernetes native deployment
and management of Loki and related logging components.
operators.operatorframework.io/builder: operator-sdk-unknown
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ metadata:
categories: OpenShift Optional, Logging & Tracing
certified: "false"
containerImage: quay.io/openshift-logging/loki-operator:0.1.0
createdAt: "2024-01-26T13:17:48Z"
createdAt: "2024-03-04T17:34:40Z"
description: |
The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging.
## Prerequisites and Requirements
Expand Down
36 changes: 35 additions & 1 deletion operator/docs/operator/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -2726,8 +2726,42 @@ Setting this to an empty array disables admin groups.</p>
</tr></tbody>
</table>

## PodStatus { #loki-grafana-com-v1-PodStatus }
(<code>string</code> alias)
<div>
<p>PodStatus is a short description of the status a Pod can be in.</p>
</div>
<table>
<thead>
<tr>
<th>Value</th>
<th>Description</th>
</tr>
</thead>
<tbody><tr><td><p>&#34;Failed&#34;</p></td>
<td><p>PodFailed means that all containers in the pod have terminated, and at least one container has
terminated in a failure (exited with a non-zero exit code or was stopped by the system).</p>
</td>
</tr><tr><td><p>&#34;Pending&#34;</p></td>
<td><p>PodPending means the pod has been accepted by the system, but one or more of the containers
has not been started. This includes time before being bound to a node, as well as time spent
pulling images onto the host.</p>
</td>
</tr><tr><td><p>&#34;Ready&#34;</p></td>
<td><p>PodReady means the pod has been started and the readiness probe reports a successful status.</p>
</td>
</tr><tr><td><p>&#34;Running&#34;</p></td>
<td><p>PodRunning means the pod has been bound to a node and all of the containers have been started.
At least one container is still running or is in the process of being restarted.</p>
</td>
</tr><tr><td><p>&#34;Unknown&#34;</p></td>
<td><p>PodStatusUnknown is used when none of the other statuses apply or the information is not ready yet.</p>
</td>
</tr></tbody>
</table>

## PodStatusMap { #loki-grafana-com-v1-PodStatusMap }
(<code>map[k8s.io/api/core/v1.PodPhase][]string</code> alias)
(<code>map[github.com/grafana/loki/operator/apis/loki/v1.PodStatus][]string</code> alias)
<p>
(<em>Appears on:</em><a href="#loki-grafana-com-v1-LokiStackComponentStatus">LokiStackComponentStatus</a>)
</p>
Expand Down
25 changes: 23 additions & 2 deletions operator/internal/status/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,29 @@ func appendPodStatus(ctx context.Context, k k8s.Client, component, stack, ns str
return nil, kverrors.Wrap(err, "failed to list pods for LokiStack component", "name", stack, "component", component)
}
for _, pod := range pods.Items {
phase := pod.Status.Phase
psm[phase] = append(psm[phase], pod.Name)
status := podStatus(&pod)
psm[status] = append(psm[status], pod.Name)
}
return psm, nil
}

// podStatus maps the state of a single pod to the operator's PodStatus.
//
// Failed and Pending phases map directly to their PodStatus counterparts. A pod in the
// Running phase is reported as PodRunning while at least one of its container statuses
// is not ready, and as PodReady otherwise (including when no container statuses are
// present). Every other phase is reported as PodStatusUnknown.
func podStatus(pod *corev1.Pod) lokiv1.PodStatus {
	switch pod.Status.Phase {
	case corev1.PodFailed:
		return lokiv1.PodFailed
	case corev1.PodPending:
		return lokiv1.PodPending
	case corev1.PodRunning:
		// Running only counts as ready once every container reports ready.
		containers := pod.Status.ContainerStatuses
		for i := range containers {
			if !containers[i].Ready {
				return lokiv1.PodRunning
			}
		}
		return lokiv1.PodReady
	default:
		return lokiv1.PodStatusUnknown
	}
}
55 changes: 30 additions & 25 deletions operator/internal/status/components_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
"github.com/grafana/loki/operator/internal/manifests"
)

func createPodList(baseName string, phases ...corev1.PodPhase) *corev1.PodList {
func createPodList(baseName string, ready bool, phases ...corev1.PodPhase) *corev1.PodList {
items := []corev1.Pod{}
for i, p := range phases {
items = append(items, corev1.Pod{
Expand All @@ -24,6 +24,11 @@ func createPodList(baseName string, phases ...corev1.PodPhase) *corev1.PodList {
},
Status: corev1.PodStatus{
Phase: p,
ContainerStatuses: []corev1.ContainerStatus{
{
Ready: ready,
},
},
},
})
}
Expand Down Expand Up @@ -78,37 +83,37 @@ func TestGenerateComponentStatus(t *testing.T) {
manifests.LabelGatewayComponent: {},
},
wantComponentStatus: &lokiv1.LokiStackComponentStatus{
Compactor: map[corev1.PodPhase][]string{},
Distributor: map[corev1.PodPhase][]string{},
IndexGateway: map[corev1.PodPhase][]string{},
Ingester: map[corev1.PodPhase][]string{},
Querier: map[corev1.PodPhase][]string{},
QueryFrontend: map[corev1.PodPhase][]string{},
Gateway: map[corev1.PodPhase][]string{},
Ruler: map[corev1.PodPhase][]string{},
Compactor: lokiv1.PodStatusMap{},
Distributor: lokiv1.PodStatusMap{},
IndexGateway: lokiv1.PodStatusMap{},
Ingester: lokiv1.PodStatusMap{},
Querier: lokiv1.PodStatusMap{},
QueryFrontend: lokiv1.PodStatusMap{},
Gateway: lokiv1.PodStatusMap{},
Ruler: lokiv1.PodStatusMap{},
},
},
{
desc: "all one pod running",
componentPods: map[string]*corev1.PodList{
manifests.LabelCompactorComponent: createPodList(manifests.LabelCompactorComponent, corev1.PodRunning),
manifests.LabelDistributorComponent: createPodList(manifests.LabelDistributorComponent, corev1.PodRunning),
manifests.LabelIngesterComponent: createPodList(manifests.LabelIngesterComponent, corev1.PodRunning),
manifests.LabelQuerierComponent: createPodList(manifests.LabelQuerierComponent, corev1.PodRunning),
manifests.LabelQueryFrontendComponent: createPodList(manifests.LabelQueryFrontendComponent, corev1.PodRunning),
manifests.LabelIndexGatewayComponent: createPodList(manifests.LabelIndexGatewayComponent, corev1.PodRunning),
manifests.LabelRulerComponent: createPodList(manifests.LabelRulerComponent, corev1.PodRunning),
manifests.LabelGatewayComponent: createPodList(manifests.LabelGatewayComponent, corev1.PodRunning),
manifests.LabelCompactorComponent: createPodList(manifests.LabelCompactorComponent, false, corev1.PodRunning),
manifests.LabelDistributorComponent: createPodList(manifests.LabelDistributorComponent, false, corev1.PodRunning),
manifests.LabelIngesterComponent: createPodList(manifests.LabelIngesterComponent, false, corev1.PodRunning),
manifests.LabelQuerierComponent: createPodList(manifests.LabelQuerierComponent, false, corev1.PodRunning),
manifests.LabelQueryFrontendComponent: createPodList(manifests.LabelQueryFrontendComponent, false, corev1.PodRunning),
manifests.LabelIndexGatewayComponent: createPodList(manifests.LabelIndexGatewayComponent, false, corev1.PodRunning),
manifests.LabelRulerComponent: createPodList(manifests.LabelRulerComponent, false, corev1.PodRunning),
manifests.LabelGatewayComponent: createPodList(manifests.LabelGatewayComponent, false, corev1.PodRunning),
},
wantComponentStatus: &lokiv1.LokiStackComponentStatus{
Compactor: map[corev1.PodPhase][]string{corev1.PodRunning: {"compactor-pod-0"}},
Distributor: map[corev1.PodPhase][]string{corev1.PodRunning: {"distributor-pod-0"}},
IndexGateway: map[corev1.PodPhase][]string{corev1.PodRunning: {"index-gateway-pod-0"}},
Ingester: map[corev1.PodPhase][]string{corev1.PodRunning: {"ingester-pod-0"}},
Querier: map[corev1.PodPhase][]string{corev1.PodRunning: {"querier-pod-0"}},
QueryFrontend: map[corev1.PodPhase][]string{corev1.PodRunning: {"query-frontend-pod-0"}},
Gateway: map[corev1.PodPhase][]string{corev1.PodRunning: {"lokistack-gateway-pod-0"}},
Ruler: map[corev1.PodPhase][]string{corev1.PodRunning: {"ruler-pod-0"}},
Compactor: lokiv1.PodStatusMap{lokiv1.PodRunning: {"compactor-pod-0"}},
Distributor: lokiv1.PodStatusMap{lokiv1.PodRunning: {"distributor-pod-0"}},
IndexGateway: lokiv1.PodStatusMap{lokiv1.PodRunning: {"index-gateway-pod-0"}},
Ingester: lokiv1.PodStatusMap{lokiv1.PodRunning: {"ingester-pod-0"}},
Querier: lokiv1.PodStatusMap{lokiv1.PodRunning: {"querier-pod-0"}},
QueryFrontend: lokiv1.PodStatusMap{lokiv1.PodRunning: {"query-frontend-pod-0"}},
Gateway: lokiv1.PodStatusMap{lokiv1.PodRunning: {"lokistack-gateway-pod-0"}},
Ruler: lokiv1.PodStatusMap{lokiv1.PodRunning: {"ruler-pod-0"}},
},
},
}
Expand Down
56 changes: 38 additions & 18 deletions operator/internal/status/lokistack.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ import (

const (
messageReady = "All components ready"
messageFailed = "Some LokiStack components failed"
messagePending = "Some LokiStack components pending on dependencies"
messageFailed = "One or more LokiStack components failed"
messagePending = "One or more LokiStack components pending on dependencies"
messageRunning = "All components are running, but some readiness checks are failing"
messageDegradedMissingNodes = "Cluster contains no nodes matching the labels used for zone-awareness"
messageDegradedEmptyNodeLabel = "No value for the labels used for zone-awareness"
)
Expand All @@ -35,6 +36,11 @@ var (
Message: messagePending,
Reason: string(lokiv1.ReasonPendingComponents),
}
conditionRunning = metav1.Condition{
Type: string(lokiv1.ConditionPending),
Message: messageRunning,
Reason: string(lokiv1.ReasonPendingComponents),
}
conditionReady = metav1.Condition{
Type: string(lokiv1.ConditionReady),
Message: messageReady,
Expand Down Expand Up @@ -76,28 +82,28 @@ func SetDegradedCondition(ctx context.Context, k k8s.Client, req ctrl.Request, m

func generateCondition(ctx context.Context, cs *lokiv1.LokiStackComponentStatus, k k8s.Client, req ctrl.Request, stack *lokiv1.LokiStack) (metav1.Condition, error) {
// Check for failed pods first
failed := len(cs.Compactor[corev1.PodFailed]) +
len(cs.Distributor[corev1.PodFailed]) +
len(cs.Ingester[corev1.PodFailed]) +
len(cs.Querier[corev1.PodFailed]) +
len(cs.QueryFrontend[corev1.PodFailed]) +
len(cs.Gateway[corev1.PodFailed]) +
len(cs.IndexGateway[corev1.PodFailed]) +
len(cs.Ruler[corev1.PodFailed])
failed := len(cs.Compactor[lokiv1.PodFailed]) +
len(cs.Distributor[lokiv1.PodFailed]) +
len(cs.Ingester[lokiv1.PodFailed]) +
len(cs.Querier[lokiv1.PodFailed]) +
len(cs.QueryFrontend[lokiv1.PodFailed]) +
len(cs.Gateway[lokiv1.PodFailed]) +
len(cs.IndexGateway[lokiv1.PodFailed]) +
len(cs.Ruler[lokiv1.PodFailed])

if failed != 0 {
return conditionFailed, nil
}

// Check for pending pods
pending := len(cs.Compactor[corev1.PodPending]) +
len(cs.Distributor[corev1.PodPending]) +
len(cs.Ingester[corev1.PodPending]) +
len(cs.Querier[corev1.PodPending]) +
len(cs.QueryFrontend[corev1.PodPending]) +
len(cs.Gateway[corev1.PodPending]) +
len(cs.IndexGateway[corev1.PodPending]) +
len(cs.Ruler[corev1.PodPending])
pending := len(cs.Compactor[lokiv1.PodPending]) +
len(cs.Distributor[lokiv1.PodPending]) +
len(cs.Ingester[lokiv1.PodPending]) +
len(cs.Querier[lokiv1.PodPending]) +
len(cs.QueryFrontend[lokiv1.PodPending]) +
len(cs.Gateway[lokiv1.PodPending]) +
len(cs.IndexGateway[lokiv1.PodPending]) +
len(cs.Ruler[lokiv1.PodPending])

if pending != 0 {
if stack.Spec.Replication != nil && len(stack.Spec.Replication.Zones) > 0 {
Expand All @@ -120,6 +126,20 @@ func generateCondition(ctx context.Context, cs *lokiv1.LokiStackComponentStatus,
return conditionPending, nil
}

// Check if there are pods that are running but not ready
running := len(cs.Compactor[lokiv1.PodRunning]) +
len(cs.Distributor[lokiv1.PodRunning]) +
len(cs.Ingester[lokiv1.PodRunning]) +
len(cs.Querier[lokiv1.PodRunning]) +
len(cs.QueryFrontend[lokiv1.PodRunning]) +
len(cs.Gateway[lokiv1.PodRunning]) +
len(cs.IndexGateway[lokiv1.PodRunning]) +
len(cs.Ruler[lokiv1.PodRunning])

if running > 0 {
return conditionRunning, nil
}

return conditionReady, nil
}

Expand Down
Loading

0 comments on commit 62b51ec

Please sign in to comment.