Skip to content

Commit

Permalink
Improved Health Check Status logic
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com>
  • Loading branch information
dvaldivia committed Sep 1, 2021
1 parent e845cd0 commit 0772e11
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 3 deletions.
15 changes: 12 additions & 3 deletions pkg/controller/cluster/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ const (
HealthUnavailableMessage = "Service Unavailable"
// HealthHealingMessage means MinIO is healing one or more drives
HealthHealingMessage = "Healing"
// HealthReduceAvailabilityMessage means some drives are offline
HealthReduceAvailabilityMessage = "Reduced Availability"
// HealthAboutToLoseQuorumMessage means we are close to losing write capabilities
HealthAboutToLoseQuorumMessage = "About to lose quorum"
)
Expand Down Expand Up @@ -163,7 +165,7 @@ func (c *Controller) updateHealthStatusForTenant(tenant *miniov2.Tenant) error {
allPodsRunning = false
}
}
if !allPodsRunning {
if !allPodsRunning && tenant.Status.HealthStatus != miniov2.HealthStatusRed {
tenant.Status.HealthStatus = miniov2.HealthStatusYellow
}

Expand Down Expand Up @@ -197,14 +199,21 @@ func (c *Controller) updateHealthStatusForTenant(tenant *miniov2.Tenant) error {

if tenant.Status.DrivesOffline > 0 || tenant.Status.DrivesHealing > 0 {
tenant.Status.HealthStatus = miniov2.HealthStatusYellow
if tenant.Status.DrivesHealing > 0 {
tenant.Status.HealthMessage = HealthHealingMessage
} else {
tenant.Status.HealthMessage = HealthReduceAvailabilityMessage
}
}
if tenant.Status.DrivesOnline < tenant.Status.WriteQuorum {
tenant.Status.HealthStatus = miniov2.HealthStatusRed
tenant.Status.HealthMessage = HealthUnavailableMessage
}

// only if no disks are offline, we are green
if tenant.Status.DrivesOffline == 0 {
// only if no disks are offline and we are not healing, we are green
if tenant.Status.DrivesOffline == 0 && tenant.Status.DrivesHealing == 0 {
tenant.Status.HealthStatus = miniov2.HealthStatusGreen
tenant.Status.HealthMessage = ""
}

if _, err = c.updatePoolStatus(context.Background(), tenant); err != nil {
Expand Down
24 changes: 24 additions & 0 deletions pkg/controller/cluster/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package cluster

import (
"context"
"runtime"

miniov2 "github.com/minio/operator/pkg/apis/minio.min.io/v2"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -130,6 +131,29 @@ func (c *Controller) updatePoolStatusWithRetry(ctx context.Context, tenant *mini
return t, nil
}

// getFrame returns the runtime.Frame found skipFrames levels above the
// function that called getFrame: 0 selects getFrame's caller, 1 selects that
// caller's caller, and so on.
//
// When the requested frame does not exist (the stack is shallower than
// requested, or skipFrames is so negative that it points below the recorded
// stack) a Frame whose Function is "unknown" is returned.
func getFrame(skipFrames int) runtime.Frame {
	frame := runtime.Frame{Function: "unknown"}

	// runtime.Callers(0, ...) records its own frame at index 0 and getFrame at
	// index 1, so the frame the caller is asking about sits at skipFrames+2.
	targetFrameIndex := skipFrames + 2
	if targetFrameIndex < 0 {
		// No such frame can exist; bail out before the buffer size below goes
		// negative, which would make the make call panic.
		return frame
	}

	// Leave room for one more caller than strictly needed.
	programCounters := make([]uintptr, targetFrameIndex+2)
	n := runtime.Callers(0, programCounters)
	if n == 0 {
		return frame
	}

	frames := runtime.CallersFrames(programCounters[:n])
	for frameIndex := 0; frameIndex <= targetFrameIndex; frameIndex++ {
		candidate, more := frames.Next()
		if frameIndex == targetFrameIndex {
			return candidate
		}
		if !more {
			// Stack ended before reaching the requested depth.
			break
		}
	}

	return frame
}

// updateCertificatesStatus forwards ctx, tenant and autoCertEnabled unchanged to
// c.updateCertificatesWithRetry, passing true as the final argument, and
// returns that call's results as-is.
// NOTE(review): the trailing true presumably enables retrying — confirm against
// updateCertificatesWithRetry, which is not visible here.
func (c *Controller) updateCertificatesStatus(ctx context.Context, tenant *miniov2.Tenant, autoCertEnabled bool) (*miniov2.Tenant, error) {
return c.updateCertificatesWithRetry(ctx, tenant, autoCertEnabled, true)
}
Expand Down

0 comments on commit 0772e11

Please sign in to comment.