Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 2012069: Refactoring Status controller #498

Merged
Merged
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 116 additions & 84 deletions pkg/controller/status/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,17 @@ func (c *Controller) merge(existing *configv1.ClusterOperator) *configv1.Cluster

// calculate the current controller state
var last time.Time
var reason string
var errorReason string
var errs []string
var uploadErrorReason, uploadErrorMessage, disabledReason, disabledMessage, downloadReason, downloadMessage string
var uploadErrorReason,
uploadErrorMessage,
disabledReason,
disabledMessage,
downloadReason,
downloadMessage string

allReady := true

for i, source := range c.Sources() {
summary, ready := source.CurrentStatus()
if !ready {
Expand Down Expand Up @@ -185,27 +192,28 @@ func (c *Controller) merge(existing *configv1.ClusterOperator) *configv1.Cluster
}

if degradingFailure {
reason = summary.Reason
errorReason = summary.Reason
errs = append(errs, summary.Message)
}

if last.Before(summary.LastTransitionTime) {
last = summary.LastTransitionTime
}
}

// handling errors
var errorMessage string
switch len(errs) {
case 0:
case 1:
if len(reason) == 0 {
reason = "UnknownError"
}
errorMessage = errs[0]
default:
reason = "MultipleFailures"
if len(errs) > 1 {
errorReason = "MultipleFailures"
sort.Strings(errs)
errorMessage = fmt.Sprintf("There are multiple errors blocking progress:\n* %s", strings.Join(errs, "\n* "))
} else {
if len(errorReason) == 0 {
errorReason = "UnknownError"
}
errorMessage = errs[0]
}

// disabled state only when it's disabled by config. It means that gathering will not happen
if !c.configurator.Config().Report {
disabledReason = "Disabled"
Expand All @@ -230,6 +238,35 @@ func (c *Controller) merge(existing *configv1.ClusterOperator) *configv1.Cluster
isInitializing := !allReady && now.Sub(c.controllerStartTime()) < 3*time.Minute

// update the disabled and failing conditions
c.updateDisabledAndFailingConditions(existing, isInitializing, last, disabledReason, disabledMessage, errorReason, errorMessage, uploadErrorReason, uploadErrorMessage, downloadReason, downloadMessage)

// once the operator is running it is always considered available
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorAvailable,
Status: configv1.ConditionTrue,
Reason: "AsExpected",
})

// update the Progressing condition with a summary of the current state
c.updateProcessingConditionWithSummary(existing, isInitializing, last, errorMessage, disabledMessage, errorReason)

if release := os.Getenv("RELEASE_VERSION"); len(release) > 0 {
existing.Status.Versions = []configv1.OperandVersion{
{Name: "operator", Version: release},
}
}

if data, err := json.Marshal(reported); err != nil {
klog.Errorf("Unable to marshal status extension: %v", err)
} else {
existing.Status.Extension.Raw = data
}
return existing
}

func (c *Controller) updateDisabledAndFailingConditions(existing *configv1.ClusterOperator, isInitializing bool,
last time.Time, disabledReason, disabledMessage, errorReason, errorMessage, uploadErrorReason, uploadErrorMessage,
downloadReason, downloadMessage string) {
switch {
case isInitializing:
// the disabled condition is optional, but set it now if we already know we're disabled
Expand All @@ -252,71 +289,15 @@ func (c *Controller) merge(existing *configv1.ClusterOperator) *configv1.Cluster

default:
// once we've initialized set Failing and Disabled as best we know
if len(disabledMessage) > 0 {
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: OperatorDisabled,
Status: configv1.ConditionTrue,
Reason: disabledReason,
Message: disabledMessage,
})
} else {
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: OperatorDisabled,
Status: configv1.ConditionFalse,
Reason: "AsExpected",
})
}

if len(errorMessage) > 0 {
klog.V(4).Infof("The operator has some internal errors: %s", errorMessage)
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorDegraded,
Status: configv1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: last},
Reason: reason,
Message: errorMessage,
})
} else {
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorDegraded,
Status: configv1.ConditionFalse,
Reason: "AsExpected",
})
}

if len(uploadErrorReason) > 0 {
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: UploadDegraded,
Status: configv1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: last},
Reason: uploadErrorReason,
Message: uploadErrorMessage,
})
} else {
removeOperatorStatusCondition(&existing.Status.Conditions, UploadDegraded)
}

if len(downloadReason) > 0 {
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: InsightsDownloadDegraded,
Status: configv1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: last},
Reason: downloadReason,
Message: downloadMessage,
})
} else {
removeOperatorStatusCondition(&existing.Status.Conditions, InsightsDownloadDegraded)
}
setDisabledOperatorStatusCondition(existing, disabledReason, disabledMessage)
setDegradedOperatorStatusCondition(existing, last, errorReason, errorMessage)
setUploadDegradedOperatorStatusCondition(existing, last, uploadErrorReason, uploadErrorMessage)
setInsightsDownloadDegradedOperatorStatusCondition(existing, last, downloadReason, downloadMessage)
}
}

// once the operator is running it is always considered available
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorAvailable,
Status: configv1.ConditionTrue,
Reason: "AsExpected",
})

// update the Progressing condition with a summary of the current state
func (c *Controller) updateProcessingConditionWithSummary(existing *configv1.ClusterOperator,
rluders marked this conversation as resolved.
Show resolved Hide resolved
isInitializing bool, last time.Time, errorMessage, disabledMessage, errorReason string) {
switch {
case isInitializing:
klog.V(4).Infof("The operator is still being initialized")
Expand Down Expand Up @@ -346,7 +327,7 @@ func (c *Controller) merge(existing *configv1.ClusterOperator) *configv1.Cluster
Type: configv1.OperatorProgressing,
Status: configv1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: last},
Reason: reason,
Reason: errorReason,
Message: disabledMessage,
})

Expand All @@ -359,19 +340,70 @@ func (c *Controller) merge(existing *configv1.ClusterOperator) *configv1.Cluster
Message: "Monitoring the cluster",
})
}
}

if release := os.Getenv("RELEASE_VERSION"); len(release) > 0 {
existing.Status.Versions = []configv1.OperandVersion{
{Name: "operator", Version: release},
}
// setInsightsDownloadDegradedOperatorStatusCondition keeps the
// InsightsDownloadDegraded condition of the cluster operator in step with the
// latest download state.
//
// An empty downloadReason means there is nothing to report, so the condition
// is removed. Otherwise the condition is set to True with the supplied reason
// and message, using last as its transition time.
func setInsightsDownloadDegradedOperatorStatusCondition(existing *configv1.ClusterOperator, last time.Time, downloadReason string, downloadMessage string) {
	conditions := &existing.Status.Conditions

	if downloadReason == "" {
		removeOperatorStatusCondition(conditions, InsightsDownloadDegraded)
		return
	}

	degraded := configv1.ClusterOperatorStatusCondition{
		Type:               InsightsDownloadDegraded,
		Status:             configv1.ConditionTrue,
		LastTransitionTime: metav1.Time{Time: last},
		Reason:             downloadReason,
		Message:            downloadMessage,
	}
	setOperatorStatusCondition(conditions, degraded)
}

if data, err := json.Marshal(reported); err != nil {
klog.Errorf("Unable to marshal status extension: %v", err)
// setUploadDegradedOperatorStatusCondition updates the UploadDegraded
// condition on the given cluster operator.
//
// When uploadErrorReason is non-empty the condition is set to True with the
// supplied reason and message, using last as its transition time. When it is
// empty the condition is removed.
func setUploadDegradedOperatorStatusCondition(existing *configv1.ClusterOperator, last time.Time, uploadErrorReason string, uploadErrorMessage string) {
	if len(uploadErrorReason) > 0 {
		setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
			Type:               UploadDegraded,
			Status:             configv1.ConditionTrue,
			LastTransitionTime: metav1.Time{Time: last},
			Reason:             uploadErrorReason,
			Message:            uploadErrorMessage,
		})
	} else {
		removeOperatorStatusCondition(&existing.Status.Conditions, UploadDegraded)
	}
}

// setDegradedOperatorStatusCondition updates the Degraded condition on the
// given cluster operator.
//
// A non-empty errorMessage is logged at verbosity 4 and the condition is set
// to True with the supplied reason and message, using last as its transition
// time. An empty errorMessage sets the condition to False with the reason
// "AsExpected".
func setDegradedOperatorStatusCondition(existing *configv1.ClusterOperator, last time.Time, errorReason string, errorMessage string) {
	if len(errorMessage) > 0 {
		klog.V(4).Infof("The operator has some internal errors: %s", errorMessage)
		setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
			Type:               configv1.OperatorDegraded,
			Status:             configv1.ConditionTrue,
			LastTransitionTime: metav1.Time{Time: last},
			Reason:             errorReason,
			Message:            errorMessage,
		})
	} else {
		setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
			Type:   configv1.OperatorDegraded,
			Status: configv1.ConditionFalse,
			Reason: "AsExpected",
		})
	}
}

func setDisabledOperatorStatusCondition(existing *configv1.ClusterOperator, disabledMessage string, disabledReason string) {
rluders marked this conversation as resolved.
Show resolved Hide resolved
if len(disabledMessage) > 0 {
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: OperatorDisabled,
Status: configv1.ConditionTrue,
Reason: disabledReason,
Message: disabledMessage,
})
} else {
setOperatorStatusCondition(&existing.Status.Conditions, configv1.ClusterOperatorStatusCondition{
Type: OperatorDisabled,
Status: configv1.ConditionFalse,
Reason: "AsExpected",
})
}
return existing
}

// Start starts the periodic checking of sources.
Expand Down