Skip to content

Commit

Permalink
fix(openshift): block upgrades on invalid max properties (#2302)
Browse files Browse the repository at this point in the history
Block OpenShift upgrades while:

- olm.maxOpenShiftVersion properties have invalid values
- cluster information is unavailable; e.g. the desired version of the cluster is undefined
- an installed operator has declared more than one olm.MaxOpenShiftVersion
  property

See https://bugzilla.redhat.com/show_bug.cgi?id=1986753 for motivation.

Signed-off-by: Nick Hale <njohnhale@gmail.com>
  • Loading branch information
njhale committed Aug 2, 2021
1 parent 69bccf3 commit 734c6d0
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 133 deletions.
31 changes: 20 additions & 11 deletions pkg/controller/operators/openshift/clusteroperator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ func (r *ClusterOperatorReconciler) setDegraded(_ context.Context, co *ClusterOp
}

const (
IncompatibleOperatorsInstalled = "IncompatibleOperatorsInstalled"
IncompatibleOperatorsInstalled = "IncompatibleOperatorsInstalled"
ErrorCheckingOperatorCompatibility = "ErrorCheckingOperatorCompatibility"
)

func (r *ClusterOperatorReconciler) setUpgradeable(ctx context.Context, co *ClusterOperator) error {
Expand All @@ -221,31 +222,39 @@ func (r *ClusterOperatorReconciler) setUpgradeable(ctx context.Context, co *Clus

// Set upgradeable=false if (either/or):
// 1. OLM currently upgrading (takes priorty in the status message)
// 2. Operators currently installed that are incompatible with the next OCP minor version
// 2. Operators currently installed that are incompatible with the next minor version of OpenShift
// 3. An error occurs while determining 2
var err error
if r.syncTracker.SuccessfulSyncs() < 1 || !versionsMatch(co.Status.Versions, r.TargetVersions) {
// OLM is still upgrading
desired.Status = configv1.ConditionFalse
desired.Message = "Waiting for updates to take effect"
} else {
incompatible, err := incompatibleOperators(ctx, r.Client)
var incompatible skews
incompatible, err = incompatibleOperators(ctx, r.Client)
if err != nil {
return err
}

if len(incompatible) > 0 {
// Some incompatible operator is installed
// "Fail closed" when we can't determine compatibility
// Note: Unspecified compatibility = universal compatibility; i.e. operators that don't specify a "maxOpenShiftVersion" property are compatible with everything.
desired.Status = configv1.ConditionFalse
desired.Reason = ErrorCheckingOperatorCompatibility
desired.Message = fmt.Sprintf("Encountered errors while checking compatibility with the next minor version of OpenShift: %s", err)
} else if len(incompatible) > 0 {
// Operators are installed that have incompatible and/or invalid max versions
desired.Status = configv1.ConditionFalse
desired.Reason = IncompatibleOperatorsInstalled
desired.Message = incompatible.String() // TODO: Truncate message to field length
desired.Message = incompatible.String()
}
}

// Only return transient errors likely resolved by retrying immediately
err = transientErrors(err)

current := co.GetCondition(configv1.OperatorUpgradeable)
if conditionsEqual(current, desired) { // Comparison ignores lastUpdated
return nil
return err
}

co.SetCondition(desired)

return nil
return err
}
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,35 @@ var _ = Describe("ClusterOperator controller", func() {
LastTransitionTime: fixedNow(),
}))

By("setting upgradeable=false when incompatible operators exist")
By("setting upgradeable=false when there's an error determining compatibility")
cv.Status = configv1.ClusterVersionStatus{}

Eventually(func() error {
return k8sClient.Status().Update(ctx, cv)
}).Should(Succeed())

Eventually(func() ([]configv1.ClusterOperatorStatusCondition, error) {
err := k8sClient.Get(ctx, clusterOperatorName, co)
return co.Status.Conditions, err
}, timeout).Should(ContainElement(configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorUpgradeable,
Status: configv1.ConditionFalse,
Reason: ErrorCheckingOperatorCompatibility,
Message: "Encountered errors while checking compatibility with the next minor version of OpenShift: Desired release version missing from ClusterVersion",
LastTransitionTime: fixedNow(),
}))

cv.Status = configv1.ClusterVersionStatus{
Desired: configv1.Update{
Version: clusterVersion,
},
}

Eventually(func() error {
return k8sClient.Status().Update(ctx, cv)
}).Should(Succeed())

By("setting upgradeable=false when incompatible operators exist")
ns := &corev1.Namespace{}
ns.SetName("nostromo")

Expand All @@ -160,12 +187,15 @@ var _ = Describe("ClusterOperator controller", func() {
incompatible.SetName("xenomorph")
incompatible.SetNamespace(ns.GetName())

withMax := func(version string) map[string]string {
maxProperty := &api.Property{
Type: MaxOpenShiftVersionProperty,
Value: version,
withMax := func(versions ...string) map[string]string {
var properties []*api.Property
for _, v := range versions {
properties = append(properties, &api.Property{
Type: MaxOpenShiftVersionProperty,
Value: v,
})
}
value, err := projection.PropertiesAnnotationFromPropertyList([]*api.Property{maxProperty})
value, err := projection.PropertiesAnnotationFromPropertyList(properties)
Expect(err).ToNot(HaveOccurred())

return map[string]string{
Expand Down Expand Up @@ -245,5 +275,54 @@ var _ = Describe("ClusterOperator controller", func() {
}.String(),
LastTransitionTime: fixedNow(),
}))

By("setting upgradeable=false when invalid max versions are found")
incompatible.SetAnnotations(withMax(`"garbage"`))

Eventually(func() error {
return k8sClient.Update(ctx, incompatible)
}).Should(Succeed())

_, parseErr := semver.ParseTolerant("garbage")
Eventually(func() ([]configv1.ClusterOperatorStatusCondition, error) {
err := k8sClient.Get(ctx, clusterOperatorName, co)
return co.Status.Conditions, err
}, timeout).Should(ContainElement(configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorUpgradeable,
Status: configv1.ConditionFalse,
Reason: IncompatibleOperatorsInstalled,
Message: skews{
{
namespace: ns.GetName(),
name: incompatible.GetName(),
err: fmt.Errorf(`Failed to parse "garbage" as semver: %w`, parseErr),
},
}.String(),
LastTransitionTime: fixedNow(),
}))

By("setting upgradeable=false when more than one max version property is defined")
incompatible.SetAnnotations(withMax(fmt.Sprintf(`"%s"`, clusterVersion), fmt.Sprintf(`"%s"`, next.String())))

Eventually(func() error {
return k8sClient.Update(ctx, incompatible)
}).Should(Succeed())

Eventually(func() ([]configv1.ClusterOperatorStatusCondition, error) {
err := k8sClient.Get(ctx, clusterOperatorName, co)
return co.Status.Conditions, err
}, timeout).Should(ContainElement(configv1.ClusterOperatorStatusCondition{
Type: configv1.OperatorUpgradeable,
Status: configv1.ConditionFalse,
Reason: IncompatibleOperatorsInstalled,
Message: skews{
{
namespace: ns.GetName(),
name: incompatible.GetName(),
err: fmt.Errorf(`Defining more than one "%s" property is not allowed`, MaxOpenShiftVersionProperty),
},
}.String(),
LastTransitionTime: fixedNow(),
}))
})
})
120 changes: 63 additions & 57 deletions pkg/controller/operators/openshift/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package openshift

import (
"context"
"errors"
"fmt"
"strings"

Expand Down Expand Up @@ -80,22 +81,47 @@ func versionsMatch(a []configv1.OperandVersion, b []configv1.OperandVersion) boo
type skews []skew

func (s skews) String() string {
var msg []string
msg := make([]string, len(s))
i, j := 0, len(s)-1
for _, sk := range s {
msg = append(msg, sk.String())
m := sk.String()
// Partial order: error skews first
if sk.err != nil {
msg[i] = m
i++
continue
}
msg[j] = m
j--
}

return "The following operators block OpenShift upgrades: " + strings.Join(msg, ",")
return "ClusterServiceVersions blocking cluster upgrade: " + strings.Join(msg, ",")
}

type skew struct {
namespace string
name string
maxOpenShiftVersion string
err error
}

func (s skew) String() string {
return fmt.Sprintf("Operator %s in namespace %s is not compatible with OpenShift versions greater than %s", s.name, s.namespace, s.maxOpenShiftVersion)
if s.err != nil {
return fmt.Sprintf("%s/%s has invalid %s properties: %s", s.namespace, s.name, MaxOpenShiftVersionProperty, s.err)
}

return fmt.Sprintf("%s/%s is incompatible with OpenShift versions greater than %s", s.namespace, s.name, s.maxOpenShiftVersion)
}

type transientError struct {
error
}

// transientErrors returns the result of stripping all wrapped errors not of type transientError from the given error.
func transientErrors(err error) error {
return utilerrors.FilterOut(err, func(e error) bool {
return !errors.As(e, new(transientError))
})
}

func incompatibleOperators(ctx context.Context, cli client.Client) (skews, error) {
Expand All @@ -105,58 +131,59 @@ func incompatibleOperators(ctx context.Context, cli client.Client) (skews, error
}

if next == nil {
return nil, nil
// Note: This shouldn't happen
return nil, fmt.Errorf("Failed to determine next OpenShift Y-stream release")
}
next.Minor++

csvList := &operatorsv1alpha1.ClusterServiceVersionList{}
if err := cli.List(ctx, csvList); err != nil {
return nil, err
return nil, &transientError{fmt.Errorf("Failed to list ClusterServiceVersions: %w", err)}
}

var (
s skews
errs []error
)
var incompatible skews
for _, csv := range csvList.Items {
if csv.IsCopied() {
continue
}

s := skew{
name: csv.GetName(),
namespace: csv.GetNamespace(),
}
max, err := maxOpenShiftVersion(&csv)
if err != nil {
errs = append(errs, err)
s.err = err
incompatible = append(incompatible, s)
continue
}

if max == nil || max.GTE(*next) {
continue
}
s.maxOpenShiftVersion = max.String()

s = append(s, skew{
name: csv.GetName(),
namespace: csv.GetNamespace(),
maxOpenShiftVersion: max.String(),
})
incompatible = append(incompatible, s)
}

return s, utilerrors.NewAggregate(errs)
return incompatible, nil
}

func desiredRelease(ctx context.Context, cli client.Client) (*semver.Version, error) {
cv := configv1.ClusterVersion{}
if err := cli.Get(ctx, client.ObjectKey{Name: "version"}, &cv); err != nil { // "version" is the name of OpenShift's ClusterVersion singleton
return nil, err
return nil, &transientError{fmt.Errorf("Failed to get ClusterVersion: %w", err)}
}

v := cv.Status.Desired.Version
if v == "" {
// The release version hasn't been set yet
return nil, nil
return nil, fmt.Errorf("Desired release version missing from ClusterVersion")
}

desired, err := semver.ParseTolerant(v)
if err != nil {
return nil, err
return nil, fmt.Errorf("ClusterVersion has invalid desired release version: %w", err)
}

return &desired, nil
Expand All @@ -178,57 +205,36 @@ func maxOpenShiftVersion(csv *operatorsv1alpha1.ClusterServiceVersion) (*semver.
return nil, err
}

// Take the highest semver if there's more than one max version specified
var (
max *semver.Version
dups []semver.Version
errs []error
)
var max *string
for _, property := range properties {
if property.Type != MaxOpenShiftVersionProperty {
continue
}

value := strings.Trim(property.Value, "\"")
if value == "" {
continue
}

version, err := semver.ParseTolerant(value)
if err != nil {
errs = append(errs, err)
continue
if max != nil {
return nil, fmt.Errorf(`Defining more than one "%s" property is not allowed`, MaxOpenShiftVersionProperty)
}

if max == nil {
max = &version
continue
}
if version.LT(*max) {
continue
}
if version.EQ(*max) {
// Found a duplicate, mark it
dups = append(dups, *max)
}
max = &property.Value
}

max = &version
if max == nil {
return nil, nil
}

// Return an error if THE max version has a duplicate (i.e. equivalent version)
// Note: This may not be a problem since there should be no difference as far as blocking upgrades is concerned.
// This is more for clear status messages.
for _, dup := range dups {
if max.EQ(dup) && max.String() != dup.String() { // "1.0.0" vs "1.0.0" is fine, but not "1.0.0" vs "1.0.0+1"
errs = append(errs, fmt.Errorf("max openshift version ambiguous, equivalent versions %s and %s have been specified concurrently", max, dup))
}
// Account for any additional quoting
value := strings.Trim(*max, "\"")
if value == "" {
// Handle "" separately, so parse doesn't treat it as a zero
return nil, fmt.Errorf(`Value cannot be "" (an empty string)`)
}

if len(errs) > 0 {
return nil, utilerrors.NewAggregate(errs)
version, err := semver.ParseTolerant(value)
if err != nil {
return nil, fmt.Errorf(`Failed to parse "%s" as semver: %w`, value, err)
}

return max, nil
return &version, nil
}

func notCopiedSelector() (labels.Selector, error) {
Expand Down
Loading

0 comments on commit 734c6d0

Please sign in to comment.