Skip to content

Commit

Permalink
Merge pull request openshift#65 from sadasu/annotate-metal3
Browse files Browse the repository at this point in the history
Bug 1906105: Annotate existing metal3 deployment
  • Loading branch information
openshift-merge-robot authored Dec 15, 2020
2 parents b8767e8 + fa3c01f commit bc6f65c
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 60 deletions.
5 changes: 4 additions & 1 deletion controllers/clusteroperator.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ const (
// ReasonDeploymentCrashLooping indicates that the deployment is crashlooping
ReasonDeploymentCrashLooping StatusReason = "DeploymentCrashLooping"

// ReasonNotFound indicates that a required resource (such as the metal3 deployment or daemonset) was not found
ReasonNotFound StatusReason = "ResourceNotFound"

// ReasonUnsupported is an unsupported StatusReason
ReasonUnsupported StatusReason = "UnsupportedPlatform"
)
Expand Down Expand Up @@ -223,7 +226,7 @@ func (r *ProvisioningReconciler) updateCOStatus(newReason StatusReason, msg, pro
case ReasonComplete:
v1helpers.SetStatusCondition(&conds, setStatusCondition(osconfigv1.OperatorAvailable, osconfigv1.ConditionTrue, string(newReason), msg))
v1helpers.SetStatusCondition(&conds, setStatusCondition(osconfigv1.OperatorProgressing, osconfigv1.ConditionFalse, string(newReason), progressMsg))
case ReasonInvalidConfiguration, ReasonDeployTimedOut:
case ReasonInvalidConfiguration, ReasonDeployTimedOut, ReasonNotFound:
v1helpers.SetStatusCondition(&conds, setStatusCondition(osconfigv1.OperatorDegraded, osconfigv1.ConditionTrue, string(newReason), msg))
v1helpers.SetStatusCondition(&conds, setStatusCondition(osconfigv1.OperatorAvailable, osconfigv1.ConditionTrue, string(ReasonEmpty), ""))
v1helpers.SetStatusCondition(&conds, setStatusCondition(osconfigv1.OperatorProgressing, osconfigv1.ConditionTrue, string(newReason), progressMsg))
Expand Down
49 changes: 41 additions & 8 deletions controllers/clusteroperator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,62 @@ import (
"github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
)

func TestUpdateCOStatusDisabled(t *testing.T) {
func TestUpdateCOStatus(t *testing.T) {
tCases := []struct {
name string
reason StatusReason
msg string
progressMsg string
expectedConditions []osconfigv1.ClusterOperatorStatusCondition
}{
{
name: "Correct Condition",
name: "Disabled",
reason: ReasonUnsupported,
msg: "Operator is non-functional",
progressMsg: "",
expectedConditions: []osconfigv1.ClusterOperatorStatusCondition{
setStatusCondition(osconfigv1.OperatorDegraded, osconfigv1.ConditionFalse, "", ""),
setStatusCondition(osconfigv1.OperatorAvailable, osconfigv1.ConditionTrue, "AsExpected", "Operational"),
setStatusCondition(OperatorDisabled, osconfigv1.ConditionTrue, "UnsupportedPlatform", "Operator is non-functional"),
setStatusCondition(osconfigv1.OperatorAvailable, osconfigv1.ConditionTrue, string(ReasonExpected), "Operational"),
setStatusCondition(OperatorDisabled, osconfigv1.ConditionTrue, string(ReasonUnsupported), "Operator is non-functional"),
setStatusCondition(osconfigv1.OperatorProgressing, osconfigv1.ConditionFalse, "", ""),
setStatusCondition(osconfigv1.OperatorUpgradeable, osconfigv1.ConditionTrue, "", ""),
},
},
{
name: "Progressing",
reason: ReasonSyncing,
msg: "",
progressMsg: "syncing metal3 pod",
expectedConditions: []osconfigv1.ClusterOperatorStatusCondition{
setStatusCondition(osconfigv1.OperatorDegraded, osconfigv1.ConditionFalse, "", ""),
setStatusCondition(osconfigv1.OperatorAvailable, osconfigv1.ConditionTrue, string(ReasonSyncing), ""),
setStatusCondition(OperatorDisabled, osconfigv1.ConditionFalse, "", ""),
setStatusCondition(osconfigv1.OperatorProgressing, osconfigv1.ConditionTrue, string(ReasonSyncing), "syncing metal3 pod"),
setStatusCondition(osconfigv1.OperatorUpgradeable, osconfigv1.ConditionTrue, "", ""),
},
},
{
name: "Available",
reason: ReasonComplete,
msg: "metal3 pod running",
progressMsg: "",
expectedConditions: []osconfigv1.ClusterOperatorStatusCondition{
setStatusCondition(osconfigv1.OperatorDegraded, osconfigv1.ConditionFalse, "", ""),
setStatusCondition(osconfigv1.OperatorProgressing, osconfigv1.ConditionFalse, string(ReasonComplete), ""),
setStatusCondition(osconfigv1.OperatorAvailable, osconfigv1.ConditionTrue, string(ReasonComplete), "metal3 pod running"),
setStatusCondition(osconfigv1.OperatorUpgradeable, osconfigv1.ConditionTrue, "", ""),
setStatusCondition(OperatorDisabled, osconfigv1.ConditionFalse, "", ""),
},
},
}

reconciler := newFakeProvisioningReconciler(setUpSchemeForReconciler(), &osconfigv1.Infrastructure{})
co, _ := reconciler.createClusterOperator()
reconciler.OSClient = fakeconfigclientset.NewSimpleClientset(co)

for _, tc := range tCases {
err := reconciler.updateCOStatus(ReasonUnsupported, "Operator is non-functional", "")
co, _ := reconciler.createClusterOperator()
reconciler.OSClient = fakeconfigclientset.NewSimpleClientset(co)

err := reconciler.updateCOStatus(tc.reason, tc.msg, tc.progressMsg)
if err != nil {
t.Error(err)
}
Expand All @@ -53,8 +86,8 @@ func TestUpdateCOStatusDisabled(t *testing.T) {
if diff != "" {
t.Fatal(diff)
}
_ = reconciler.OSClient.ConfigV1().ClusterOperators().Delete(context.Background(), clusterOperatorName, metav1.DeleteOptions{})
}
_ = reconciler.OSClient.ConfigV1().ClusterOperators().Delete(context.Background(), clusterOperatorName, metav1.DeleteOptions{})
}

func TestEnsureClusterOperator(t *testing.T) {
Expand Down
96 changes: 52 additions & 44 deletions controllers/provisioning_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,12 @@ import (
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

osconfigv1 "github.com/openshift/api/config/v1"
osclientset "github.com/openshift/client-go/config/clientset/versioned"
metal3iov1alpha1 "github.com/openshift/cluster-baremetal-operator/api/v1alpha1"
provisioning "github.com/openshift/cluster-baremetal-operator/provisioning"
"github.com/openshift/library-go/pkg/operator/events"
"github.com/openshift/library-go/pkg/operator/resource/resourceapply"
"github.com/openshift/library-go/pkg/operator/resource/resourcemerge"
)

const (
Expand Down Expand Up @@ -189,6 +186,14 @@ func (r *ProvisioningReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error
return ctrl.Result{}, err
}

specChanged := baremetalConfig.Generation != baremetalConfig.Status.ObservedGeneration
if specChanged {
err = r.updateCOStatus(ReasonSyncing, "", "Applying metal3 resources")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Syncing state: %v", clusterOperatorName, err)
}
}

if err := provisioning.ValidateBaremetalProvisioningConfig(baremetalConfig); err != nil {
// Provisioning configuration is not valid.
// Requeue request.
Expand All @@ -215,7 +220,7 @@ func (r *ProvisioningReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error
return ctrl.Result{}, err
}

//Create Secrets needed for Metal3 deployment
// Create Secrets needed for Metal3 deployment
if err := provisioning.CreateMariadbPasswordSecret(r.KubeClient.CoreV1(), ComponentNamespace, baremetalConfig, r.Scheme); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to create Mariadb password")
}
Expand All @@ -236,32 +241,42 @@ func (r *ProvisioningReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error
}

if maoOwned {
r.Log.V(1).Info("metal3 deployment already exists")
err = r.updateCOStatus(ReasonComplete, "found existing Metal3 deployment", "")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Available state: %v", clusterOperatorName, err)
}
return ctrl.Result{}, nil
r.Log.V(1).Info("Adding annotation for CBO to take ownership of metal3 deployment created by MAO")
}

specChanged := baremetalConfig.Generation != baremetalConfig.Status.ObservedGeneration
if specChanged {
err = r.updateCOStatus(ReasonSyncing, "", "Applying the Metal3 deployment")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Syncing state: %v", clusterOperatorName, err)
}
}
info := r.provisioningInfo(baremetalConfig, &containerImages)

// Proceed with creating or updating the Metal3 deployment
updated, err := r.ensureMetal3Deployment(baremetalConfig, &containerImages, metal3DeploymentSelector)
updated, err := provisioning.EnsureMetal3Deployment(info, metal3DeploymentSelector)
if err != nil {
return ctrl.Result{}, err
}
if updated {
err = r.Client.Status().Update(context.Background(), baremetalConfig)
return ctrl.Result{Requeue: true}, err
}

info := r.provisioningInfo(baremetalConfig, &containerImages)
// Determine the status of the deployment
deploymentState, err := provisioning.GetDeploymentState(r.KubeClient.AppsV1(), ComponentNamespace, baremetalConfig)
if err != nil {
err = r.updateCOStatus(ReasonNotFound, "metal3 deployment inaccessible", "")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Degraded state: %v", clusterOperatorName, err)
}
return ctrl.Result{}, errors.Wrap(err, "failed to determine state of metal3 deployment")
}
if deploymentState == appsv1.DeploymentReplicaFailure {
err = r.updateCOStatus(ReasonDeployTimedOut, "metal3 deployment rollout taking too long", "")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Degraded state: %v", clusterOperatorName, err)
}
} else if deploymentState == appsv1.DeploymentAvailable {
err = r.updateCOStatus(ReasonSyncing, "metal3 pod running", "starting other metal3 services")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Progressing state: %v", clusterOperatorName, err)
}
}

for _, ensureResource := range []ensureFunc{
provisioning.EnsureMetal3StateService,
provisioning.EnsureImageCache,
Expand All @@ -284,36 +299,29 @@ func (r *ProvisioningReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error
}
}

err = r.updateCOStatus(ReasonComplete, "new Metal3 deployment completed", "")
// Determine the status of the DaemonSet
daemonSetState, err := provisioning.GetDaemonSetState(r.KubeClient.AppsV1(), ComponentNamespace, baremetalConfig)
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Available state: %v", clusterOperatorName, err)
}

return ctrl.Result{}, nil
}

func (r *ProvisioningReconciler) ensureMetal3Deployment(provConfig *metal3iov1alpha1.Provisioning, images *provisioning.Images, selector *metav1.LabelSelector) (updated bool, err error) {
metal3Deployment := provisioning.NewMetal3Deployment(ComponentNamespace, images, &provConfig.Spec, selector)
expectedGeneration := resourcemerge.ExpectedDeploymentGeneration(metal3Deployment, provConfig.Status.Generations)
err = r.updateCOStatus(ReasonNotFound, "metal3 image cache daemonset inaccessible", "")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Degraded state: %v", clusterOperatorName, err)
}

err = controllerutil.SetControllerReference(provConfig, metal3Deployment, r.Scheme)
if err != nil {
err = fmt.Errorf("unable to set controllerReference on deployment: %w", err)
return
return ctrl.Result{}, errors.Wrap(err, "failed to determine state of metal3 image cache daemonset")
}

deployment, updated, err := resourceapply.ApplyDeployment(r.KubeClient.AppsV1(),
events.NewLoggingEventRecorder(ComponentName), metal3Deployment, expectedGeneration)
if err != nil {
err = fmt.Errorf("unable to apply Metal3 deployment: %w", err)
return
if daemonSetState == provisioning.DaemonSetReplicaFailure {
err = r.updateCOStatus(ReasonDeployTimedOut, "metal3 image cache rollout taking too long", "")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Degraded state: %v", clusterOperatorName, err)
}
} else if daemonSetState == provisioning.DaemonSetAvailable {
err = r.updateCOStatus(ReasonComplete, "metal3 pod and image cache are running", "")
if err != nil {
return ctrl.Result{}, fmt.Errorf("unable to put %q ClusterOperator in Progressing state: %v", clusterOperatorName, err)
}
}

if updated {
resourcemerge.SetDeploymentGeneration(&provConfig.Status.Generations, deployment)
err = r.Client.Status().Update(context.Background(), provConfig)
}
return
return ctrl.Result{}, nil
}

func (r *ProvisioningReconciler) provisioningInfo(provConfig *metal3iov1alpha1.Provisioning, images *provisioning.Images) *provisioning.ProvisioningInfo {
Expand Down
61 changes: 59 additions & 2 deletions provisioning/baremetal_pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,20 @@ package provisioning

import (
"context"
"fmt"
"strconv"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
appsclientv1 "k8s.io/client-go/kubernetes/typed/apps/v1"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

metal3iov1alpha1 "github.com/openshift/cluster-baremetal-operator/api/v1alpha1"
"github.com/openshift/library-go/pkg/operator/resource/resourceapply"
"github.com/openshift/library-go/pkg/operator/resource/resourcemerge"
)

const (
Expand All @@ -42,6 +47,9 @@ const (
cboLabelName = "baremetal.openshift.io/cluster-baremetal-operator"
)

var deploymentRolloutStartTime = time.Now()
var deploymentRolloutTimeout = 5 * time.Minute

var sharedVolumeMount = corev1.VolumeMount{
Name: baremetalSharedVolume,
MountPath: "/shared",
Expand Down Expand Up @@ -567,7 +575,7 @@ func newMetal3PodTemplateSpec(images *Images, config *metal3iov1alpha1.Provision
}
}

func NewMetal3Deployment(targetNamespace string, images *Images, config *metal3iov1alpha1.ProvisioningSpec, selector *metav1.LabelSelector) *appsv1.Deployment {
func newMetal3Deployment(targetNamespace string, images *Images, config *metal3iov1alpha1.ProvisioningSpec, selector *metav1.LabelSelector) *appsv1.Deployment {
if selector == nil {
selector = &metav1.LabelSelector{
MatchLabels: map[string]string{
Expand All @@ -583,7 +591,6 @@ func NewMetal3Deployment(targetNamespace string, images *Images, config *metal3i
break
}
}

template := newMetal3PodTemplateSpec(images, config, k8sAppLabel)
return &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -616,3 +623,53 @@ func CheckExistingMetal3Deployment(client appsclientv1.DeploymentsGetter, target
}
return nil, false, err
}

// EnsureMetal3Deployment creates or updates the metal3 deployment so that it
// matches the current baremetal Provisioning configuration. The deployment is
// built with the cboOwnedAnnotation and owned (via controllerReference) by the
// Provisioning CR. It reports whether the apply changed the deployment and,
// when it did, records the new generation in the Provisioning status.
func EnsureMetal3Deployment(info *ProvisioningInfo, selector *metav1.LabelSelector) (updated bool, err error) {
	// Build the desired metal3 deployment from the current configuration.
	metal3Deployment := newMetal3Deployment(info.Namespace, info.Images, &info.ProvConfig.Spec, selector)

	expectedGeneration := resourcemerge.ExpectedDeploymentGeneration(metal3Deployment, info.ProvConfig.Status.Generations)

	err = controllerutil.SetControllerReference(info.ProvConfig, metal3Deployment, info.Scheme)
	if err != nil {
		err = fmt.Errorf("unable to set controllerReference on deployment: %w", err)
		return
	}

	deployment, updated, err := resourceapply.ApplyDeployment(info.Client.AppsV1(),
		info.EventRecorder, metal3Deployment, expectedGeneration)
	if err != nil {
		err = fmt.Errorf("unable to apply Metal3 deployment: %w", err)
		return
	}

	if updated {
		// Restart the rollout timer only when the apply actually changed the
		// deployment. Resetting it unconditionally on every reconcile would
		// keep sliding the timeout window forward, so GetDeploymentState
		// could never report a rollout as stuck.
		deploymentRolloutStartTime = time.Now()
		resourcemerge.SetDeploymentGeneration(&info.ProvConfig.Status.Generations, deployment)
	}
	return
}

// getDeploymentCondition returns the type of the first deployment condition
// whose status is True. If no condition is currently True, the deployment is
// treated as still progressing.
func getDeploymentCondition(deployment *appsv1.Deployment) appsv1.DeploymentConditionType {
	conditions := deployment.Status.Conditions
	for i := range conditions {
		if conditions[i].Status == corev1.ConditionTrue {
			return conditions[i].Type
		}
	}
	return appsv1.DeploymentProgressing
}

// GetDeploymentState provides the current state of the metal3 deployment.
// Any error fetching the deployment (or a missing deployment) is reported as
// DeploymentReplicaFailure, as is a deployment that has been Progressing for
// longer than the rollout timeout.
// NOTE(review): the config parameter is not read here — presumably kept for
// signature symmetry with related helpers; confirm before removing.
func GetDeploymentState(client appsclientv1.DeploymentsGetter, targetNamespace string, config *metal3iov1alpha1.Provisioning) (appsv1.DeploymentConditionType, error) {
	existing, err := client.Deployments(targetNamespace).Get(context.Background(), baremetalDeploymentName, metav1.GetOptions{})
	if err != nil || existing == nil {
		// The deployment could not be read; surface the failure state along
		// with whatever error the API returned.
		return appsv1.DeploymentReplicaFailure, err
	}

	state := getDeploymentCondition(existing)
	if state == appsv1.DeploymentProgressing && time.Since(deploymentRolloutStartTime) >= deploymentRolloutTimeout {
		// The rollout has been in progress past the allowed window; treat it
		// as a replica failure so the operator can go Degraded.
		return appsv1.DeploymentReplicaFailure, nil
	}
	return state, nil
}
Loading

0 comments on commit bc6f65c

Please sign in to comment.