Skip to content
This repository has been archived by the owner on Jul 17, 2023. It is now read-only.

Implement node reconcile and drain metric #2

Merged
merged 2 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# ocp-drain-monitor
// TODO(user): Add simple overview of use/purpose

## Description
// TODO(user): An in-depth paragraph about your project and overview of use
`ocp-drain-monitor` watches OpenShift Nodes and keeps a per-node draining metric up to date.
The metric is calculated from annotations set on each Node by the [OpenShift machine-config-operator](https://github.com/openshift/machine-config-operator).

## Getting Started
You’ll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) to get a local cluster for testing, or run against a remote cluster.
Expand Down Expand Up @@ -41,9 +40,6 @@ UnDeploy the controller from the cluster:
make undeploy
```

## Contributing
// TODO(user): Add detailed information on how you would like others to contribute to this project

### How it works
This project aims to follow the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/).

Expand Down
35 changes: 4 additions & 31 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,35 +36,10 @@ spec:
labels:
control-plane: controller-manager
spec:
# TODO(user): Uncomment the following code to configure the nodeAffinity expression
# according to the platforms which are supported by your solution.
# It is considered best practice to support multiple architectures. You can
# build your manager image using the makefile target docker-buildx.
# affinity:
# nodeAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# - key: kubernetes.io/arch
# operator: In
# values:
# - amd64
# - arm64
# - ppc64le
# - s390x
# - key: kubernetes.io/os
# operator: In
# values:
# - linux
securityContext:
runAsNonRoot: true
# TODO(user): For common cases that do not require escalating privileges
# it is recommended to ensure that all your Pods/Containers are restrictive.
# More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
# Please uncomment the following code if your project does NOT have to work on old Kubernetes
# versions < 1.19 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ).
# seccompProfile:
# type: RuntimeDefault
bastjan marked this conversation as resolved.
Show resolved Hide resolved
seccompProfile:
type: RuntimeDefault
containers:
- command:
- /manager
Expand All @@ -89,14 +64,12 @@ spec:
port: 8081
initialDelaySeconds: 5
periodSeconds: 10
# TODO(user): Configure the resources accordingly based on the project requirements.
# More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
resources:
limits:
cpu: 500m
cpu: 100m
memory: 128Mi
requests:
cpu: 10m
memory: 64Mi
memory: 32Mi
serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
21 changes: 21 additions & 0 deletions controllers/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package controllers

import (
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// nodeDraining is a gauge vector with one series per node, keyed by the
// "node" label. It is exposed as ocp_drain_monitor_node_draining; Reconcile
// sets a node's series to 1 while a drain is pending and to 0 otherwise.
var nodeDraining = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: "ocp_drain_monitor",
		Name:      "node_draining",
		Help:      "Node draining status",
	},
	[]string{"node"},
)

// init registers the nodeDraining gauge vector with the controller-runtime
// metrics registry when the package is loaded.
func init() {
	metrics.Registry.MustRegister(nodeDraining)
}
45 changes: 34 additions & 11 deletions controllers/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log"
)

const (
	// DesiredDrainerAnnotationKey is set by OCP to indicate drain/uncordon requests.
	//
	// References:
	// https://github.com/openshift/machine-config-operator/blob/b36482885ba1304e122e7c01c26cd671dfdd0418/pkg/daemon/constants/constants.go#L17
	// https://github.com/openshift/machine-config-operator/blob/b36482885ba1304e122e7c01c26cd671dfdd0418/pkg/daemon/drain.go#L79
	DesiredDrainerAnnotationKey = "machineconfiguration.openshift.io/desiredDrain"
	// LastAppliedDrainerAnnotationKey is set by OCP to indicate the last request applied.
	LastAppliedDrainerAnnotationKey = "machineconfiguration.openshift.io/lastAppliedDrain"
)

// NodeReconciler reconciles a Node object
type NodeReconciler struct {
client.Client
Expand All @@ -34,20 +43,34 @@ type NodeReconciler struct {

//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch

// Reconcile reacts to Node changes and updates the node draining metric.
//
// The gauge series for the node is set to 1 while the desired drain annotation
// differs from the last applied drain annotation, and to 0 once both match.
// The series is removed when the node cannot be fetched, when the node is
// being deleted, or when either annotation is missing. Only a fetch error
// other than "not found" is returned to the caller.
func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	node := corev1.Node{}
	err := r.Get(ctx, req.NamespacedName, &node)
	switch {
	case err != nil:
		// The status cannot be calculated without the node; drop the series.
		nodeDraining.DeleteLabelValues(req.Name)
		return ctrl.Result{}, client.IgnoreNotFound(err)
	case !node.DeletionTimestamp.IsZero():
		// The node is on its way out; stop reporting it.
		nodeDraining.DeleteLabelValues(node.Name)
		return ctrl.Result{}, nil
	}

	desired, hasDesired := node.Annotations[DesiredDrainerAnnotationKey]
	lastApplied, hasLastApplied := node.Annotations[LastAppliedDrainerAnnotationKey]
	if !(hasDesired && hasLastApplied) {
		logger.Info("Node is missing drain annotations. Not OCP?", "node", node.Name, "desiredDrain", desired, "lastAppliedDrain", lastApplied)
		nodeDraining.DeleteLabelValues(node.Name)
		return ctrl.Result{}, nil
	}

	// A pending request (desired != last applied) is reported as draining.
	draining := 0.0
	if desired != lastApplied {
		draining = 1
	}
	nodeDraining.WithLabelValues(node.Name).Set(draining)
	return ctrl.Result{}, nil
}

Expand Down
80 changes: 80 additions & 0 deletions controllers/node_controller_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package controllers

import (
"context"
"testing"

"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// TestReconcile walks a single node through three annotation states using a
// fake client and checks the node draining metric after each reconcile:
// matching annotations, differing annotations, and no annotations at all.
func TestReconcile(t *testing.T) {
	const nodeName = "node1"
	ctx := context.Background()

	sch := runtime.NewScheme()
	utilruntime.Must(clientgoscheme.AddToScheme(sch))

	fakeClient := fake.NewClientBuilder().
		WithScheme(sch).
		WithRuntimeObjects(node(nodeName, "a", "a")).
		Build()
	reconciler := &NodeReconciler{Client: fakeClient, Scheme: sch}

	// reconcile runs one reconcile pass for the node and requires it to succeed.
	reconcile := func() {
		t.Helper()
		_, err := reconciler.Reconcile(ctx, requestFor(nodeName))
		require.NoError(t, err)
	}

	// Desired and last applied annotations match.
	reconcile()
	compareMetrics(t, nodeName, 0.0, "node is not draining if desired drainer is the same as last applied drainer annotation")

	// Desired annotation moves ahead of the last applied one.
	require.NoError(t, fakeClient.Update(ctx, node(nodeName, "b", "a")))
	reconcile()
	compareMetrics(t, nodeName, 1.0, "node should be draining if desired drainer is different from last applied drainer annotation")

	// Both annotations are gone.
	bare := node(nodeName, "", "")
	bare.Annotations = map[string]string{}
	require.NoError(t, fakeClient.Update(ctx, bare))
	reconcile()
	seriesCount := testutil.CollectAndCount(nodeDraining, "ocp_drain_monitor_node_draining")
	require.Equal(t, 0, seriesCount, "metric should be removed if not able to calculate it")
}

// compareMetrics requires that the nodeDraining gauge for the given node label
// currently holds the expected value. msgAndArgs is passed through to the
// assertion as the failure message.
func compareMetrics(t *testing.T, nodeLbl string, expected float64, msgAndArgs ...interface{}) {
	t.Helper()

	gauge, err := nodeDraining.GetMetricWithLabelValues(nodeLbl)
	require.NoError(t, err)

	actual := testutil.ToFloat64(gauge)
	require.Equal(t, expected, actual, msgAndArgs...)
}

// requestFor builds a reconcile request for the object with the given name
// and no namespace.
func requestFor(name string) ctrl.Request {
	key := types.NamespacedName{Name: name}
	return ctrl.Request{NamespacedName: key}
}

// node returns a Node with the given name whose desired and last applied
// drain annotations are set to the given values.
func node(name, desiredDrainer, lastAppliedDrainer string) *corev1.Node {
	n := &corev1.Node{}
	n.Name = name
	n.Annotations = map[string]string{
		DesiredDrainerAnnotationKey:     desiredDrainer,
		LastAppliedDrainerAnnotationKey: lastAppliedDrainer,
	}
	return n
}
79 changes: 0 additions & 79 deletions controllers/suite_test.go

This file was deleted.

6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ module github.com/appuio/ocp-drain-monitor
go 1.19

require (
github.com/onsi/ginkgo/v2 v2.6.1
github.com/onsi/gomega v1.24.2
github.com/prometheus/client_golang v1.14.0
github.com/stretchr/testify v1.8.0
k8s.io/api v0.26.0
k8s.io/apimachinery v0.26.0
k8s.io/client-go v0.26.0
Expand Down Expand Up @@ -54,7 +54,7 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/olekukonko/tablewriter v0.0.4 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.14.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
Expand Down
4 changes: 1 addition & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,8 @@ github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8=
github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo/v2 v2.6.1 h1:1xQPCjcqYw/J5LchOcp4/2q/jzJFjiAOc25chhnDw+Q=
github.com/onsi/ginkgo/v2 v2.6.1/go.mod h1:yjiuMwPokqY1XauOgju45q3sJt6VzQ/Fict1LFVcsAo=
github.com/onsi/ginkgo/v2 v2.6.0 h1:9t9b9vRUbFq3C4qKFCGkVuq/fIHji802N1nrtkh1mNc=
github.com/onsi/gomega v1.24.2 h1:J/tulyYK6JwBldPViHJReihxxZ+22FHs0piGjQAvoUE=
github.com/onsi/gomega v1.24.2/go.mod h1:gs3J10IS7Z7r7eXRoNJIrNqU4ToQukCJhFtKrWgHWnk=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
Expand Down