Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refine yawollet health check in keepailved #280

Merged
merged 8 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,6 @@ issues:
- directive `// nolint.*` should be written without leading space

run:
timeout: 10m
timeout: 15m
issues-exit-code: 1
tests: true
31 changes: 27 additions & 4 deletions cmd/yawollet/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

yawolv1beta1 "github.com/stackitcloud/yawol/api/v1beta1"
controllers "github.com/stackitcloud/yawol/controllers/yawollet"
yawolhealthz "github.com/stackitcloud/yawol/internal/healthz"
"github.com/stackitcloud/yawol/internal/helper"

"go.uber.org/zap/zapcore"
Expand Down Expand Up @@ -68,7 +69,7 @@ func main() {
var keepalivedStatsFile string

flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metric endpoint binds to. Default is disabled.")
flag.StringVar(&probeAddr, "health-probe-bind-address", "0", "The address the probe endpoint binds to. Default is disabled.")
flag.StringVar(&probeAddr, "health-probe-bind-address", "127.0.0.1:8080", "The address the probe endpoint binds to.")

flag.StringVar(&namespace, "namespace", "", "The namespace from lb und lbm object.")
flag.StringVar(&loadbalancerName, "loadbalancer-name", "", "Name of lb object.")
Expand Down Expand Up @@ -105,6 +106,7 @@ func main() {
} else if requeueTime > 170 {
requeueTime = 170
}
requeueDuration := time.Duration(requeueTime) * time.Second

// set listen address
if listenAddress == "" {
Expand Down Expand Up @@ -191,7 +193,8 @@ func main() {
Metrics: server.Options{
BindAddress: metricsAddr,
},
LeaderElection: false,
HealthProbeBindAddress: probeAddr,
LeaderElection: false,
Cache: cache.Options{
DefaultNamespaces: map[string]cache.Config{
namespace: {},
Expand All @@ -215,7 +218,7 @@ func main() {
LoadbalancerMachineName: loadbalancerMachineName,
EnvoyCache: envoyCache,
ListenAddress: listenAddress,
RequeueDuration: time.Duration(requeueTime) * time.Second,
RequeueDuration: requeueDuration,
KeepalivedStatsFile: keepalivedStatsFile,
Recorder: mgr.GetEventRecorderFor("yawollet"),
RecorderLB: mgr.GetEventRecorderFor("yawol-service"),
Expand All @@ -226,17 +229,37 @@ func main() {

//+kubebuilder:scaffold:builder

managerCtx := ctrl.SetupSignalHandler()

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}

// The readyz checks are used by keepalived for increasing the priority of machines that run a healthy yawollet for
// the latest LoadBalancer revision (i.e., when the machine belongs to the current LoadBalancerSet).
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("informer-sync", yawolhealthz.NewCacheSyncHealthz(mgr.GetCache())); err != nil {
setupLog.Error(err, "unable to set up informer ready check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("loadbalancer-heartbeat", yawolhealthz.NewHeartbeatHealthz(
managerCtx, mgr.GetCache(), 2*requeueDuration, namespace, loadbalancerMachineName)); err != nil {
setupLog.Error(err, "unable to set up LoadBalancer heartbeat ready check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("loadbalancer-revision", yawolhealthz.NewLoadBalancerRevisionHealthz(
managerCtx, mgr.GetCache(), namespace, loadbalancerName, loadbalancerMachineName,
)); err != nil {
setupLog.Error(err, "unable to set up LoadBalancer revision ready check")
os.Exit(1)
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
if err := mgr.Start(managerCtx); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
}
Expand Down
53 changes: 0 additions & 53 deletions controllers/yawol-controller/loadbalancerset/helper.go
Original file line number Diff line number Diff line change
@@ -1,65 +1,12 @@
package loadbalancerset

import (
"fmt"

yawolv1beta1 "github.com/stackitcloud/yawol/api/v1beta1"
"github.com/stackitcloud/yawol/internal/helper"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var relevantLBMConditionslForLBS = []helper.LoadbalancerCondition{
helper.ConfigReady,
helper.EnvoyReady,
helper.EnvoyUpToDate,
}

// areRelevantConditionsMet checks if all required conditions (from the
// perspective of the LoadBalancerSet) are both `True` and up-to-date, according
// to the passed expiration time. If `stableConditions` is set, a condition is
// only considered `False` if it has been in that state since the expiration
// time.
func areRelevantConditionsMet(
machine *yawolv1beta1.LoadBalancerMachine,
expiration metav1.Time,
stableConditions bool,
) (ok bool, reason string) {
if machine.Status.Conditions == nil {
return false, "no conditions set"
}

// constuct lookup map
conditions := *machine.Status.Conditions
condMap := make(map[helper.LoadbalancerCondition]corev1.NodeCondition, len(conditions))
for i := range conditions {
condMap[helper.LoadbalancerCondition(conditions[i].Type)] = conditions[i]
}

for _, typ := range relevantLBMConditionslForLBS {
condition, found := condMap[typ]
if !found {
return false, fmt.Sprintf("required condition %s not present on machine", typ)
}

conditionIsStable := true
if stableConditions {
conditionIsStable = condition.LastTransitionTime.Before(&expiration)
}
if conditionIsStable && condition.Status != corev1.ConditionTrue {
return false, fmt.Sprintf(
"condition %s is in status %s with reason: %v, message: %v, lastTransitionTime: %v",
condition.Type, condition.Status, condition.Reason, condition.Message, condition.LastTransitionTime,
)
}
if condition.LastHeartbeatTime.Before(&expiration) {
return false, fmt.Sprintf("condition %s heartbeat is stale", condition.Type)
}
}

return true, ""
}

func findDeletionCondition(machine *yawolv1beta1.LoadBalancerMachine) *corev1.NodeCondition {
if machine.Status.Conditions == nil {
return nil
Expand Down
106 changes: 0 additions & 106 deletions controllers/yawol-controller/loadbalancerset/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,112 +11,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type relCondTest struct {
conditions *[]corev1.NodeCondition
expiration metav1.Time
checkTransition bool
expect bool
expectReason string
}

var _ = DescribeTable("areRelevantConditionsMet",
func(t relCondTest) {
machine := &yawolv1beta1.LoadBalancerMachine{
Status: yawolv1beta1.LoadBalancerMachineStatus{
Conditions: t.conditions,
},
}
res, reason := areRelevantConditionsMet(machine, t.expiration, t.checkTransition)
if t.expectReason != "" {
Expect(reason).To(ContainSubstring(t.expectReason))
}
Expect(res).To(Equal(t.expect))
},
Entry("No Conditions", relCondTest{
conditions: nil,
expect: false,
}),
Entry("empty Conditions", relCondTest{
conditions: &[]corev1.NodeCondition{},
expect: false,
}),
Entry("all conditions met", relCondTest{
conditions: &[]corev1.NodeCondition{
{Type: corev1.NodeConditionType(helper.ConfigReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyUpToDate), Status: corev1.ConditionTrue},
},
checkTransition: false,
expect: true,
}),
Entry("a required condition is missing", relCondTest{
conditions: &[]corev1.NodeCondition{
{Type: corev1.NodeConditionType(helper.EnvoyReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyUpToDate), Status: corev1.ConditionTrue},
},
expect: false,
expectReason: "required condition ConfigReady not present on machine",
}),
Entry("a unrelated condition is fale", relCondTest{
conditions: &[]corev1.NodeCondition{
{Type: corev1.NodeConditionType(helper.ConfigReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyUpToDate), Status: corev1.ConditionTrue},
{Type: "foo", Status: corev1.ConditionFalse},
},
expect: true,
}),
Entry("a required condition is false", relCondTest{
conditions: &[]corev1.NodeCondition{
{Type: corev1.NodeConditionType(helper.ConfigReady), Status: corev1.ConditionFalse},
{Type: corev1.NodeConditionType(helper.EnvoyReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyUpToDate), Status: corev1.ConditionTrue},
},
expect: false,
expectReason: "condition ConfigReady is in status False",
}),
Entry("a required condition is too old", relCondTest{
conditions: &[]corev1.NodeCondition{
{
Type: corev1.NodeConditionType(helper.ConfigReady),
Status: corev1.ConditionTrue,
LastHeartbeatTime: metav1.Time{Time: time.Time{}.Add(-5 * time.Minute)},
},
{Type: corev1.NodeConditionType(helper.EnvoyReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyUpToDate), Status: corev1.ConditionTrue},
},
checkTransition: false,
expect: false,
expectReason: "condition ConfigReady heartbeat is stale",
}),
Entry("with transition check: a condition is failed, but just happened", relCondTest{
conditions: &[]corev1.NodeCondition{
{
Type: corev1.NodeConditionType(helper.ConfigReady),
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{},
},
{Type: corev1.NodeConditionType(helper.EnvoyReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyUpToDate), Status: corev1.ConditionTrue},
},
checkTransition: true,
expect: true,
}),
Entry("with transition check: a condition is failed, some time ago", relCondTest{
conditions: &[]corev1.NodeCondition{
{
Type: corev1.NodeConditionType(helper.ConfigReady),
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Time{}.Add(-5 * time.Minute)},
},
{Type: corev1.NodeConditionType(helper.EnvoyReady), Status: corev1.ConditionTrue},
{Type: corev1.NodeConditionType(helper.EnvoyUpToDate), Status: corev1.ConditionTrue},
},
checkTransition: true,
expect: false,
}),
)

var _ = Describe("getDeletionCondition", func() {
It("should return nil if no conditions are set", func() {
machine := &yawolv1beta1.LoadBalancerMachine{}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ func shouldMachineBeDeleted(machine *yawolv1beta1.LoadBalancerMachine) (shouldDe
return false, ""
}

ok, reason := areRelevantConditionsMet(machine, before3Minutes, true)
ok, reason := helper.AreRelevantConditionsMet(machine, before3Minutes, true)
if !ok {
return true, reason
}
Expand All @@ -279,7 +279,7 @@ func shouldMachineBeDeleted(machine *yawolv1beta1.LoadBalancerMachine) (shouldDe
func isMachineReady(machine yawolv1beta1.LoadBalancerMachine) bool {
before180seconds := metav1.Time{Time: time.Now().Add(-180 * time.Second)}

ok, _ := areRelevantConditionsMet(&machine, before180seconds, false)
ok, _ := helper.AreRelevantConditionsMet(&machine, before180seconds, false)
return ok
}

Expand Down
11 changes: 1 addition & 10 deletions controllers/yawollet/loadbalancer_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@ import (
yawolv1beta1 "github.com/stackitcloud/yawol/api/v1beta1"
"github.com/stackitcloud/yawol/internal/helper"
"github.com/stackitcloud/yawol/internal/helper/kubernetes"
"golang.org/x/time/rate"

envoycache "github.com/envoyproxy/go-control-plane/pkg/cache/v3"
"github.com/envoyproxy/go-control-plane/pkg/resource/v3"
"github.com/go-logr/logr"
"github.com/spf13/afero"
"golang.org/x/time/rate"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
Expand All @@ -39,7 +38,6 @@ type LoadBalancerReconciler struct {
ListenAddress string
RequeueDuration time.Duration
KeepalivedStatsFile string
Filesystem afero.Fs
}

// Reconcile handles reconciliation of loadbalancer object
Expand Down Expand Up @@ -97,10 +95,6 @@ func (r *LoadBalancerReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, err
}

if err := helper.ReconcileLatestRevisionFile(r.Filesystem, lb, lbm); err != nil {
return ctrl.Result{}, err
}

return ctrl.Result{RequeueAfter: r.RequeueDuration}, reconcileError
}

Expand Down Expand Up @@ -179,9 +173,6 @@ func (r *LoadBalancerReconciler) reconcile(

// SetupWithManager is used by kubebuilder to init the controller loop
func (r *LoadBalancerReconciler) SetupWithManager(mgr ctrl.Manager) error {
if r.Filesystem == nil {
r.Filesystem = afero.NewOsFs()
}
return ctrl.NewControllerManagedBy(mgr).
For(&yawolv1beta1.LoadBalancer{}).
WithEventFilter(predicate.And(
Expand Down
29 changes: 0 additions & 29 deletions controllers/yawollet/loadbalancer_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ package controllers

import (
"context"
"errors"
"io"
"io/fs"
"net/http"
"os/exec"
"strings"
Expand All @@ -22,7 +20,6 @@ import (
"github.com/stackitcloud/yawol/internal/helper"
)

const StatusConditions int = 3
const (
TIMEOUT = 10 * time.Second
INTERVAL = 500 * time.Millisecond
Expand Down Expand Up @@ -449,32 +446,6 @@ var _ = Describe("check loadbalancer reconcile", Serial, Ordered, func() {
)
})
})
When("lb and lbm revision annotation are the same", func() {
It("should create yawolKeepalivedFile", func() {
Eventually(func() error {
_, err := filesystem.Stat(helper.YawolSetIsLatestRevisionFile)
return err
}, TIMEOUT, INTERVAL).Should(Succeed())

})
})
When("lb and lbm revision annotation are the same", func() {
BeforeEach(func() {
lb.Annotations = map[string]string{
helper.RevisionAnnotation: "2",
}
})
It("should not create or delete yawolKeepalivedFile", func() {
Eventually(func() error {
_, err := filesystem.Stat(helper.YawolSetIsLatestRevisionFile)
if err == nil || !errors.Is(err, fs.ErrNotExist) {
return errors.New("keepalived file still exists")
}
return nil
}, TIMEOUT, INTERVAL).Should(Succeed())

})
})
When("envoy gets killed and restarted", func() {
It("should set the correct conditions", func() {
By("killing the envoy process")
Expand Down
Loading