From f72904e0617a5de15286dc2ae2b07d79a4c4bffe Mon Sep 17 00:00:00 2001
From: Danil-Grigorev
Date: Mon, 13 Jul 2020 11:20:58 +0200
Subject: [PATCH 1/2] Implement leader election for manager

---
 cmd/manager/main.go | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/cmd/manager/main.go b/cmd/manager/main.go
index bb7fe2bf..254684b0 100644
--- a/cmd/manager/main.go
+++ b/cmd/manager/main.go
@@ -20,6 +20,7 @@ import (
 	"flag"
 	"fmt"
 	"os"
+	"time"
 
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/klog"
@@ -40,8 +41,36 @@ import (
 func main() {
 	klog.InitFlags(nil)
 
-	watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.")
-	metricsAddr := flag.String("metrics-addr", ":8080", "The address the metric endpoint binds to.")
+	watchNamespace := flag.String(
+		"namespace",
+		"",
+		"Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.",
+	)
+
+	metricsAddr := flag.String(
+		"metrics-addr",
+		":8080",
+		"The address the metric endpoint binds to.",
+	)
+
+	leaderElectResourceNamespace := flag.String(
+		"leader-elect-resource-namespace",
+		"",
+		"The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.",
+	)
+
+	leaderElect := flag.Bool(
+		"leader-elect",
+		false,
+		"Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.",
+	)
+
+	leaderElectLeaseDuration := flag.Duration(
+		"leader-elect-lease-duration",
+		15*time.Second,
+		"The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.",
+	)
+
 	flag.Parse()
 
 	log := logf.Log.WithName("ovirt-controller-manager")
@@ -55,7 +84,11 @@ func main() {
 
 	// Setup a Manager
 	opts := manager.Options{
-		MetricsBindAddress: *metricsAddr,
+		LeaderElection:          *leaderElect,
+		LeaderElectionNamespace: *leaderElectResourceNamespace,
+		LeaderElectionID:        "cluster-api-provider-ovirt-leader",
+		LeaseDuration:           leaderElectLeaseDuration,
+		MetricsBindAddress:      *metricsAddr,
 	}
 	if *watchNamespace != "" {
 		opts.Namespace = *watchNamespace

From a2b7cfb27b7c28fb8f9bc8beb71957a1a60436ee Mon Sep 17 00:00:00 2001
From: Michael McCune
Date: Tue, 25 Aug 2020 10:38:26 -0400
Subject: [PATCH 2/2] Slow the default lease retry and renew rate for machine controller

Prevent machine controllers from writing in etcd at idle too often by
setting 120s lease, 20s retry and 110s deadline on all renewals. Higher
values cause tests to flake.
---
 cmd/manager/main.go | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/cmd/manager/main.go b/cmd/manager/main.go
index 254684b0..9addabc3 100644
--- a/cmd/manager/main.go
+++ b/cmd/manager/main.go
@@ -38,6 +38,13 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/runtime/signals"
 )
 
+// The default durations for the leader election operations.
+var (
+	leaseDuration = 120 * time.Second
+	renewDeadline = 110 * time.Second
+	retryPeriod   = 20 * time.Second
+)
+
 func main() {
 	klog.InitFlags(nil)
 
@@ -67,7 +74,7 @@ func main() {
 
 	leaderElectLeaseDuration := flag.Duration(
 		"leader-elect-lease-duration",
-		15*time.Second,
+		leaseDuration,
 		"The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.",
 	)
 
@@ -89,6 +96,9 @@ func main() {
 		LeaderElectionID:        "cluster-api-provider-ovirt-leader",
 		LeaseDuration:           leaderElectLeaseDuration,
 		MetricsBindAddress:      *metricsAddr,
+		// Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400
+		RetryPeriod:   &retryPeriod,
+		RenewDeadline: &renewDeadline,
 	}
 	if *watchNamespace != "" {
 		opts.Namespace = *watchNamespace