Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 1864352: Add leader election mechanism to release 4.5 #664

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions cmd/machine-healthcheck/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"flag"
"runtime"
"time"

"github.com/openshift/machine-api-operator/pkg/controller/machinehealthcheck"

Expand All @@ -16,14 +17,44 @@ import (
"sigs.k8s.io/controller-runtime/pkg/runtime/signals"
)

// The default durations for the leader electrion operations.
var (
leaseDuration = 120 * time.Second
renewDealine = 110 * time.Second
retryPeriod = 90 * time.Second
)

func printVersion() {
glog.Infof("Go Version: %s", runtime.Version())
glog.Infof("Go OS/Arch: %s/%s", runtime.GOOS, runtime.GOARCH)
glog.Infof("operator-sdk Version: %v", sdkVersion.Version)
}

func main() {
watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.")
watchNamespace := flag.String(
"namespace",
"",
"Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.",
)

leaderElectResourceNamespace := flag.String(
"leader-elect-resource-namespace",
"",
"The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.",
)

leaderElect := flag.Bool(
"leader-elect",
false,
"Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.",
)

leaderElectLeaseDuration := flag.Duration(
"leader-elect-lease-duration",
leaseDuration,
"The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.",
)

flag.Parse()
printVersion()

Expand All @@ -35,8 +66,16 @@ func main() {

opts := manager.Options{
// Disable metrics serving
MetricsBindAddress: "0",
MetricsBindAddress: "0",
LeaderElection: *leaderElect,
LeaderElectionNamespace: *leaderElectResourceNamespace,
LeaderElectionID: "cluster-api-provider-healthcheck-leader",
LeaseDuration: leaderElectLeaseDuration,
// Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400
RetryPeriod: &retryPeriod,
RenewDeadline: &renewDealine,
}

if *watchNamespace != "" {
opts.Namespace = *watchNamespace
glog.Infof("Watching machine-api objects only in namespace %q for reconciliation.", opts.Namespace)
Expand Down
38 changes: 35 additions & 3 deletions cmd/machineset/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,37 @@ import (
"sigs.k8s.io/controller-runtime/pkg/runtime/signals"
)

// The default durations for the leader electrion operations.
var (
leaseDuration = 120 * time.Second
renewDealine = 110 * time.Second
retryPeriod = 90 * time.Second
)

func main() {
flag.Set("logtostderr", "true")
klog.InitFlags(nil)
watchNamespace := flag.String("namespace", "",
"Namespace that the controller watches to reconcile cluster-api objects. If unspecified, the controller watches for cluster-api objects across all namespaces.")

leaderElectResourceNamespace := flag.String(
"leader-elect-resource-namespace",
"",
"The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.",
)

leaderElect := flag.Bool(
"leader-elect",
false,
"Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.",
)

leaderElectLeaseDuration := flag.Duration(
"leader-elect-lease-duration",
leaseDuration,
"The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.",
)

flag.Parse()
if *watchNamespace != "" {
log.Printf("Watching cluster-api objects only in namespace %q for reconciliation.", *watchNamespace)
Expand All @@ -52,9 +77,16 @@ func main() {
syncPeriod := 10 * time.Minute
opts := manager.Options{
// Disable metrics serving
MetricsBindAddress: "0",
SyncPeriod: &syncPeriod,
Namespace: *watchNamespace,
MetricsBindAddress: "0",
SyncPeriod: &syncPeriod,
Namespace: *watchNamespace,
LeaderElection: *leaderElect,
LeaderElectionNamespace: *leaderElectResourceNamespace,
LeaderElectionID: "cluster-api-provider-machineset-leader",
LeaseDuration: leaderElectLeaseDuration,
// Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400
RetryPeriod: &retryPeriod,
RenewDeadline: &renewDealine,
}
mgr, err := manager.New(cfg, opts)
if err != nil {
Expand Down
42 changes: 40 additions & 2 deletions cmd/nodelink-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"flag"
"runtime"
"time"

mapiv1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1"
"github.com/openshift/machine-api-operator/pkg/controller"
Expand All @@ -14,6 +15,13 @@ import (
"sigs.k8s.io/controller-runtime/pkg/runtime/signals"
)

// The default durations for the leader electrion operations.
var (
leaseDuration = 120 * time.Second
renewDealine = 110 * time.Second
retryPeriod = 90 * time.Second
)

func printVersion() {
klog.Infof("Go Version: %s", runtime.Version())
klog.Infof("Go OS/Arch: %s/%s", runtime.GOOS, runtime.GOARCH)
Expand All @@ -23,7 +31,30 @@ func printVersion() {
func main() {
printVersion()

watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.")
watchNamespace := flag.String(
"namespace",
"",
"Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.",
)

leaderElectResourceNamespace := flag.String(
"leader-elect-resource-namespace",
"",
"The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.",
)

leaderElect := flag.Bool(
"leader-elect",
false,
"Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.",
)

leaderElectLeaseDuration := flag.Duration(
"leader-elect-lease-duration",
leaseDuration,
"The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.",
)

klog.InitFlags(nil)
flag.Set("logtostderr", "true")
flag.Parse()
Expand All @@ -36,7 +67,14 @@ func main() {

opts := manager.Options{
// Disable metrics serving
MetricsBindAddress: "0",
MetricsBindAddress: "0",
LeaderElection: *leaderElect,
LeaderElectionNamespace: *leaderElectResourceNamespace,
LeaderElectionID: "cluster-api-provider-nodelink-leader",
LeaseDuration: leaderElectLeaseDuration,
// Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400
RetryPeriod: &retryPeriod,
RenewDeadline: &renewDealine,
}
if *watchNamespace != "" {
opts.Namespace = *watchNamespace
Expand Down
43 changes: 41 additions & 2 deletions cmd/vsphere/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"flag"
"fmt"
"os"
"time"

configv1 "github.com/openshift/api/config/v1"
"github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1"
Expand All @@ -17,12 +18,42 @@ import (
"sigs.k8s.io/controller-runtime/pkg/runtime/signals"
)

// The default durations for the leader electrion operations.
var (
leaseDuration = 120 * time.Second
renewDealine = 110 * time.Second
retryPeriod = 90 * time.Second
)

func main() {
var printVersion bool
flag.BoolVar(&printVersion, "version", false, "print version and exit")

klog.InitFlags(nil)
watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.")
watchNamespace := flag.String(
"namespace",
"",
"Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.",
)

leaderElectResourceNamespace := flag.String(
"leader-elect-resource-namespace",
"",
"The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.",
)

leaderElect := flag.Bool(
"leader-elect",
false,
"Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.",
)

leaderElectLeaseDuration := flag.Duration(
"leader-elect-lease-duration",
leaseDuration,
"The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.",
)

flag.Set("logtostderr", "true")
flag.Parse()

Expand All @@ -35,8 +66,16 @@ func main() {

opts := manager.Options{
// Disable metrics serving
MetricsBindAddress: "0",
MetricsBindAddress: "0",
LeaderElection: *leaderElect,
LeaderElectionNamespace: *leaderElectResourceNamespace,
LeaderElectionID: "cluster-api-provider-vsphere-leader",
LeaseDuration: leaderElectLeaseDuration,
// Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400
RetryPeriod: &retryPeriod,
RenewDeadline: &renewDealine,
}

if *watchNamespace != "" {
opts.Namespace = *watchNamespace
klog.Infof("Watching machine-api objects only in namespace %q for reconciliation.", opts.Namespace)
Expand Down
2 changes: 2 additions & 0 deletions pkg/operator/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@ func newContainers(config *OperatorConfig, features map[string]bool) []corev1.Co
args := []string{
"--logtostderr=true",
"--v=3",
"--leader-elect=true",
"--leader-elect-lease-duration=120s",
fmt.Sprintf("--namespace=%s", config.TargetNamespace),
}

Expand Down