Use true leader elector for controller counting
The previous iteration, while successful at proving the approach in general, had the drawback of the konnectivity-server process flapping a bit too much.
This had the unwelcome side effect of the agents getting confused about which servers, and how many of them, they should connect to. That in turn causes connectivity issues between the API and the workers, as all of that communication goes through the konnectivity tunnels.

This commit changes a couple of things:
- Standard leader election is now used for the per-controller leases. It is more accurate, but uses a bit more resources.
- Instead of restarting the whole konnectivity component, we now restart only the supervisor part (i.e. the konnectivity-server process itself).
- The per-controller lease handling is broken out into a separate component to keep the concerns separate.
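For context, "standard leader election" above refers to the Lease-based leader election from client-go, which the k0s leaderelection package presumably builds on. A minimal sketch of that upstream pattern; the function name, namespace, and timings are illustrative for this sketch, not k0s's actual configuration:

import (
    "context"
    "time"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/leaderelection"
    "k8s.io/client-go/tools/leaderelection/resourcelock"
)

// runControllerLease runs a Lease-based election in which this controller is
// the only expected candidate, so holding the lease simply signals liveness.
func runControllerLease(ctx context.Context, client kubernetes.Interface, leaseName, identity string) {
    lock := &resourcelock.LeaseLock{
        // the kube-node-lease namespace is an assumption for this sketch
        LeaseMeta:  metav1.ObjectMeta{Name: leaseName, Namespace: "kube-node-lease"},
        Client:     client.CoordinationV1(),
        LockConfig: resourcelock.ResourceLockConfig{Identity: identity},
    }
    leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
        Lock:          lock,
        LeaseDuration: 15 * time.Second, // illustrative timings
        RenewDeadline: 10 * time.Second,
        RetryPeriod:   2 * time.Second,
        Callbacks: leaderelection.LeaderCallbacks{
            OnStartedLeading: func(ctx context.Context) { /* lease acquired and being renewed */ },
            OnStoppedLeading: func() { /* lease lost */ },
        },
    })
}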

Signed-off-by: Jussi Nummelin <jnummelin@mirantis.com>
jnummelin committed Feb 4, 2021
1 parent c6937f5 commit a749381
Showing 5 changed files with 260 additions and 412 deletions.
8 changes: 8 additions & 0 deletions cmd/server.go
@@ -180,6 +180,14 @@ func startServer(token string) error {
        LogLevel: logging["kube-apiserver"],
        Storage:  storageBackend,
    })

    if clusterConfig.Spec.API.ExternalAddress != "" {
        componentManager.Add(&server.ControllerLease{
            ClusterConfig:     clusterConfig,
            KubeClientFactory: adminClientFactory,
        })
    }

    componentManager.Add(&server.Konnectivity{
        ClusterConfig: clusterConfig,
        LogLevel:      logging["konnectivity-server"],
87 changes: 87 additions & 0 deletions pkg/component/server/controllerlease.go
@@ -0,0 +1,87 @@
package server

import (
    "context"
    "fmt"
    "os"

    "github.com/sirupsen/logrus"

    config "github.com/k0sproject/k0s/pkg/apis/v1beta1"
    kubeutil "github.com/k0sproject/k0s/pkg/kubernetes"
    "github.com/k0sproject/k0s/pkg/leaderelection"
)

// ControllerLease implements a component that maintains a lease per controller.
// The per-controller leases are used to determine the number of currently running controllers.
type ControllerLease struct {
    ClusterConfig     *config.ClusterConfig
    KubeClientFactory kubeutil.ClientFactory

    cancelCtx   context.Context
    cancelFunc  context.CancelFunc
    leaseCancel context.CancelFunc
}

// Init does nothing; the component needs no initialization
func (c *ControllerLease) Init() error {
    return nil
}

// Run runs the leader elector to keep the lease object up-to-date.
func (c *ControllerLease) Run() error {
    c.cancelCtx, c.cancelFunc = context.WithCancel(context.Background())
    log := logrus.WithFields(logrus.Fields{"component": "controllerlease"})
    client, err := c.KubeClientFactory.GetClient()
    if err != nil {
        return fmt.Errorf("can't create kubernetes rest client for lease pool: %v", err)
    }

    // the hostname is included in the lease name so it is clear which controller each lease belongs to
    holderIdentity, err := os.Hostname()
    if err != nil {
        return fmt.Errorf("can't determine hostname for lease name: %v", err)
    }
    leaseID := fmt.Sprintf("k0s-ctrl-%s", holderIdentity)

    leasePool, err := leaderelection.NewLeasePool(client, leaseID, leaderelection.WithLogger(log))
    if err != nil {
        return err
    }

    events, cancel, err := leasePool.Watch()
    if err != nil {
        return err
    }

    c.leaseCancel = cancel

    go func() {
        for {
            select {
            case <-events.AcquiredLease:
                log.Info("acquired leader lease")
            case <-events.LostLease:
                // each controller holds its own lease with no competing candidates, so this is unexpected
                log.Error("lost leader lease, this should not happen")
            case <-c.cancelCtx.Done():
                return
            }
        }
    }()
    return nil
}

// Stop stops the component
func (c *ControllerLease) Stop() error {
    if c.leaseCancel != nil {
        c.leaseCancel()
    }

    if c.cancelFunc != nil {
        c.cancelFunc()
    }
    return nil
}

// Healthy is a no-op healthcheck
func (c *ControllerLease) Healthy() error { return nil }
