Skip to content

Commit

Permalink
Fix agents removing configured supervisor address
Browse files Browse the repository at this point in the history
Agents shouldn't replace the configured server address. Doing
so breaks the agent's ability to fall back to the fixed registration
endpoint when all servers are down, because the configured address gets
replaced with the first discovered apiserver address. The fixed registration
endpoint is restored as the default when the service is restarted, but losing
it until then is not the correct behavior. The replacement should only have been
done on etcd-only nodes that start up using their local supervisor but need to
switch to a control-plane node as soon as one is available.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
  • Loading branch information
brandond committed Jul 13, 2024
1 parent e217bd6 commit a219fcb
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pkg/agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func APIServers(ctx context.Context, node *config.Node, proxy proxy.Proxy) []str
return false, err
}
if len(addresses) == 0 {
logrus.Infof("Waiting for apiserver addresses")
logrus.Infof("Waiting for supervisor to provide apiserver addresses")
return false, nil
}
return true, nil
Expand Down
19 changes: 15 additions & 4 deletions pkg/agent/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -530,20 +530,31 @@ func setupTunnelAndRunAgent(ctx context.Context, nodeConfig *daemonconfig.Node,
}

func waitForAPIServerAddresses(ctx context.Context, nodeConfig *daemonconfig.Node, cfg cmds.Agent, proxy proxy.Proxy) error {
var localSupervisorDefault bool
if addresses := proxy.SupervisorAddresses(); len(addresses) > 0 {
host, _, _ := net.SplitHostPort(addresses[0])
if host == "127.0.0.1" || host == "::1" {
localSupervisorDefault = true
}
}

for {
select {
case <-time.After(5 * time.Second):
logrus.Info("Waiting for apiserver addresses")
logrus.Info("Waiting for control-plane node to register apiserver addresses in etcd")
case addresses := <-cfg.APIAddressCh:
for i, a := range addresses {
host, _, err := net.SplitHostPort(a)
if err == nil {
addresses[i] = net.JoinHostPort(host, strconv.Itoa(nodeConfig.ServerHTTPSPort))
if i == 0 {
proxy.SetSupervisorDefault(addresses[i])
}
}
}
// If this is an etcd-only node that started up using its local supervisor,
// switch to using a control-plane node as the supervisor. Otherwise, leave the
// configured server address as the default.
if localSupervisorDefault && len(addresses) > 0 {
proxy.SetSupervisorDefault(addresses[0])
}
proxy.Update(addresses)
return nil
case <-ctx.Done():
Expand Down
21 changes: 18 additions & 3 deletions pkg/agent/tunnel/tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,18 +124,33 @@ func Setup(ctx context.Context, config *daemonconfig.Node, proxy proxy.Proxy) er
// The loadbalancer is only disabled when there is a local apiserver. Servers without a local
// apiserver load-balance to themselves initially, then switch over to an apiserver node as soon
// as we get some addresses from the code below.
var localSupervisorDefault bool
if addresses := proxy.SupervisorAddresses(); len(addresses) > 0 {
host, _, _ := net.SplitHostPort(addresses[0])
if host == "127.0.0.1" || host == "::1" {
localSupervisorDefault = true
}
}

if proxy.IsSupervisorLBEnabled() && proxy.SupervisorURL() != "" {
logrus.Info("Getting list of apiserver endpoints from server")
// If not running an apiserver locally, try to get a list of apiservers from the server we're
// connecting to. If that fails, fall back to querying the endpoints list from Kubernetes. This
// fallback requires that the server we're joining be running an apiserver, but is the only safe
// thing to do if its supervisor is down-level and can't provide us with an endpoint list.
if addresses := agentconfig.APIServers(ctx, config, proxy); len(addresses) > 0 {
proxy.SetSupervisorDefault(addresses[0])
addresses := agentconfig.APIServers(ctx, config, proxy)
logrus.Infof("Got apiserver addresses from supervisor: %v", addresses)

if len(addresses) > 0 {
if localSupervisorDefault {
proxy.SetSupervisorDefault(addresses[0])
}
proxy.Update(addresses)
} else {
if endpoint, _ := client.CoreV1().Endpoints(metav1.NamespaceDefault).Get(ctx, "kubernetes", metav1.GetOptions{}); endpoint != nil {
if addresses := util.GetAddresses(endpoint); len(addresses) > 0 {
addresses = util.GetAddresses(endpoint)
logrus.Infof("Got apiserver addresses from kubernetes endpoints: %v", addresses)
if len(addresses) > 0 {
proxy.Update(addresses)
}
}
Expand Down

0 comments on commit a219fcb

Please sign in to comment.