Skip to content

Commit

Permalink
Add jitter to client config retry
Browse files Browse the repository at this point in the history
Also:
* Replaces labeled for/continue RETRY loops with wait helpers for improved readability
* Pulls secrets and nodes from cache for node password verification
* Migrate nodepassword tests to wrangler mocks for better code reuse

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
  • Loading branch information
brandond committed Nov 15, 2023
1 parent 78ea593 commit cb0198e
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 150 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ require (
github.com/go-bindata/go-bindata v3.1.2+incompatible
github.com/go-sql-driver/mysql v1.7.1
github.com/go-test/deep v1.0.7
github.com/golang/mock v1.6.0
github.com/google/cadvisor v0.47.3
github.com/google/uuid v1.3.0
github.com/gorilla/mux v1.8.0
Expand Down Expand Up @@ -257,7 +258,6 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/mock v1.6.0 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/btree v1.1.2 // indirect
github.com/google/cel-go v0.16.1 // indirect
Expand Down
67 changes: 36 additions & 31 deletions pkg/agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/rancher/wrangler/pkg/slice"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/util/json"
"k8s.io/apimachinery/pkg/util/wait"
utilsnet "k8s.io/utils/net"
)

Expand All @@ -42,65 +43,69 @@ const (

// Get returns a pointer to a completed Node configuration struct,
// containing a merging of the local CLI configuration with settings from the server.
// Node configuration includes client certificates, which requires node password verification,
// so this is somewhat computationally expensive on the server side, and is retried with jitter
// to avoid having clients hammer on the server at fixed periods.
// A call to this will bock until agent configuration is successfully returned by the
// server.
func Get(ctx context.Context, agent cmds.Agent, proxy proxy.Proxy) *config.Node {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
RETRY:
for {
agentConfig, err := get(ctx, &agent, proxy)
var agentConfig *config.Node
var err error

// This would be more clear as wait.PollImmediateUntilWithContext, but that function
// does not support jittering, so we instead use wait.JitterUntilWithContext, and cancel
// the context on success.
ctx, cancel := context.WithCancel(ctx)
wait.JitterUntilWithContext(ctx, func(ctx context.Context) {
agentConfig, err = get(ctx, &agent, proxy)
if err != nil {
logrus.Infof("Waiting to retrieve agent configuration; server is not ready: %v", err)
for range ticker.C {
continue RETRY
}
} else {
cancel()
}
return agentConfig
}
}, 5*time.Second, 1.0, true)
return agentConfig
}

// KubeProxyDisabled returns a bool indicating whether or not kube-proxy has been disabled in the
// server configuration. The server may not have a complete view of cluster configuration until
// after all startup hooks have completed, so a call to this will block until after the server's
// readyz endpoint returns OK.
func KubeProxyDisabled(ctx context.Context, node *config.Node, proxy proxy.Proxy) bool {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
RETRY:
for {
disabled, err := getKubeProxyDisabled(ctx, node, proxy)
var disabled bool
var err error

wait.PollImmediateUntilWithContext(ctx, 5*time.Second, func(ctx context.Context) (bool, error) {
disabled, err = getKubeProxyDisabled(ctx, node, proxy)
if err != nil {
logrus.Infof("Waiting to retrieve kube-proxy configuration; server is not ready: %v", err)
for range ticker.C {
continue RETRY
}
return false, nil
}
return disabled
}
return true, nil
})
return disabled
}

// APIServers returns a list of apiserver endpoints, suitable for seeding client loadbalancer configurations.
// This function will block until it can return a populated list of apiservers, or if the remote server returns
// an error (indicating that it does not support this functionality).
func APIServers(ctx context.Context, node *config.Node, proxy proxy.Proxy) []string {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
RETRY:
for {
addresses, err := getAPIServers(ctx, node, proxy)
var addresses []string
var err error

wait.PollImmediateUntilWithContext(ctx, 5*time.Second, func(ctx context.Context) (bool, error) {
addresses, err = getAPIServers(ctx, node, proxy)
if err != nil {
logrus.Infof("Failed to retrieve list of apiservers from server: %v", err)
return nil
return false, err
}
if len(addresses) == 0 {
logrus.Infof("Waiting for apiserver addresses")
for range ticker.C {
continue RETRY
}
return false, nil
}
return addresses
}
return true, nil
})
return addresses
}

type HTTPRequester func(u string, client *http.Client, username, password, token string) ([]byte, error)
Expand Down
24 changes: 11 additions & 13 deletions pkg/node/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@ import (

func Register(ctx context.Context,
modCoreDNS bool,
secretClient coreclient.SecretClient,
configMap coreclient.ConfigMapController,
secrets coreclient.SecretController,
configMaps coreclient.ConfigMapController,
nodes coreclient.NodeController,
) error {
h := &handler{
modCoreDNS: modCoreDNS,
secretClient: secretClient,
configCache: configMap.Cache(),
configClient: configMap,
modCoreDNS: modCoreDNS,
secrets: secrets,
configMaps: configMaps,
}
nodes.OnChange(ctx, "node", h.onChange)
nodes.OnRemove(ctx, "node", h.onRemove)
Expand All @@ -30,10 +29,9 @@ func Register(ctx context.Context,
}

type handler struct {
modCoreDNS bool
secretClient coreclient.SecretClient
configCache coreclient.ConfigMapCache
configClient coreclient.ConfigMapClient
modCoreDNS bool
secrets coreclient.SecretController
configMaps coreclient.ConfigMapController
}

func (h *handler) onChange(key string, node *core.Node) (*core.Node, error) {
Expand Down Expand Up @@ -78,7 +76,7 @@ func (h *handler) updateCoreDNSConfigMap(nodeName, nodeAddress string, removed b
return nil
}

configMapCache, err := h.configCache.Get("kube-system", "coredns")
configMapCache, err := h.configMaps.Cache().Get("kube-system", "coredns")
if err != nil || configMapCache == nil {
logrus.Warn(errors.Wrap(err, "Unable to fetch coredns config map"))
return nil
Expand Down Expand Up @@ -120,7 +118,7 @@ func (h *handler) updateCoreDNSConfigMap(nodeName, nodeAddress string, removed b
}
configMap.Data["NodeHosts"] = newHosts

if _, err := h.configClient.Update(configMap); err != nil {
if _, err := h.configMaps.Update(configMap); err != nil {
return err
}

Expand All @@ -135,5 +133,5 @@ func (h *handler) updateCoreDNSConfigMap(nodeName, nodeAddress string, removed b
}

func (h *handler) removeNodePassword(nodeName string) error {
return nodepassword.Delete(h.secretClient, nodeName)
return nodepassword.Delete(h.secrets, nodeName)
}
18 changes: 8 additions & 10 deletions pkg/nodepassword/nodepassword.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ func getSecretName(nodeName string) string {
return strings.ToLower(nodeName + ".node-password." + version.Program)
}

func verifyHash(secretClient coreclient.SecretClient, nodeName, pass string) error {
func verifyHash(secretClient coreclient.SecretController, nodeName, pass string) error {
name := getSecretName(nodeName)
secret, err := secretClient.Get(metav1.NamespaceSystem, name, metav1.GetOptions{})
secret, err := secretClient.Cache().Get(metav1.NamespaceSystem, name)
if err != nil {
return &passwordError{node: nodeName, err: err}
}
Expand All @@ -67,7 +67,7 @@ func verifyHash(secretClient coreclient.SecretClient, nodeName, pass string) err
}

// Ensure will verify a node-password secret if it exists, otherwise it will create one
func Ensure(secretClient coreclient.SecretClient, nodeName, pass string) error {
func Ensure(secretClient coreclient.SecretController, nodeName, pass string) error {
err := verifyHash(secretClient, nodeName, pass)
if apierrors.IsNotFound(err) {
var hash string
Expand All @@ -88,12 +88,12 @@ func Ensure(secretClient coreclient.SecretClient, nodeName, pass string) error {
}

// Delete will remove a node-password secret
func Delete(secretClient coreclient.SecretClient, nodeName string) error {
func Delete(secretClient coreclient.SecretController, nodeName string) error {
return secretClient.Delete(metav1.NamespaceSystem, getSecretName(nodeName), &metav1.DeleteOptions{})
}

// MigrateFile moves password file entries to secrets
func MigrateFile(secretClient coreclient.SecretClient, nodeClient coreclient.NodeClient, passwordFile string) error {
func MigrateFile(secretClient coreclient.SecretController, nodeClient coreclient.NodeController, passwordFile string) error {
_, err := os.Stat(passwordFile)
if os.IsNotExist(err) {
return nil
Expand All @@ -108,11 +108,9 @@ func MigrateFile(secretClient coreclient.SecretClient, nodeClient coreclient.Nod
}

nodeNames := []string{}
nodeList, _ := nodeClient.List(metav1.ListOptions{})
if nodeList != nil {
for _, node := range nodeList.Items {
nodeNames = append(nodeNames, node.Name)
}
nodeList, _ := nodeClient.Cache().List(nil)
for _, node := range nodeList {
nodeNames = append(nodeNames, node.Name)
}
if len(nodeNames) == 0 {
nodeNames = append(nodeNames, passwd.Users()...)
Expand Down
Loading

0 comments on commit cb0198e

Please sign in to comment.