Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[release/v1.4] Force regenerating CSRs after CCM is deployed #2204

Merged
merged 1 commit into from
Jul 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions pkg/scripts/ccm_csi_migration.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ var (
sudo kubeadm {{ .VERBOSE }} init phase kubelet-start \
--config={{ .WORK_DIR }}/cfg/master_{{ .NODE_ID }}.yaml
`)

ccmMigrationRestartKubelet = heredoc.Doc(`
sudo systemctl restart kubelet
`)
)

func CCMMigrationRegenerateControlPlaneManifests(workdir string, nodeID int, verboseFlag string) (string, error) {
Expand All @@ -52,7 +48,3 @@ func CCMMigrationUpdateKubeletConfig(workdir string, nodeID int, verboseFlag str
"VERBOSE": verboseFlag,
})
}

func CCMMigrationRestartKubelet() (string, error) {
return Render(ccmMigrationRestartKubelet, Data{})
}
8 changes: 8 additions & 0 deletions pkg/scripts/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ var (
fi
{{ end }}
`)

restartKubeletTemplate = heredoc.Doc(`
sudo systemctl restart kubelet
`)
)

func Hostname() string {
Expand All @@ -58,3 +62,7 @@ func RestartKubeAPIServerCrictl(ensure bool) (string, error) {
"ENSURE": ensure,
})
}

// RestartKubelet returns the shell script that restarts the kubelet service
// (`sudo systemctl restart kubelet`). The script has no template parameters,
// so restartKubeletTemplate is returned as-is without going through Render.
func RestartKubelet() string {
return restartKubeletTemplate
}
7 changes: 1 addition & 6 deletions pkg/tasks/ccm_csi_migration.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,7 @@ func ccmMigrationUpdateStaticWorkersKubeletConfigInternal(s *state.State, node *

// Restart Kubelet
logger.Info("Restarting Kubelet...")
script, err := scripts.CCMMigrationRestartKubelet()
if err != nil {
return err
}

_, _, err = s.Runner.RunRaw(script)
_, _, err := s.Runner.RunRaw(scripts.RestartKubelet())
if err != nil {
return err
}
Expand Down
26 changes: 26 additions & 0 deletions pkg/tasks/certs.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,32 @@ func saveCABundleOnControlPlane(s *state.State, _ *kubeoneapi.HostConfig, conn s
return err
}

// restartKubelet executes the kubelet restart script through s.Runner so that
// kubelet is forced to regenerate its CSRs. The node is used only to tag the
// debug log entry with its public address; conn is not used directly by this
// function. Any error returned by the runner is passed back unchanged.
func restartKubelet(s *state.State, node *kubeoneapi.HostConfig, conn ssh.Connection) error {
	nodeLogger := s.Logger.WithField("node", node.PublicAddress)
	nodeLogger.Debug("Restarting Kubelet to force regenerating CSRs...")

	// Only the error matters here; both output values of RunRaw are discarded.
	if _, _, err := s.Runner.RunRaw(scripts.RestartKubelet()); err != nil {
		return err
	}

	return nil
}

// restartKubeletOnControlPlane runs restartKubelet on the control plane nodes
// (in parallel mode) and then blocks for a fixed period so that kubelet has
// time to come back up and issue fresh CSRs. It returns the error from the
// restart task, if any, without sleeping.
func restartKubeletOnControlPlane(s *state.State) error {
	// Time given to kubelet to come up and generate correct CSRs.
	// NB: The next step waits another 20 seconds, so that's one minute in
	// total, which should be enough.
	const csrRegenerationWait = 40 * time.Second

	s.Logger.Infof("Restarting Kubelet on control plane nodes to force Kubelet to generate correct CSRs...")

	// Restarting kubelet on all control plane nodes forces the CSRs to be
	// regenerated.
	err := s.RunTaskOnControlPlane(restartKubelet, state.RunParallel)
	if err != nil {
		return err
	}

	s.Logger.Infof("Waiting %s to give Kubelet time to regenerate CSRs...", csrRegenerationWait)
	time.Sleep(csrRegenerationWait)

	return nil
}

func approvePendingCSR(s *state.State, node *kubeoneapi.HostConfig, conn ssh.Connection) error {
var csrFound bool
sleepTime := 20 * time.Second
Expand Down
25 changes: 25 additions & 0 deletions pkg/tasks/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,31 @@ func WithFullInstall(t Tasks) Tasks {
{Fn: restartKubeAPIServer, ErrMsg: "failed to restart unhealthy kube-apiserver"},
}...).
append(WithResources(nil)...).
append(
Task{
// Node might emit one more CSR for kubelet serving certificates
// after external CCM initializes the node. That's because
// CCM modifies IP addresses in the Node object to properly set
// private and public addresses, DNS names, etc...
// To ensure that we approve those CSRs, we need to force kubelet
// to generate new CSRs as soon as possible, and then approve
// those new CSRs.
// NB: We intentionally do this only on FullInstall because in
// other cases we already have CCM deployed, so this is not
// an issue. Additionally, we do this only for control plane
// nodes because static workers are joined after the CCM is
// deployed.
Fn: func(s *state.State) error {
if err := restartKubeletOnControlPlane(s); err != nil {
return err
}

return s.RunTaskOnAllNodes(approvePendingCSR, true)
},
ErrMsg: "failed to force regenerate kubelet CSRs",
Predicate: func(s *state.State) bool { return s.Cluster.CloudProvider.External },
},
).
append(
Task{
Fn: createMachineDeployments,
Expand Down