Fix version upgrade kubelet support #16932

Merged 2 commits on Dec 4, 2024
81 changes: 68 additions & 13 deletions cmd/kops/update_cluster.go
@@ -27,11 +27,14 @@ import (

"github.com/spf13/cobra"
"github.com/spf13/viper"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog/v2"
"k8s.io/kops/cmd/kops/util"
"k8s.io/kops/pkg/apis/kops"
apisutil "k8s.io/kops/pkg/apis/kops/util"
"k8s.io/kops/pkg/assets"
"k8s.io/kops/pkg/commands/commandutils"
"k8s.io/kops/pkg/kubeconfig"
@@ -67,6 +70,10 @@ type UpdateClusterOptions struct {
SSHPublicKey string
RunTasksOptions fi.RunTasksOptions
AllowKopsDowngrade bool
// Bypasses kubelet vs control plane version skew checks,
// which by default prevent non-control plane instancegroups
// from being updated to a version greater than the control plane
IgnoreKubeletVersionSkew bool
// GetAssets is whether this is invoked from the CmdGetAssets.
GetAssets bool

@@ -103,6 +110,8 @@ func (o *UpdateClusterOptions) InitDefaults() {
o.Target = "direct"
o.SSHPublicKey = ""
o.OutDir = ""
// By default we enforce the version skew between control plane and worker nodes
o.IgnoreKubeletVersionSkew = false

// By default we export a kubecfg, but it doesn't have a static/eternal credential in it any more.
o.CreateKubecfg = true
@@ -163,6 +172,7 @@ func NewCmdUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
cmd.RegisterFlagCompletionFunc("lifecycle-overrides", completeLifecycleOverrides)

cmd.Flags().BoolVar(&options.Prune, "prune", options.Prune, "Delete old revisions of cloud resources that were needed during an upgrade")
cmd.Flags().BoolVar(&options.IgnoreKubeletVersionSkew, "ignore-kubelet-version-skew", options.IgnoreKubeletVersionSkew, "Setting this to true will force updating the kubernetes version on all instance groups, regardless of which control plane version is running")

return cmd
}
@@ -318,20 +328,30 @@ func RunUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer, c *Up
return nil, err
}

minControlPlaneRunningVersion := cluster.Spec.KubernetesVersion
if !c.IgnoreKubeletVersionSkew {
minControlPlaneRunningVersion, err = checkControlPlaneRunningVersion(ctx, cluster.ObjectMeta.Name, minControlPlaneRunningVersion)
if err != nil {
klog.Warningf("error checking control plane running version, assuming no k8s upgrade in progress: %v", err)
} else {
klog.V(2).Infof("successfully checked control plane running version: %v", minControlPlaneRunningVersion)
}
}
applyCmd := &cloudup.ApplyClusterCmd{
Cloud: cloud,
Clientset: clientset,
Cluster: cluster,
DryRun: isDryrun,
AllowKopsDowngrade: c.AllowKopsDowngrade,
RunTasksOptions: &c.RunTasksOptions,
OutDir: c.OutDir,
InstanceGroupFilter: predicates.AllOf(instanceGroupFilters...),
Phase: phase,
TargetName: targetName,
LifecycleOverrides: lifecycleOverrideMap,
GetAssets: c.GetAssets,
DeletionProcessing: deletionProcessing,
ControlPlaneRunningVersion: minControlPlaneRunningVersion,
}

applyResults, err := applyCmd.Run(ctx)
@@ -575,3 +595,38 @@ func matchInstanceGroupRoles(roles []string) predicates.Predicate[*kops.Instance
return false
}
}

// checkControlPlaneRunningVersion returns the minimum control plane running version
func checkControlPlaneRunningVersion(ctx context.Context, clusterName string, version string) (string, error) {
configLoadingRules := clientcmd.NewDefaultClientConfigLoadingRules()
Review comment (Member):
Nit: I think we should try to share this "get kubeconfig" code with the rolling-update logic (I think that also connects to the cluster). Not a blocker though.

config, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
configLoadingRules,
&clientcmd.ConfigOverrides{CurrentContext: clusterName}).ClientConfig()
if err != nil {
return version, fmt.Errorf("cannot load kubecfg settings for %q: %v", clusterName, err)
}

k8sClient, err := kubernetes.NewForConfig(config)
if err != nil {
return version, fmt.Errorf("cannot build kubernetes api client for %q: %v", clusterName, err)
}

parsedVersion, err := apisutil.ParseKubernetesVersion(version)
if err != nil {
return version, fmt.Errorf("cannot parse kubernetes version %q: %v", clusterName, err)
}
nodeList, err := k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{
LabelSelector: "node-role.kubernetes.io/control-plane",
})
if err != nil {
return version, fmt.Errorf("cannot list nodes in cluster %q: %v", clusterName, err)
Review comment (Member):
Ah - this is tricky. If the control plane is offline, we presumably don't want to update the nodes (?)

Reply (Contributor, PR author):
If the control plane is offline, this is the least of our worries lol. But it might be an expected situation (e.g. creating a new cluster), so we just log the error and continue as normal.

}
for _, node := range nodeList.Items {
if apisutil.IsKubernetesGTE(node.Status.NodeInfo.KubeletVersion, *parsedVersion) {
version = node.Status.NodeInfo.KubeletVersion
parsedVersion, _ = apisutil.ParseKubernetesVersion(version)
}

}
return strings.TrimPrefix(version, "v"), nil
}
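A rough sketch of the shared helper suggested in the review nit above; this is not part of the PR. The kubeconfig-loading and client-construction code could be factored into a helper that both update and rolling-update call. The function name buildKubernetesClientForCluster is hypothetical; the clientcmd and kubernetes calls are the same ones already used in checkControlPlaneRunningVersion.

import (
	"fmt"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// buildKubernetesClientForCluster loads the local kubeconfig, selects the context named
// after the cluster, and returns a typed Kubernetes client for it.
func buildKubernetesClientForCluster(clusterName string) (kubernetes.Interface, error) {
	loadingRules := clientcmd.NewDefaultClientConfigLoadingRules()
	restConfig, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
		loadingRules,
		&clientcmd.ConfigOverrides{CurrentContext: clusterName},
	).ClientConfig()
	if err != nil {
		return nil, fmt.Errorf("cannot load kubecfg settings for %q: %w", clusterName, err)
	}
	client, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		return nil, fmt.Errorf("cannot build kubernetes api client for %q: %w", clusterName, err)
	}
	return client, nil
}

With a helper like this, checkControlPlaneRunningVersion could accept a kubernetes.Interface instead of building its own client, which would also make the node-listing logic easier to unit test.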
1 change: 1 addition & 0 deletions docs/cli/kops_update_cluster.md

(Generated documentation file; diff not rendered.)

24 changes: 18 additions & 6 deletions pkg/apis/kops/model/instance_group.go
@@ -34,17 +34,20 @@ type InstanceGroup interface {
// RawClusterSpec returns the cluster spec for the instance group.
// If possible, prefer abstracted methods over accessing this data directly.
RawClusterSpec() *kops.ClusterSpec

// ForceKubernetesVersion overrides the Kubernetes version for this instance group.
// (The default is to use the cluster-wide Kubernetes version, but this allows
// us to override it for the nodes to respect the node skew policy.)
ForceKubernetesVersion(version string) error
}

// ForInstanceGroup creates an InstanceGroup model for the given cluster and instance group.
func ForInstanceGroup(cluster *kops.Cluster, ig *kops.InstanceGroup) (InstanceGroup, error) {
- kubernetesVersionString := cluster.Spec.KubernetesVersion
- kubernetesVersion, err := ParseKubernetesVersion(kubernetesVersionString)
- if err != nil {
- return nil, fmt.Errorf("error parsing Kubernetes version %q: %v", kubernetesVersionString, err)
+ m := &instanceGroupModel{cluster: cluster, ig: ig}
+ if err := m.ForceKubernetesVersion(cluster.Spec.KubernetesVersion); err != nil {
+ return nil, err
}

- return &instanceGroupModel{cluster: cluster, ig: ig, kubernetesVersion: kubernetesVersion}, nil
+ return m, nil
}

// instanceGroupModel is a concrete implementation of InstanceGroup.
@@ -67,3 +70,12 @@ func (m *instanceGroupModel) GetCloudProvider() kops.CloudProviderID {
func (m *instanceGroupModel) RawClusterSpec() *kops.ClusterSpec {
return &m.cluster.Spec
}

func (m *instanceGroupModel) ForceKubernetesVersion(kubernetesVersionString string) error {
kubernetesVersion, err := ParseKubernetesVersion(kubernetesVersionString)
if err != nil {
return fmt.Errorf("error parsing Kubernetes version %q: %v", kubernetesVersionString, err)
}
m.kubernetesVersion = kubernetesVersion
return nil
}
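For illustration only, not part of this PR: a minimal sketch of how the new ForceKubernetesVersion method is intended to be used by callers, which build the model with the cluster-wide version and then pin worker groups to the kubelet-supported version. The helper name igModelForWorkers and the role-based check are assumptions; the actual PR keys the override off whether the instance group runs an API server (see the nodeupconfigbuilder.go change below).

import (
	"k8s.io/kops/pkg/apis/kops"
	"k8s.io/kops/pkg/apis/kops/model"
)

// igModelForWorkers builds an instance group model and, for worker groups, overrides the
// Kubernetes version so the rendered kubelet never gets ahead of the control plane.
func igModelForWorkers(cluster *kops.Cluster, ig *kops.InstanceGroup, kubeletSupportedVersion string) (model.InstanceGroup, error) {
	igModel, err := model.ForInstanceGroup(cluster, ig)
	if err != nil {
		return nil, err
	}
	if ig.Spec.Role != kops.InstanceGroupRoleControlPlane && kubeletSupportedVersion != "" {
		if err := igModel.ForceKubernetesVersion(kubeletSupportedVersion); err != nil {
			return nil, err
		}
	}
	return igModel, nil
}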
5 changes: 5 additions & 0 deletions pkg/assets/builder.go
@@ -51,6 +51,11 @@ type AssetBuilder struct {
// KubernetesVersion is the version of kubernetes we are installing
KubernetesVersion semver.Version

// KubeletSupportedVersion is the max version of kubelet that we are currently allowed to run on worker nodes.
// This is used to avoid violating the kubelet supported version skew policy,
// (we are not allowed to run a newer kubelet on a worker node than the control plane)
KubeletSupportedVersion string

// StaticManifests records manifests used by nodeup:
// * e.g. sidecar manifests for static pods run by kubelet
StaticManifests []*StaticManifest
6 changes: 6 additions & 0 deletions pkg/nodemodel/nodeupconfigbuilder.go
@@ -224,6 +224,12 @@ func (n *nodeUpConfigBuilder) BuildConfig(ig *kops.InstanceGroup, wellKnownAddre
return nil, nil, fmt.Errorf("building instance group model: %w", err)
}

if !hasAPIServer && n.assetBuilder.KubeletSupportedVersion != "" {
if err := igModel.ForceKubernetesVersion(n.assetBuilder.KubeletSupportedVersion); err != nil {
return nil, nil, err
}
}

kubernetesAssets, err := BuildKubernetesFileAssets(igModel, n.assetBuilder)
if err != nil {
return nil, nil, err
6 changes: 6 additions & 0 deletions upup/pkg/fi/cloudup/apply_cluster.go
@@ -137,6 +137,9 @@ type ApplyClusterCmd struct {

// InstanceGroupFilter is a predicate that restricts which instance groups we will update.
InstanceGroupFilter predicates.Predicate[*kops.InstanceGroup]

// The current oldest version of control plane nodes, defaults to version defined in cluster spec if IgnoreVersionSkew was set
ControlPlaneRunningVersion string
}

// ApplyResults holds information about an ApplyClusterCmd operation.
@@ -239,6 +242,9 @@ func (c *ApplyClusterCmd) Run(ctx context.Context) (*ApplyResults, error) {
}

assetBuilder := assets.NewAssetBuilder(c.Clientset.VFSContext(), c.Cluster.Spec.Assets, c.Cluster.Spec.KubernetesVersion, c.GetAssets)
if len(c.ControlPlaneRunningVersion) > 0 && c.ControlPlaneRunningVersion != c.Cluster.Spec.KubernetesVersion {
assetBuilder.KubeletSupportedVersion = c.ControlPlaneRunningVersion
}
err = c.upgradeSpecs(ctx, assetBuilder)
if err != nil {
return nil, err