CCM/CSI migration: add the hidden "kubeone migrate to-ccm-csi" command.

This text precedes the first "diff --git" header and is not part of any hunk.

What the patch does, per file:

  * pkg/cmd/apply.go
      runApply now builds its probe task list with
      tasks.WithProbesAndSafeguard instead of tasks.WithProbes.
  * pkg/cmd/migrate.go
      Registers migrateToCCMCSICmd (Hidden: true) under the migrate command
      and adds runMigrateToCCMCSI. That function builds the State, checks
      credentials.ProviderCredentials, runs WithHostnameOS + WithProbes
      (no safeguard task), refuses to continue unless
      LiveCluster.IsProvisioned() and LiveCluster.Healthy() are true, and
      then runs tasks.WithCCMCSIMigration.
  * pkg/state/cluster.go, pkg/state/context.go
      Adds Cluster.CCMStatus (InTreeCloudProviderEnabled,
      ExternalCCMDeployed), State.CCMMigrationComplete (never set in this
      patch, see the TODO) and State.ShouldEnableInTreeCloudProvider. With
      no CCMStatus the latter falls back to
      Cluster.CloudProvider.CloudProviderInTree(); otherwise it returns
      InTreeCloudProviderEnabled && !CCMMigrationComplete.
  * pkg/tasks/ccm_csi_migration.go (new)
      validateExternalCloudProviderConfig rejects the migration when the
      live cluster already runs the external CCM without the in-tree
      provider, when the provider is not OpenStack, or when
      .cloudProvider.external is not enabled.
  * pkg/tasks/probes.go
      safeguard gains two CCM consistency checks. investigateCluster stores
      the result of the new detectCCMMigrationStatus, which lists pods in
      metav1.NamespaceSystem by label: first component=kube-controller-manager,
      then (OpenStack only) k8s-app=openstack-cloud-controller-manager.
  * pkg/tasks/tasks.go
      WithProbes now only runs runProbes. The previous behaviour
      (runProbes + safeguard) moves to WithProbesAndSafeguard, which
      WithHostnameOSAndProbes now uses. Adds WithCCMCSIMigration.
  * pkg/templates/kubeadm/v1beta1, v1beta2, v1beta3
      NewConfig keys the in-tree cloud-config block on
      s.ShouldEnableInTreeCloudProvider(). With .cloudProvider.external set
      and the in-tree provider still enabled, it keeps the cloud-provider
      flags and sets the controller-manager "controllers" argument instead
      of switching to cloud-provider=external.

Review notes:

  * Fixed in this revision: the second pod listing in
    detectCCMMigrationStatus (external CCM pods) wrapped its error with the
    message of the first listing ("unable to list kube-controller-manager
    pods"). It now names the external cloud-controller-manager. Only an
    added line changed, so hunk sizes are unaffected. The "index" line for
    pkg/tasks/probes.go is kept as recorded, so its b-side blob id no longer
    corresponds to the edited content.
  * NOTE(review): detectCCMMigrationStatus inspects only
    pod.Spec.Containers[0] and treats any argument that starts with
    "--cloud-provider" and does not contain "external" as the in-tree
    provider being enabled. Confirm both are acceptable.
  * NOTE(review): the line structure of this patch was recovered from a
    whitespace-collapsed copy. Blank lines were placed so that every hunk
    matches its "@@" counts. Tabs and gofmt column alignment were re-derived
    from the lines visible in each hunk, so context and removed lines may
    differ from the target tree in whitespace only. If a hunk is rejected,
    retry with "git apply --ignore-whitespace".

diff --git a/pkg/cmd/apply.go b/pkg/cmd/apply.go
index 814202aaf..2e1e83b2f 100644
--- a/pkg/cmd/apply.go
+++ b/pkg/cmd/apply.go
@@ -172,7 +172,7 @@ func runApply(opts *applyOpts) error {
 
 	// Probe the cluster for the actual state and the needed tasks.
 	probbing := tasks.WithHostnameOS(nil)
-	probbing = tasks.WithProbes(probbing)
+	probbing = tasks.WithProbesAndSafeguard(probbing)
 
 	if err = probbing.Run(s); err != nil {
 		return err
diff --git a/pkg/cmd/migrate.go b/pkg/cmd/migrate.go
index a67eea79b..bc2a63aa6 100644
--- a/pkg/cmd/migrate.go
+++ b/pkg/cmd/migrate.go
@@ -22,6 +22,7 @@ import (
 	"github.com/spf13/cobra"
 	"github.com/spf13/pflag"
 
+	"k8c.io/kubeone/pkg/credentials"
 	"k8c.io/kubeone/pkg/tasks"
 )
 
@@ -32,6 +33,7 @@ func migrateCmd(fs *pflag.FlagSet) *cobra.Command {
 	}
 
 	cmd.AddCommand(migrateToContainerdCmd(fs))
+	cmd.AddCommand(migrateToCCMCSICmd(fs))
 
 	return cmd
 }
@@ -55,6 +57,30 @@ func migrateToContainerdCmd(fs *pflag.FlagSet) *cobra.Command {
 	}
 }
 
+func migrateToCCMCSICmd(fs *pflag.FlagSet) *cobra.Command {
+	return &cobra.Command{
+		Use:   "to-ccm-csi",
+		Short: "Migrate live cluster from the in-tree cloud provider to external CCM and CSI plugin",
+		// TODO(xmudrii): Add which providers are supported in the long description.
+		Long: heredoc.Doc(`
+			Following the in-tree cloud provider deprecation http://kep.k8s.io/2395
+			this command helps to migrate from the in-tree cloud provider to external CCM and CSI plugin.
+			This command is currently only available for some providers. We'll extend it for all providers with
+			in-tree cloud provider implementation in the future.
+		`),
+		// TODO: Remove hidden once complete
+		Hidden: true,
+		RunE: func(_ *cobra.Command, _ []string) error {
+			gopts, err := persistentGlobalOptions(fs)
+			if err != nil {
+				return errors.Wrap(err, "unable to get global flags")
+			}
+
+			return runMigrateToCCMCSI(gopts)
+		},
+	}
+}
+
 func runMigrateToContainerd(opts *globalOptions) error {
 	s, err := opts.BuildState()
 	if err != nil {
@@ -63,3 +89,33 @@ func runMigrateToContainerd(opts *globalOptions) error {
 
 	return errors.Wrap(tasks.WithContainerDMigration(nil).Run(s), "failed to get cluster status")
 }
+
+func runMigrateToCCMCSI(opts *globalOptions) error {
+	s, err := opts.BuildState()
+	if err != nil {
+		return errors.Wrap(err, "failed to initialize State")
+	}
+
+	// Validate credentials
+	_, err = credentials.ProviderCredentials(s.Cluster.CloudProvider, opts.CredentialsFile)
+	if err != nil {
+		return errors.Wrap(err, "failed to validate credentials")
+	}
+
+	// Probe the cluster for the actual state and the needed tasks.
+	probbing := tasks.WithHostnameOS(nil)
+	probbing = tasks.WithProbes(probbing)
+
+	if err = probbing.Run(s); err != nil {
+		return err
+	}
+
+	if !s.LiveCluster.IsProvisioned() {
+		return errors.New("the target cluster is not provisioned")
+	}
+	if !s.LiveCluster.Healthy() {
+		return errors.New("the target cluster is not healthy, please run 'kubeone apply' first")
+	}
+
+	return errors.Wrap(tasks.WithCCMCSIMigration(nil).Run(s), "failed to migrate to ccm/csi")
+}
diff --git a/pkg/state/cluster.go b/pkg/state/cluster.go
index 36d36a17d..b0ba28dc7 100644
--- a/pkg/state/cluster.go
+++ b/pkg/state/cluster.go
@@ -35,6 +35,7 @@ type Cluster struct {
 	StaticWorkers           []Host
 	ExpectedVersion         *semver.Version
 	EncryptionConfiguration *EncryptionConfiguration
+	CCMStatus               *CCMStatus
 	Lock                    sync.Mutex
 }
 
@@ -44,6 +45,11 @@ type EncryptionConfiguration struct {
 	Custom bool
 }
 
+type CCMStatus struct {
+	InTreeCloudProviderEnabled bool
+	ExternalCCMDeployed        bool
+}
+
 type Host struct {
 	Config *kubeone.HostConfig
 
diff --git a/pkg/state/context.go b/pkg/state/context.go
index f6366a8be..040238c8c 100644
--- a/pkg/state/context.go
+++ b/pkg/state/context.go
@@ -97,9 +97,11 @@ type State struct {
 	ForceUpgrade              bool
 	ForceInstall              bool
 	UpgradeMachineDeployments bool
-	CredentialsFilePath       string
-	ManifestFilePath          string
-	PauseImage                string
+	// TODO: Currently unset, will be provided via --complete flag
+	CCMMigrationComplete bool
+	CredentialsFilePath  string
+	ManifestFilePath     string
+	PauseImage           string
 }
 
 func (s *State) KubeadmVerboseFlag() string {
@@ -115,6 +117,13 @@ func (s *State) Clone() *State {
 	return &newState
 }
 
+func (s *State) ShouldEnableInTreeCloudProvider() bool {
+	if s.LiveCluster.CCMStatus == nil {
+		return s.Cluster.CloudProvider.CloudProviderInTree()
+	}
+	return s.LiveCluster.CCMStatus.InTreeCloudProviderEnabled && !s.CCMMigrationComplete
+}
+
 func (s *State) ShouldDisableEncryption() bool {
 	return (s.Cluster.Features.EncryptionProviders == nil ||
 		!s.Cluster.Features.EncryptionProviders.Enable) &&
diff --git a/pkg/tasks/ccm_csi_migration.go b/pkg/tasks/ccm_csi_migration.go
new file mode 100644
index 000000000..b5771d060
--- /dev/null
+++ b/pkg/tasks/ccm_csi_migration.go
@@ -0,0 +1,38 @@
+/*
+Copyright 2021 The KubeOne Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tasks
+
+import (
+	"github.com/pkg/errors"
+
+	"k8c.io/kubeone/pkg/state"
+)
+
+func validateExternalCloudProviderConfig(s *state.State) error {
+	if s.LiveCluster.CCMStatus != nil && s.LiveCluster.CCMStatus.ExternalCCMDeployed &&
+		!s.LiveCluster.CCMStatus.InTreeCloudProviderEnabled {
+		return errors.New("the cluster is already running external ccm")
+	}
+	if s.Cluster.CloudProvider.Openstack == nil {
+		return errors.New("ccm/csi migration is currently supported only for openstack")
+	}
+	if !s.Cluster.CloudProvider.External {
+		return errors.New(".cloudProvider.external must be enabled to start the migration")
+	}
+
+	return nil
+}
diff --git a/pkg/tasks/probes.go b/pkg/tasks/probes.go
index 77a3ff293..a3dce93df 100644
--- a/pkg/tasks/probes.go
+++ b/pkg/tasks/probes.go
@@ -32,6 +32,7 @@ import (
 	"k8c.io/kubeone/pkg/state"
 
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/sets"
 	dynclient "sigs.k8s.io/controller-runtime/pkg/client"
@@ -81,6 +82,20 @@ func safeguard(s *state.State) error {
 		}
 	}
 
+	// Block kubeone apply if .cloudProvider.external is enabled on cluster with
+	// in-tree cloud provider, but with no external CCM
+	if s.Cluster.CloudProvider.External &&
+		s.LiveCluster.CCMStatus != nil &&
+		s.LiveCluster.CCMStatus.InTreeCloudProviderEnabled &&
+		!s.LiveCluster.CCMStatus.ExternalCCMDeployed {
+		return errors.New(".cloudProvider.external enabled, but cluster is using in-tree provider. run ccm/csi migration by running 'kubeone migrate to-ccm-csi'")
+	} else if !s.Cluster.CloudProvider.External &&
+		s.LiveCluster.CCMStatus != nil &&
+		s.LiveCluster.CCMStatus.ExternalCCMDeployed {
+		// Block disabling .cloudProvider.external
+		return errors.New(".cloudProvider.external is disabled, but external ccm is deployed")
+	}
+
 	return nil
 }
 
@@ -336,10 +351,21 @@ func investigateCluster(s *state.State) error {
 		s.LiveCluster.EncryptionConfiguration = &state.EncryptionConfiguration{Enable: true, Custom: encryptionEnabled.Custom}
 		s.LiveCluster.Lock.Unlock()
 		// no need to lock around FetchEncryptionProvidersFile because it handles locking internally.
-		if err := fetchEncryptionProvidersFile(s); err != nil {
-			return errors.Wrap(err, "failed to fetch EncryptionProviders configuration")
+		if fErr := fetchEncryptionProvidersFile(s); fErr != nil {
+			return errors.Wrap(fErr, "failed to fetch EncryptionProviders configuration")
 		}
 	}
+
+	ccmStatus, err := detectCCMMigrationStatus(s)
+	if err != nil {
+		return errors.Wrap(err, "failed to check is in-tree cloud provider enabled")
+	}
+	if ccmStatus != nil {
+		s.LiveCluster.Lock.Lock()
+		s.LiveCluster.CCMStatus = ccmStatus
+		s.LiveCluster.Lock.Unlock()
+	}
+
 	return nil
 }
 
@@ -512,3 +538,57 @@ func detectEncryptionProvidersEnabled(s *state.State) (ees encryptionEnabledStat
 	}
 	return ees, nil
 }
+
+func detectCCMMigrationStatus(s *state.State) (*state.CCMStatus, error) {
+	if s.DynamicClient == nil {
+		return nil, errors.New("kubernetes dynamic client is not initialized")
+	}
+
+	pods := corev1.PodList{}
+	err := s.DynamicClient.List(s.Context, &pods, &dynclient.ListOptions{
+		Namespace: metav1.NamespaceSystem,
+		LabelSelector: labels.SelectorFromSet(map[string]string{
+			"component": "kube-controller-manager",
+		}),
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, "unable to list kube-controller-manager pods")
+	}
+
+	status := &state.CCMStatus{}
+	for _, pod := range pods.Items {
+		for _, c := range pod.Spec.Containers[0].Command {
+			if strings.HasPrefix(c, "--cloud-provider") && !strings.Contains(c, "external") {
+				status.InTreeCloudProviderEnabled = true
+			}
+		}
+	}
+
+	ccmLabel := ""
+	ccmLabelValue := ""
+	switch {
+	case s.Cluster.CloudProvider.Openstack != nil:
+		ccmLabel = "k8s-app"
+		ccmLabelValue = "openstack-cloud-controller-manager"
+	default:
+		status.ExternalCCMDeployed = false
+		return status, nil
+	}
+
+	// TODO(xmudrii): Consider checking does Deployment exists instead
+	pods = corev1.PodList{}
+	err = s.DynamicClient.List(s.Context, &pods, &dynclient.ListOptions{
+		Namespace: metav1.NamespaceSystem,
+		LabelSelector: labels.SelectorFromSet(map[string]string{
+			ccmLabel: ccmLabelValue,
+		}),
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, "unable to list external cloud-controller-manager pods")
+	}
+	if len(pods.Items) > 0 {
+		status.ExternalCCMDeployed = true
+	}
+
+	return status, nil
+}
diff --git a/pkg/tasks/tasks.go b/pkg/tasks/tasks.go
index 6b7209f6b..78b6e4e1e 100644
--- a/pkg/tasks/tasks.go
+++ b/pkg/tasks/tasks.go
@@ -92,6 +92,12 @@ func WithHostnameOS(t Tasks) Tasks {
 
 // WithProbes will run different probes over the defined cluster
 func WithProbes(t Tasks) Tasks {
+	return t.append(
+		Task{Fn: runProbes, ErrMsg: "probes failed"},
+	)
+}
+
+func WithProbesAndSafeguard(t Tasks) Tasks {
 	return t.append(
 		Task{Fn: runProbes, ErrMsg: "probes failed"},
 		Task{Fn: safeguard, ErrMsg: "probes analysis failed"},
@@ -99,7 +105,7 @@ func WithProbes(t Tasks) Tasks {
 }
 
 func WithHostnameOSAndProbes(t Tasks) Tasks {
-	return WithProbes(WithHostnameOS(t))
+	return WithProbesAndSafeguard(WithHostnameOS(t))
 }
 
 // WithFullInstall with install binaries (using WithBinariesOnly) and
@@ -450,3 +456,27 @@ func WithRotateKey(t Tasks) Tasks {
 		},
 	}...)
 }
+
+func WithCCMCSIMigration(t Tasks) Tasks {
+	return t.append(Tasks{
+		{Fn: validateExternalCloudProviderConfig, ErrMsg: "failed to validate config", Retries: 1},
+	}...).
+		append(kubernetesConfigFiles()...).
+		append(Tasks{
+			{Fn: upgradeLeader, ErrMsg: "failed to upgrade leader control plane"},
+			{Fn: upgradeFollower, ErrMsg: "failed to upgrade follower control plane"},
+			{
+				Fn: func(s *state.State) error {
+					s.Logger.Info("Downloading PKI...")
+					return s.RunTaskOnLeader(certificate.DownloadKubePKI)
+				},
+				ErrMsg: "failed to download Kubernetes PKI from the leader",
+			},
+		}...).
+		append(WithResources(nil)...).
+		append(
+			Task{Fn: restartKubeAPIServer, ErrMsg: "failed to restart unhealthy kube-apiserver"},
+			// TODO: Support CCM/CSI migration for worker nodes
+			Task{Fn: upgradeStaticWorkers, ErrMsg: "unable to upgrade static worker nodes"},
+		)
+}
diff --git a/pkg/templates/kubeadm/v1beta1/kubeadm.go b/pkg/templates/kubeadm/v1beta1/kubeadm.go
index eccdea5af..a27ca6cf8 100644
--- a/pkg/templates/kubeadm/v1beta1/kubeadm.go
+++ b/pkg/templates/kubeadm/v1beta1/kubeadm.go
@@ -172,7 +172,7 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
 		nodeRegistration.KubeletExtraArgs["pod-infra-container-image"] = cluster.AssetConfiguration.Pause.ImageRepository + "/pause:" + cluster.AssetConfiguration.Pause.ImageTag
 	}
 
-	if cluster.CloudProvider.CloudProviderInTree() {
+	if s.ShouldEnableInTreeCloudProvider() {
 		renderedCloudConfig := "/etc/kubernetes/cloud-config"
 		cloudConfigVol := kubeadmv1beta1.HostPathMount{
 			Name: "cloud-config",
@@ -204,9 +204,13 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
 	}
 
 	if cluster.CloudProvider.External {
-		delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
-		delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
-		nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
+		if !s.ShouldEnableInTreeCloudProvider() {
+			delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
+			delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
+			nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
+		} else {
+			clusterConfig.ControllerManager.ExtraArgs["controllers"] = "*,bootstrapsigner,tokencleaner,-cloud-node-lifecycle,-route,-service"
+		}
 	}
 
 	if cluster.Features.StaticAuditLog != nil && cluster.Features.StaticAuditLog.Enable {
diff --git a/pkg/templates/kubeadm/v1beta2/kubeadm.go b/pkg/templates/kubeadm/v1beta2/kubeadm.go
index 772332822..19b5730e7 100644
--- a/pkg/templates/kubeadm/v1beta2/kubeadm.go
+++ b/pkg/templates/kubeadm/v1beta2/kubeadm.go
@@ -179,7 +179,7 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
 		nodeRegistration.KubeletExtraArgs["pod-infra-container-image"] = cluster.AssetConfiguration.Pause.ImageRepository + "/pause:" + cluster.AssetConfiguration.Pause.ImageTag
 	}
 
-	if cluster.CloudProvider.CloudProviderInTree() {
+	if s.ShouldEnableInTreeCloudProvider() {
 		renderedCloudConfig := "/etc/kubernetes/cloud-config"
 		cloudConfigVol := kubeadmv1beta2.HostPathMount{
 			Name: "cloud-config",
@@ -211,9 +211,13 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
 	}
 
 	if cluster.CloudProvider.External {
-		delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
-		delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
-		nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
+		if !s.ShouldEnableInTreeCloudProvider() {
+			delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
+			delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
+			nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
+		} else {
+			clusterConfig.ControllerManager.ExtraArgs["controllers"] = "*,bootstrapsigner,tokencleaner,-cloud-node-lifecycle,-route,-service"
+		}
 	}
 
 	if cluster.Features.StaticAuditLog != nil && cluster.Features.StaticAuditLog.Enable {
diff --git a/pkg/templates/kubeadm/v1beta3/kubeadm.go b/pkg/templates/kubeadm/v1beta3/kubeadm.go
index bc509ec21..8ac53fef6 100644
--- a/pkg/templates/kubeadm/v1beta3/kubeadm.go
+++ b/pkg/templates/kubeadm/v1beta3/kubeadm.go
@@ -180,7 +180,7 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
 		nodeRegistration.KubeletExtraArgs["pod-infra-container-image"] = cluster.AssetConfiguration.Pause.ImageRepository + "/pause:" + cluster.AssetConfiguration.Pause.ImageTag
 	}
 
-	if cluster.CloudProvider.CloudProviderInTree() {
+	if s.ShouldEnableInTreeCloudProvider() {
 		renderedCloudConfig := "/etc/kubernetes/cloud-config"
 		cloudConfigVol := kubeadmv1beta3.HostPathMount{
 			Name: "cloud-config",
@@ -212,9 +212,13 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
 	}
 
 	if cluster.CloudProvider.External {
-		delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
-		delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
-		nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
+		if !s.ShouldEnableInTreeCloudProvider() {
+			delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
+			delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
+			nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
+		} else {
+			clusterConfig.ControllerManager.ExtraArgs["controllers"] = "*,bootstrapsigner,tokencleaner,-cloud-node-lifecycle,-route,-service"
+		}
 	}
 
 	if cluster.Features.StaticAuditLog != nil && cluster.Features.StaticAuditLog.Enable {