Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial CCM migration implementation for OpenStack #1468

Merged
merged 2 commits into from
Aug 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/cmd/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func runApply(opts *applyOpts) error {

// Probe the cluster for the actual state and the needed tasks.
probbing := tasks.WithHostnameOS(nil)
probbing = tasks.WithProbes(probbing)
probbing = tasks.WithProbesAndSafeguard(probbing)

if err = probbing.Run(s); err != nil {
return err
Expand Down
56 changes: 56 additions & 0 deletions pkg/cmd/migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/pflag"

"k8c.io/kubeone/pkg/credentials"
"k8c.io/kubeone/pkg/tasks"
)

Expand All @@ -32,6 +33,7 @@ func migrateCmd(fs *pflag.FlagSet) *cobra.Command {
}

cmd.AddCommand(migrateToContainerdCmd(fs))
cmd.AddCommand(migrateToCCMCSICmd(fs))
return cmd
}

Expand All @@ -55,6 +57,30 @@ func migrateToContainerdCmd(fs *pflag.FlagSet) *cobra.Command {
}
}

// migrateToCCMCSICmd builds the "to-ccm-csi" subcommand, which migrates a live
// cluster from the in-tree cloud provider to the external CCM and CSI plugin.
// The command is hidden for now. The global options are resolved from fs when
// the command runs, not when it is constructed.
func migrateToCCMCSICmd(fs *pflag.FlagSet) *cobra.Command {
	run := func(_ *cobra.Command, _ []string) error {
		gopts, err := persistentGlobalOptions(fs)
		if err != nil {
			return errors.Wrap(err, "unable to get global flags")
		}

		return runMigrateToCCMCSI(gopts)
	}

	cmd := &cobra.Command{
		Use:   "to-ccm-csi",
		Short: "Migrate live cluster from the in-tree cloud provider to external CCM and CSI plugin",
		// TODO(xmudrii): Add which providers are supported in the long description.
		Long: heredoc.Doc(`
			Following the in-tree cloud provider deprecation http://kep.k8s.io/2395
			this command helps to migrate from the in-tree cloud provider to external CCM and CSI plugin.
			This command is currently only available for some providers. We'll extend it for all providers with
			in-tree cloud provider implementation in the future.
		`),
		// TODO: Remove hidden once complete
		Hidden: true,
		RunE:   run,
	}

	return cmd
}

func runMigrateToContainerd(opts *globalOptions) error {
s, err := opts.BuildState()
if err != nil {
Expand All @@ -63,3 +89,33 @@ func runMigrateToContainerd(opts *globalOptions) error {

return errors.Wrap(tasks.WithContainerDMigration(nil).Run(s), "failed to get cluster status")
}

// runMigrateToCCMCSI drives the CCM/CSI migration: it builds the State from the
// global options, validates the provider credentials, probes the cluster, and
// runs the migration tasks only when the live cluster is both provisioned and
// healthy.
func runMigrateToCCMCSI(opts *globalOptions) error {
	s, err := opts.BuildState()
	if err != nil {
		return errors.Wrap(err, "failed to initialize State")
	}

	// Only the validation outcome matters here; the credentials themselves
	// are discarded.
	if _, credsErr := credentials.ProviderCredentials(s.Cluster.CloudProvider, opts.CredentialsFile); credsErr != nil {
		return errors.Wrap(credsErr, "failed to validate credentials")
	}

	// Probe the cluster for the actual state and the needed tasks.
	probes := tasks.WithProbes(tasks.WithHostnameOS(nil))
	if probeErr := probes.Run(s); probeErr != nil {
		return probeErr
	}

	switch {
	case !s.LiveCluster.IsProvisioned():
		return errors.New("the target cluster is not provisioned")
	case !s.LiveCluster.Healthy():
		return errors.New("the target cluster is not healthy, please run 'kubeone apply' first")
	}

	migration := tasks.WithCCMCSIMigration(nil)

	return errors.Wrap(migration.Run(s), "failed to migrate to ccm/csi")
}
6 changes: 6 additions & 0 deletions pkg/state/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type Cluster struct {
StaticWorkers []Host
ExpectedVersion *semver.Version
EncryptionConfiguration *EncryptionConfiguration
CCMStatus *CCMStatus
Lock sync.Mutex
}

Expand All @@ -44,6 +45,11 @@ type EncryptionConfiguration struct {
Custom bool
}

// CCMStatus records what the cluster probes detected about the cloud provider
// integration of the live cluster.
type CCMStatus struct {
// InTreeCloudProviderEnabled is set when a kube-controller-manager pod is
// found running with a --cloud-provider flag that is not "external".
InTreeCloudProviderEnabled bool
// ExternalCCMDeployed is set when at least one pod carrying the external
// cloud-controller-manager label is found in the cluster.
ExternalCCMDeployed bool
}

type Host struct {
Config *kubeone.HostConfig

Expand Down
15 changes: 12 additions & 3 deletions pkg/state/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,11 @@ type State struct {
ForceUpgrade bool
ForceInstall bool
UpgradeMachineDeployments bool
CredentialsFilePath string
ManifestFilePath string
PauseImage string
// TODO: Currently unset, will be provided via --complete flag
CCMMigrationComplete bool
CredentialsFilePath string
ManifestFilePath string
PauseImage string
}

func (s *State) KubeadmVerboseFlag() string {
Expand All @@ -115,6 +117,13 @@ func (s *State) Clone() *State {
return &newState
}

// ShouldEnableInTreeCloudProvider reports whether the in-tree cloud provider
// should be configured. When a CCM status has been detected on the live
// cluster, the in-tree provider stays enabled only if it is currently enabled
// there and the CCM migration has not been marked complete. Without a detected
// status the decision falls back to the cluster's CloudProvider configuration.
func (s *State) ShouldEnableInTreeCloudProvider() bool {
	if status := s.LiveCluster.CCMStatus; status != nil {
		return !s.CCMMigrationComplete && status.InTreeCloudProviderEnabled
	}

	return s.Cluster.CloudProvider.CloudProviderInTree()
}

func (s *State) ShouldDisableEncryption() bool {
return (s.Cluster.Features.EncryptionProviders == nil ||
!s.Cluster.Features.EncryptionProviders.Enable) &&
Expand Down
38 changes: 38 additions & 0 deletions pkg/tasks/ccm_csi_migration.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
Copyright 2021 The KubeOne Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tasks

import (
"github.com/pkg/errors"

"k8c.io/kubeone/pkg/state"
)

// validateExternalCloudProviderConfig checks the preconditions for starting the
// CCM/CSI migration. It returns an error when the live cluster already runs
// only the external CCM, when the provider is not OpenStack, or when
// .cloudProvider.external is not enabled in the cluster configuration.
func validateExternalCloudProviderConfig(s *state.State) error {
	ccm := s.LiveCluster.CCMStatus
	alreadyMigrated := ccm != nil && ccm.ExternalCCMDeployed && !ccm.InTreeCloudProviderEnabled

	switch {
	case alreadyMigrated:
		return errors.New("the cluster is already running external ccm")
	case s.Cluster.CloudProvider.Openstack == nil:
		return errors.New("ccm/csi migration is currently supported only for openstack")
	case !s.Cluster.CloudProvider.External:
		return errors.New(".cloudProvider.external must be enabled to start the migration")
	default:
		return nil
	}
}
84 changes: 82 additions & 2 deletions pkg/tasks/probes.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"k8c.io/kubeone/pkg/state"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
dynclient "sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -81,6 +82,20 @@ func safeguard(s *state.State) error {
}
}

// Block kubeone apply if .cloudProvider.external is enabled on cluster with
// in-tree cloud provider, but with no external CCM
if s.Cluster.CloudProvider.External &&
s.LiveCluster.CCMStatus != nil &&
s.LiveCluster.CCMStatus.InTreeCloudProviderEnabled &&
!s.LiveCluster.CCMStatus.ExternalCCMDeployed {
return errors.New(".cloudProvider.external enabled, but cluster is using in-tree provider. run ccm/csi migration by running 'kubeone migrate to-ccm-csi'")
} else if !s.Cluster.CloudProvider.External &&
s.LiveCluster.CCMStatus != nil &&
s.LiveCluster.CCMStatus.ExternalCCMDeployed {
// Block disabling .cloudProvider.external
return errors.New(".cloudProvider.external is disabled, but external ccm is deployed")
}

return nil
}

Expand Down Expand Up @@ -336,10 +351,21 @@ func investigateCluster(s *state.State) error {
s.LiveCluster.EncryptionConfiguration = &state.EncryptionConfiguration{Enable: true, Custom: encryptionEnabled.Custom}
s.LiveCluster.Lock.Unlock()
// no need to lock around FetchEncryptionProvidersFile because it handles locking internally.
if err := fetchEncryptionProvidersFile(s); err != nil {
return errors.Wrap(err, "failed to fetch EncryptionProviders configuration")
if fErr := fetchEncryptionProvidersFile(s); fErr != nil {
return errors.Wrap(fErr, "failed to fetch EncryptionProviders configuration")
}
}

ccmStatus, err := detectCCMMigrationStatus(s)
if err != nil {
return errors.Wrap(err, "failed to check is in-tree cloud provider enabled")
}
if ccmStatus != nil {
s.LiveCluster.Lock.Lock()
s.LiveCluster.CCMStatus = ccmStatus
s.LiveCluster.Lock.Unlock()
}

return nil
}

Expand Down Expand Up @@ -512,3 +538,57 @@ func detectEncryptionProvidersEnabled(s *state.State) (ees encryptionEnabledStat
}
return ees, nil
}

// detectCCMMigrationStatus inspects the live cluster and reports whether the
// in-tree cloud provider is still enabled on kube-controller-manager and
// whether an external cloud-controller-manager (CCM) is deployed.
//
// The in-tree provider counts as enabled when any container of any
// kube-controller-manager pod in kube-system is started with a
// --cloud-provider flag whose value is non-empty and not "external".
// External CCM detection is implemented only for OpenStack; for every other
// provider ExternalCCMDeployed is reported as false.
//
// It returns an error when the dynamic client is not initialized or when
// either pod listing fails.
func detectCCMMigrationStatus(s *state.State) (*state.CCMStatus, error) {
	if s.DynamicClient == nil {
		return nil, errors.New("kubernetes dynamic client is not initialized")
	}

	kcmPods := corev1.PodList{}
	err := s.DynamicClient.List(s.Context, &kcmPods, &dynclient.ListOptions{
		Namespace: metav1.NamespaceSystem,
		LabelSelector: labels.SelectorFromSet(map[string]string{
			"component": "kube-controller-manager",
		}),
	})
	if err != nil {
		return nil, errors.Wrap(err, "unable to list kube-controller-manager pods")
	}

	status := &state.CCMStatus{}
	for i := range kcmPods.Items {
		// Ranging over the containers (instead of indexing [0]) keeps a pod
		// with an empty container list from panicking the probe.
		containers := kcmPods.Items[i].Spec.Containers
		for j := range containers {
			if commandEnablesInTreeCloudProvider(containers[j].Command) {
				status.InTreeCloudProviderEnabled = true
			}
		}
	}

	// External CCM detection is provider-specific: every supported provider
	// contributes the label that identifies its CCM pods.
	var ccmSelector map[string]string
	switch {
	case s.Cluster.CloudProvider.Openstack != nil:
		ccmSelector = map[string]string{"k8s-app": "openstack-cloud-controller-manager"}
	default:
		// No known external CCM for this provider; ExternalCCMDeployed keeps
		// its zero value (false).
		return status, nil
	}

	// TODO(xmudrii): Consider checking whether the Deployment exists instead.
	ccmPods := corev1.PodList{}
	err = s.DynamicClient.List(s.Context, &ccmPods, &dynclient.ListOptions{
		Namespace:     metav1.NamespaceSystem,
		LabelSelector: labels.SelectorFromSet(ccmSelector),
	})
	if err != nil {
		return nil, errors.Wrap(err, "unable to list external cloud-controller-manager pods")
	}
	status.ExternalCCMDeployed = len(ccmPods.Items) > 0

	return status, nil
}

// commandEnablesInTreeCloudProvider reports whether command carries a
// --cloud-provider flag that selects an in-tree provider, i.e. a value that is
// neither empty nor "external". Both the "--cloud-provider=value" and the
// "--cloud-provider value" spellings are recognized. Flags that merely share
// the prefix (for example --cloud-provider-gce-lb-src-cidrs) are ignored.
func commandEnablesInTreeCloudProvider(command []string) bool {
	const (
		flagName   = "--cloud-provider"
		flagAssign = flagName + "="
	)

	for i, arg := range command {
		var provider string
		switch {
		case strings.HasPrefix(arg, flagAssign):
			provider = strings.TrimPrefix(arg, flagAssign)
		case arg == flagName && i+1 < len(command):
			provider = command[i+1]
		default:
			continue
		}

		if provider != "" && provider != "external" {
			return true
		}
	}

	return false
}
32 changes: 31 additions & 1 deletion pkg/tasks/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,20 @@ func WithHostnameOS(t Tasks) Tasks {

// WithProbes appends the cluster probing task to t. The safeguard analysis is
// not included; WithProbesAndSafeguard adds it on top of the probes.
func WithProbes(t Tasks) Tasks {
	probe := Task{Fn: runProbes, ErrMsg: "probes failed"}

	return t.append(probe)
}

// WithProbesAndSafeguard appends the cluster probing task to t, followed by
// the safeguard task that analyses the probe results.
func WithProbesAndSafeguard(t Tasks) Tasks {
	return t.append(Tasks{
		{Fn: runProbes, ErrMsg: "probes failed"},
		{Fn: safeguard, ErrMsg: "probes analysis failed"},
	}...)
}

func WithHostnameOSAndProbes(t Tasks) Tasks {
return WithProbes(WithHostnameOS(t))
return WithProbesAndSafeguard(WithHostnameOS(t))
}

// WithFullInstall will install binaries (using WithBinariesOnly) and
Expand Down Expand Up @@ -450,3 +456,27 @@ func WithRotateKey(t Tasks) Tasks {
},
}...)
}

// WithCCMCSIMigration appends the CCM/CSI migration tasks to t, in this order:
// configuration validation, the Kubernetes config file tasks, the leader and
// follower control plane upgrades, the PKI download from the leader, the
// resource tasks, the kube-apiserver restart, and the static worker upgrade.
func WithCCMCSIMigration(t Tasks) Tasks {
	downloadPKI := func(s *state.State) error {
		s.Logger.Info("Downloading PKI...")
		return s.RunTaskOnLeader(certificate.DownloadKubePKI)
	}

	migration := t.append(
		Task{Fn: validateExternalCloudProviderConfig, ErrMsg: "failed to validate config", Retries: 1},
	)
	migration = migration.append(kubernetesConfigFiles()...)
	migration = migration.append(
		Task{Fn: upgradeLeader, ErrMsg: "failed to upgrade leader control plane"},
		Task{Fn: upgradeFollower, ErrMsg: "failed to upgrade follower control plane"},
		Task{Fn: downloadPKI, ErrMsg: "failed to download Kubernetes PKI from the leader"},
	)
	migration = migration.append(WithResources(nil)...)

	return migration.append(
		Task{Fn: restartKubeAPIServer, ErrMsg: "failed to restart unhealthy kube-apiserver"},
		// TODO: Support CCM/CSI migration for worker nodes
		Task{Fn: upgradeStaticWorkers, ErrMsg: "unable to upgrade static worker nodes"},
	)
}
12 changes: 8 additions & 4 deletions pkg/templates/kubeadm/v1beta1/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
nodeRegistration.KubeletExtraArgs["pod-infra-container-image"] = cluster.AssetConfiguration.Pause.ImageRepository + "/pause:" + cluster.AssetConfiguration.Pause.ImageTag
}

if cluster.CloudProvider.CloudProviderInTree() {
if s.ShouldEnableInTreeCloudProvider() {
renderedCloudConfig := "/etc/kubernetes/cloud-config"
cloudConfigVol := kubeadmv1beta1.HostPathMount{
Name: "cloud-config",
Expand Down Expand Up @@ -204,9 +204,13 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
}

if cluster.CloudProvider.External {
delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
if !s.ShouldEnableInTreeCloudProvider() {
delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
} else {
clusterConfig.ControllerManager.ExtraArgs["controllers"] = "*,bootstrapsigner,tokencleaner,-cloud-node-lifecycle,-route,-service"
}
}

if cluster.Features.StaticAuditLog != nil && cluster.Features.StaticAuditLog.Enable {
Expand Down
12 changes: 8 additions & 4 deletions pkg/templates/kubeadm/v1beta2/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
nodeRegistration.KubeletExtraArgs["pod-infra-container-image"] = cluster.AssetConfiguration.Pause.ImageRepository + "/pause:" + cluster.AssetConfiguration.Pause.ImageTag
}

if cluster.CloudProvider.CloudProviderInTree() {
if s.ShouldEnableInTreeCloudProvider() {
renderedCloudConfig := "/etc/kubernetes/cloud-config"
cloudConfigVol := kubeadmv1beta2.HostPathMount{
Name: "cloud-config",
Expand Down Expand Up @@ -211,9 +211,13 @@ func NewConfig(s *state.State, host kubeoneapi.HostConfig) ([]runtime.Object, er
}

if cluster.CloudProvider.External {
delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
if !s.ShouldEnableInTreeCloudProvider() {
delete(clusterConfig.APIServer.ExtraArgs, "cloud-provider")
delete(clusterConfig.ControllerManager.ExtraArgs, "cloud-provider")
nodeRegistration.KubeletExtraArgs["cloud-provider"] = "external"
} else {
clusterConfig.ControllerManager.ExtraArgs["controllers"] = "*,bootstrapsigner,tokencleaner,-cloud-node-lifecycle,-route,-service"
}
}

if cluster.Features.StaticAuditLog != nil && cluster.Features.StaticAuditLog.Enable {
Expand Down
Loading