Skip to content

Commit

Permalink
Allow smooth upgrades to new kube-proxy with nft
Browse files Browse the repository at this point in the history
The new eks-d version includes the new kube-proxy with support for
iptables nft. The old kube-proxy always uses iptables legacy.

During an upgrade, when the new machine for the new CP node is started,
if the machine has iptables nft as the default, the kubelet will use it.
Then, before capi updates the kube-proxy image version in the DS (this
doesn't happen until the CP upgrade is finished), the old kube-proxy is
scheduled in the node. This old kube-proxy doesn't support nft and
always uses iptables legacy. When it starts, it adds legacy iptables
rules. However, at this point the kubelet has already added iptables-nft
rules.

After the CP has been updated, capi updates the kube-proxy DS to the new
version. This new version has the new wrapper, which detects the rules
introduced by the kubelet, so it starts using nft.

The hypothesis is that these leftover legacy rules break the k8s service
IP "redirection".

This allows a smooth transition by scheduling a DS with the old kube proxy
only in the old nodes and schedule a DS with the new kube-proxy only in
the new nodes.
  • Loading branch information
g-gaston committed Mar 24, 2023
1 parent 7ee77c6 commit b088d84
Show file tree
Hide file tree
Showing 28 changed files with 1,600 additions and 26 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ mocks: ## Generate mocks
${MOCKGEN} -destination=pkg/bootstrapper/mocks/client.go -package=mocks "github.com/aws/eks-anywhere/pkg/bootstrapper" ClusterClient
${MOCKGEN} -destination=pkg/git/providers/github/mocks/github.go -package=mocks "github.com/aws/eks-anywhere/pkg/git/providers/github" GithubClient
${MOCKGEN} -destination=pkg/git/mocks/git.go -package=mocks "github.com/aws/eks-anywhere/pkg/git" Client,ProviderClient
${MOCKGEN} -destination=pkg/workflows/interfaces/mocks/clients.go -package=mocks "github.com/aws/eks-anywhere/pkg/workflows/interfaces" Bootstrapper,ClusterManager,GitOpsManager,Validator,CAPIManager,EksdInstaller,EksdUpgrader,PackageInstaller
${MOCKGEN} -destination=pkg/workflows/interfaces/mocks/clients.go -package=mocks "github.com/aws/eks-anywhere/pkg/workflows/interfaces" Bootstrapper,ClusterManager,GitOpsManager,Validator,CAPIManager,EksdInstaller,EksdUpgrader,PackageInstaller,ClusterUpgrader
${MOCKGEN} -destination=pkg/git/gogithub/mocks/client.go -package=mocks "github.com/aws/eks-anywhere/pkg/git/gogithub" Client
${MOCKGEN} -destination=pkg/git/gitclient/mocks/client.go -package=mocks "github.com/aws/eks-anywhere/pkg/git/gitclient" GoGit
${MOCKGEN} -destination=pkg/validations/mocks/docker.go -package=mocks "github.com/aws/eks-anywhere/pkg/validations" DockerExecutable
Expand Down Expand Up @@ -595,6 +595,7 @@ mocks: ## Generate mocks
${MOCKGEN} -destination=pkg/controller/clusters/mocks/ipvalidator.go -package=mocks -source "pkg/controller/clusters/ipvalidator.go" IPUniquenessValidator
${MOCKGEN} -destination=pkg/registry/mocks/storage.go -package=mocks -source "pkg/registry/storage.go" StorageClient
${MOCKGEN} -destination=pkg/registry/mocks/repository.go -package=mocks oras.land/oras-go/v2/registry Repository
${MOCKGEN} -destination=pkg/clustermanager/mocks/kube_proxy.go -package=mocks -source "pkg/clustermanager/kube_proxy.go"

.PHONY: verify-mocks
verify-mocks: mocks ## Verify if mocks need to be updated
Expand Down
6 changes: 6 additions & 0 deletions cmd/eksctl-anywhere/cmd/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ func buildClusterManagerOpts(t timeoutOptions) (*dependencies.ClusterManagerTime
}, nil
}

func kubeProxyCLIUpgraderOptions(o *upgradeClusterOptions) dependencies.KubeProxyCLIUpgraderOptions {
return dependencies.KubeProxyCLIUpgraderOptions{
NoTimouts: o.noTimeouts,
}
}

type clusterOptions struct {
fileName string
bundlesOverride string
Expand Down
2 changes: 2 additions & 0 deletions cmd/eksctl-anywhere/cmd/upgradecluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ func (uc *upgradeClusterOptions) upgradeCluster(cmd *cobra.Command) error {
WithBootstrapper().
WithCliConfig(cliConfig).
WithClusterManager(clusterSpec.Cluster, clusterManagerTimeoutOpts).
WithKubeProxyCLIUpgrader(kubeProxyCLIUpgraderOptions(uc)).
WithProvider(uc.fileName, clusterSpec.Cluster, cc.skipIpCheck, uc.hardwareCSVPath, uc.forceClean, uc.tinkerbellBootstrapIP).
WithGitOpsFlux(clusterSpec.Cluster, clusterSpec.FluxConfig, cliConfig).
WithWriter().
Expand All @@ -122,6 +123,7 @@ func (uc *upgradeClusterOptions) upgradeCluster(cmd *cobra.Command) error {
deps.Writer,
deps.EksdUpgrader,
deps.EksdInstaller,
deps.KubeProxyCLIUpgrader,
)

workloadCluster := &types.Cluster{
Expand Down
18 changes: 18 additions & 0 deletions controllers/resource/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,24 @@ func (cor *clusterReconciler) applyTemplates(ctx context.Context, cs *anywherev1
fetch, err := cor.Fetch(ctx, resource.GetName(), resource.GetNamespace(), resource.GetKind(), resource.GetAPIVersion())
if err == nil {
resource.SetResourceVersion(fetch.GetResourceVersion())

// We want to preserve annotations. It's possible that some CAPI objects have extra annotations
// that were added manually to handle difficult upgrade/recovery scenarios and we don't want the
// controller to remove them.
// Ideally we would use something like server side apply instead of an plain replace, which would
// extend this to labels, etc. But given this is a legacy controller and will be removed shortly,
// this is the path of least resistance.
annotations := fetch.GetAnnotations()
if annotations == nil {
annotations = make(map[string]string)
}

for k, v := range resource.GetAnnotations() {
annotations[k] = v
}

resource.SetAnnotations(annotations)

if err := cor.ApplyUpdatedTemplate(ctx, resource, dryRun); err != nil {
return err
}
Expand Down
116 changes: 116 additions & 0 deletions controllers/resource/reconciler_wb_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package resource

import (
"context"
"testing"
"time"

"github.com/go-logr/logr"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

"github.com/aws/eks-anywhere/internal/test/envtest"
anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1"
"github.com/aws/eks-anywhere/pkg/clusterapi"
"github.com/aws/eks-anywhere/pkg/constants"
)

func TestClusterReconcilerApplyTemplatesAnnotationsArePreserved(t *testing.T) {
g := NewWithT(t)
ctx := context.Background()

cluster := &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-cluster-test",
},
}
kcp := &controlplanev1.KubeadmControlPlane{
TypeMeta: metav1.TypeMeta{
Kind: "KubeadmControlPlane",
APIVersion: "controlplane.cluster.x-k8s.io/v1beta1",
},
ObjectMeta: metav1.ObjectMeta{
Name: clusterapi.KubeadmControlPlaneName(cluster),
Namespace: constants.EksaSystemNamespace,
Annotations: map[string]string{
"my-custom-annotation": "true",
},
},
}
newKCP := kcp.DeepCopy()
newKCP.Annotations = map[string]string{
"eksa-annotation": "false",
}
newKCPUnstructured, err := runtime.DefaultUnstructuredConverter.ToUnstructured(newKCP)
g.Expect(err).NotTo(HaveOccurred())

resources := []*unstructured.Unstructured{{Object: newKCPUnstructured}}

client := fake.NewClientBuilder().WithObjects(cluster, kcp).Build()
log := logr.Discard()

r := NewClusterReconciler(
NewCAPIResourceFetcher(client, log),
NewCAPIResourceUpdater(client, log),
time.Now,
log,
)

g.Expect(r.applyTemplates(ctx, cluster, resources, false)).To(Succeed())

api := envtest.NewAPIExpecter(t, client)
api.ShouldEventuallyMatch(ctx, kcp, func(g Gomega) {
g.Expect(kcp.Annotations).To(HaveKeyWithValue("my-custom-annotation", "true"))
g.Expect(kcp.Annotations).To(HaveKeyWithValue("eksa-annotation", "false"))
})
}

func TestClusterReconcilerApplyTemplatesNoExistingAnnotations(t *testing.T) {
g := NewWithT(t)
ctx := context.Background()

cluster := &anywherev1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "my-cluster-test",
},
}
kcp := &controlplanev1.KubeadmControlPlane{
TypeMeta: metav1.TypeMeta{
Kind: "KubeadmControlPlane",
APIVersion: "controlplane.cluster.x-k8s.io/v1beta1",
},
ObjectMeta: metav1.ObjectMeta{
Name: clusterapi.KubeadmControlPlaneName(cluster),
Namespace: constants.EksaSystemNamespace,
},
}
newKCP := kcp.DeepCopy()
newKCP.Annotations = map[string]string{
"eksa-annotation": "false",
}
newKCPUnstructured, err := runtime.DefaultUnstructuredConverter.ToUnstructured(newKCP)
g.Expect(err).NotTo(HaveOccurred())

resources := []*unstructured.Unstructured{{Object: newKCPUnstructured}}

client := fake.NewClientBuilder().WithObjects(cluster, kcp).Build()
log := logr.Discard()

r := NewClusterReconciler(
NewCAPIResourceFetcher(client, log),
NewCAPIResourceUpdater(client, log),
time.Now,
log,
)

g.Expect(r.applyTemplates(ctx, cluster, resources, false)).To(Succeed())

api := envtest.NewAPIExpecter(t, client)
api.ShouldEventuallyMatch(ctx, kcp, func(g Gomega) {
g.Expect(kcp.Annotations).To(HaveKeyWithValue("eksa-annotation", "false"))
})
}
6 changes: 6 additions & 0 deletions internal/test/testdata.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ func EksdRelease() *eksdv1.Release {
URI: "public.ecr.aws/eks-distro/kubernetes/kube-apiserver:v1.19.8",
},
},
{
Name: "kube-proxy-image",
Image: &eksdv1.AssetImage{
URI: "public.ecr.aws/eks-distro/kubernetes/kube-proxy:v1.19.8-eks-1-19-18",
},
},
},
},
},
Expand Down
9 changes: 9 additions & 0 deletions pkg/clients/kubernetes/runtimeclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

// ClientFactory builds clients from a kubeconfig file by
// wrapping around NewRuntimeClientFromFileName to facilitate mocking.
type ClientFactory struct{}

// BuildClientFromKubeconfig builds a K8s client from a kubeconfig file.
func (f ClientFactory) BuildClientFromKubeconfig(kubeconfigPath string) (client.Client, error) {
return NewRuntimeClientFromFileName(kubeconfigPath)
}

// NewRuntimeClientFromFileName creates a new controller runtime client given a kubeconfig filename.
func NewRuntimeClientFromFileName(kubeConfigFilename string) (client.Client, error) {
data, err := os.ReadFile(kubeConfigFilename)
Expand Down
7 changes: 7 additions & 0 deletions pkg/clients/kubernetes/runtimeclient_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,10 @@ func TestNewRuntimeClientFromFilename(t *testing.T) {
_, err := kubernetes.NewRuntimeClientFromFileName("file-does-not-exist.txt")
g.Expect(err).To(MatchError(ContainSubstring("open file-does-not-exist.txt: no such file or directory")))
}

func TestClientFactoryBuildClientFromKubeconfigNoFile(t *testing.T) {
g := NewWithT(t)
f := kubernetes.ClientFactory{}
_, err := f.BuildClientFromKubeconfig("file-does-not-exist.txt")
g.Expect(err).To(MatchError(ContainSubstring("open file-does-not-exist.txt: no such file or directory")))
}
9 changes: 9 additions & 0 deletions pkg/cluster/fetch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,12 @@ func wantKubeDistroForEksdRelease() (*eksdv1.Release, *cluster.KubeDistro) {
URI: "public.ecr.aws/eks-distro/kubernetes/kube-apiserver:v1.19.8",
},
},
{
Name: "kube-proxy-image",
Image: &eksdv1.AssetImage{
URI: "public.ecr.aws/eks-distro/kubernetes/kube-proxy:v1.19.8",
},
},
},
},
},
Expand Down Expand Up @@ -531,6 +537,9 @@ func wantKubeDistroForEksdRelease() (*eksdv1.Release, *cluster.KubeDistro) {
AwsIamAuthImage: releasev1.Image{
URI: "public.ecr.aws/eks-distro/kubernetes-sigs/aws-iam-authenticator:v0.5.2",
},
KubeProxy: releasev1.Image{
URI: "public.ecr.aws/eks-distro/kubernetes/kube-proxy:v1.19.8",
},
EtcdVersion: "3.4.14",
}

Expand Down
19 changes: 18 additions & 1 deletion pkg/cluster/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,17 @@ type VersionsBundle struct {
KubeDistro *KubeDistro
}

// EKSD represents an eks-d release.
type EKSD struct {
// Channel is the minor Kubernetes version for the eks-d release (eg. "1.23", "1.24", etc.)
Channel string
// Number is the monotonically increasing number that distinguishes the different eks-d releases
// for the same Kubernetes minor version (channel).
Number int
}

type KubeDistro struct {
EKSD EKSD
Kubernetes VersionedRepository
CoreDNS VersionedRepository
Etcd VersionedRepository
Expand All @@ -52,6 +62,7 @@ type KubeDistro struct {
EtcdImage v1alpha1.Image
EtcdVersion string
AwsIamAuthImage v1alpha1.Image
KubeProxy v1alpha1.Image
}

func (k *KubeDistro) deepCopy() *KubeDistro {
Expand Down Expand Up @@ -115,7 +126,12 @@ func (s *Spec) KubeDistroImages() []v1alpha1.Image {
}

func buildKubeDistro(eksd *eksdv1alpha1.Release) (*KubeDistro, error) {
kubeDistro := &KubeDistro{}
kubeDistro := &KubeDistro{
EKSD: EKSD{
Channel: eksd.Spec.Channel,
Number: eksd.Spec.Number,
},
}
assets := make(map[string]*eksdv1alpha1.AssetImage)
for _, component := range eksd.Status.Components {
for _, asset := range component.Assets {
Expand All @@ -136,6 +152,7 @@ func buildKubeDistro(eksd *eksdv1alpha1.Release) (*KubeDistro, error) {
"pause-image": &kubeDistro.Pause,
"etcd-image": &kubeDistro.EtcdImage,
"aws-iam-authenticator-image": &kubeDistro.AwsIamAuthImage,
"kube-proxy-image": &kubeDistro.KubeProxy,
}

for assetName, image := range kubeDistroComponents {
Expand Down
1 change: 1 addition & 0 deletions pkg/cluster/spec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ func validateSpecFromSimpleBundle(t *testing.T, gotSpec *cluster.Spec) {
validateImageURI(t, gotSpec.VersionsBundle.KubeDistro.ExternalAttacher, "public.ecr.aws/eks-distro/kubernetes-csi/external-attacher:v3.1.0-eks-1-19-4")
validateImageURI(t, gotSpec.VersionsBundle.KubeDistro.ExternalProvisioner, "public.ecr.aws/eks-distro/kubernetes-csi/external-provisioner:v2.1.1-eks-1-19-4")
validateImageURI(t, gotSpec.VersionsBundle.KubeDistro.EtcdImage, "public.ecr.aws/eks-distro/etcd-io/etcd:v3.4.14-eks-1-19-4")
validateImageURI(t, gotSpec.VersionsBundle.KubeDistro.KubeProxy, "public.ecr.aws/eks-distro/kubernetes/kube-proxy:v1.19.8-eks-1-19-4")
if gotSpec.VersionsBundle.KubeDistro.EtcdVersion != "3.4.14" {
t.Errorf("GetNewSpec() = Spec: Invalid etcd version, got %s, want 3.4.14", gotSpec.VersionsBundle.KubeDistro.EtcdVersion)
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/clustermanager/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ type KubernetesClient interface {
RemoveAnnotationInNamespace(ctx context.Context, resourceType, objectName, key string, cluster *types.Cluster, namespace string) error
}

type client struct {
type clusterManagerClient struct {
ClusterClient
}

func NewClient(clusterClient ClusterClient) *client {
return &client{ClusterClient: clusterClient}
func newClient(clusterClient ClusterClient) *clusterManagerClient {
return &clusterManagerClient{ClusterClient: clusterClient}
}

func (c *client) waitForDeployments(ctx context.Context, deploymentsByNamespace map[string][]string, cluster *types.Cluster, timeout string) error {
func (c *clusterManagerClient) waitForDeployments(ctx context.Context, deploymentsByNamespace map[string][]string, cluster *types.Cluster, timeout string) error {
for namespace, deployments := range deploymentsByNamespace {
for _, deployment := range deployments {
err := c.WaitForDeployment(ctx, cluster, timeout, "Available", deployment, namespace)
Expand Down
Loading

0 comments on commit b088d84

Please sign in to comment.