From 282ddcc18f7f76bad1d2f5643b2eb40841ed0e6d Mon Sep 17 00:00:00 2001 From: cgchinmay Date: Thu, 19 May 2022 16:51:26 -0700 Subject: [PATCH] cni manifest upgrade downgrade test (#1863) * Added upgrade/downgrade script template Refactored code Addon upgrade/downgrade test similar to https://github.com/aws/amazon-vpc-cni-k8s/pull/1795 Added tests for addon upgrade/downgrade Changed DEFAULT version Added addon status checks Fetch latest addon version for given K8s Cluster Update kops cluster config used in weekly tests (#1862) * Change to kops cluster creation scripts * Add logging for retry attempt * Switch kops cluster to use docker container runtime Co-authored-by: Jayanth Varavani <1111446+jayanthvn@users.noreply.github.com> Added upgrade/downgrade test for custom cni-manifest-file Added missing files remove upgrade-downgrade.sh * Add eks.go file , deleted by mistake * Extract apply manifest logic in common Remove redundant code * Add PD traffic test for cni upgrade downgrade test --- test/framework/options.go | 4 + .../host_networking_test.go | 76 ++++++++++++ .../pod_traffic_PD_enabled_test.go | 94 ++++++++++++++ .../upgrade_downgrade_suite_test.go | 116 ++++++++++++++++++ .../cni/pod_traffic_test_PD_enabled.go | 42 +------ test/integration/common/util.go | 46 +++++++ 6 files changed, 339 insertions(+), 39 deletions(-) create mode 100644 test/integration/cni-upgrade-downgrade/host_networking_test.go create mode 100644 test/integration/cni-upgrade-downgrade/pod_traffic_PD_enabled_test.go create mode 100644 test/integration/cni-upgrade-downgrade/upgrade_downgrade_suite_test.go diff --git a/test/framework/options.go b/test/framework/options.go index 1793a5731dd..69d796825ba 100644 --- a/test/framework/options.go +++ b/test/framework/options.go @@ -39,6 +39,8 @@ type Options struct { InstanceType string InitialAddon string TargetAddon string + InitialManifest string + TargetManifest string } func (options *Options) BindFlags() { @@ -51,6 +53,8 @@ func (options *Options) BindFlags() { flag.StringVar(&options.EKSEndpoint, "eks-endpoint", "", "optional eks api server endpoint") flag.StringVar(&options.InitialAddon, "initial-addon-version", "", "Initial CNI addon version before upgrade applied") flag.StringVar(&options.TargetAddon, "target-addon-version", "", "Target CNI addon version after upgrade applied") + flag.StringVar(&options.InitialManifest, "initial-manifest-file", "", "Initial CNI manifest, can be local file path or remote Url") + flag.StringVar(&options.TargetManifest, "target-manifest-file", "", "Target CNI manifest, can be local file path or remote Url") flag.StringVar(&options.CalicoVersion, "calico-version", "3.22.0", "calico version to be tested") flag.StringVar(&options.ContainerRuntime, "container-runtime", "", "Optionally can specify it as 'containerd' for the test nodes") flag.StringVar(&options.InstanceType, "instance-type", "amd64", "Optionally specify instance type as arm64 for the test nodes") diff --git a/test/integration/cni-upgrade-downgrade/host_networking_test.go b/test/integration/cni-upgrade-downgrade/host_networking_test.go new file mode 100644 index 00000000000..50e5540f6e6 --- /dev/null +++ b/test/integration/cni-upgrade-downgrade/host_networking_test.go @@ -0,0 +1,76 @@ +package cni_upgrade_downgrade + +import ( + "time" + + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/integration/common" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/apps/v1" +) + +var _ = Describe("test host networking", func() { + var err error + var podLabelKey = "app" + var podLabelVal = "host-networking-test" + var deployment *v1.Deployment + var podInput string + + Context("when pods using IP from primary and secondary ENI are created", func() { + It("should have correct host networking setup when pods are running and cleaned up when pods are terminated", func() { + By("applying initial cni manifest") + common.ApplyCNIManifest(initialManifest) + + // Launch enough pods so some pods end up using primary ENI IP and some using secondary + // ENI IP + deployment = manifest.NewBusyBoxDeploymentBuilder(). + Replicas(maxIPPerInterface*2). + PodLabel(podLabelKey, podLabelVal). + NodeName(primaryNode.Name). + Build() + + By("creating a deployment to launch pod using primary and secondary ENI IP") + deployment, err = f.K8sResourceManagers.DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deployment, utils.DefaultDeploymentReadyTimeout) + Expect(err).ToNot(HaveOccurred()) + + By("getting the list of pods using IP from primary and secondary ENI") + interfaceTypeToPodList := common.GetPodsOnPrimaryAndSecondaryInterface(primaryNode, podLabelKey, podLabelVal, f) + + // Primary ENI and Secondary ENI IPs are handled differently when setting up + // the host networking rule hence this check + Expect(len(interfaceTypeToPodList.PodsOnSecondaryENI)). + Should(BeNumerically(">", 0)) + Expect(len(interfaceTypeToPodList.PodsOnPrimaryENI)). + Should(BeNumerically(">", 0)) + + By("generating the pod networking validation input to be passed to tester") + podInput, err = common.GetPodNetworkingValidationInput(interfaceTypeToPodList, vpcCIDRs).Serialize() + Expect(err).NotTo(HaveOccurred()) + + By("validating host networking setup is setup correctly") + common.ValidateHostNetworking(common.NetworkingSetupSucceeds, podInput, primaryNode.Name, f) + + By("applying target cni manifest") + common.ApplyCNIManifest(targetManifest) + + By("deleting the deployment to test teardown") + err = f.K8sResourceManagers.DeploymentManager(). + DeleteAndWaitTillDeploymentIsDeleted(deployment) + Expect(err).ToNot(HaveOccurred()) + + By("waiting to allow CNI to tear down networking for terminated pods") + time.Sleep(time.Second * 60) + + By("validating host networking is teared down correctly") + common.ValidateHostNetworking(common.NetworkingTearDownSucceeds, podInput, primaryNode.Name, f) + }) + + AfterEach(func() { + By("revert to initial cni manifest") + common.ApplyCNIManifest(initialManifest) + }) + }) +}) diff --git a/test/integration/cni-upgrade-downgrade/pod_traffic_PD_enabled_test.go b/test/integration/cni-upgrade-downgrade/pod_traffic_PD_enabled_test.go new file mode 100644 index 00000000000..01e2074f52d --- /dev/null +++ b/test/integration/cni-upgrade-downgrade/pod_traffic_PD_enabled_test.go @@ -0,0 +1,94 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cni_upgrade_downgrade + +import ( + "fmt" + + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/integration/common" + + . "github.com/onsi/ginkgo/v2" +) + +var _ = Describe("Test pod networking with prefix delegation enabled", func() { + var ( + serverDeploymentBuilder *manifest.DeploymentBuilder + // Value for the Environment variable ENABLE_PREFIX_DELEGATION + enableIPv4PrefixDelegation string + ) + + JustBeforeEach(func() { + By("applying initial cni manifest") + common.ApplyCNIManifest(initialManifest) + + By("creating deployment") + serverDeploymentBuilder = manifest.NewDefaultDeploymentBuilder(). + Name("traffic-server"). + NodeSelector(f.Options.NgNameLabelKey, f.Options.NgNameLabelVal) + + By("set PD") + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, + utils.AwsNodeNamespace, utils.AwsNodeName, + map[string]string{"ENABLE_PREFIX_DELEGATION": enableIPv4PrefixDelegation}) + }) + + JustAfterEach(func() { + // Revert to Initial manifest file + By("revert to initial cni manifest") + common.ApplyCNIManifest(initialManifest) + + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, + utils.AwsNodeNamespace, utils.AwsNodeName, + map[string]string{"ENABLE_PREFIX_DELEGATION": "false"}) + }) + + Context("when testing TCP traffic between client and server pods", func() { + BeforeEach(func() { + enableIPv4PrefixDelegation = "true" + }) + + //TODO : Add pod IP validation if IP belongs to prefix or SIP + //TODO : remove hardcoding from client/server count + It("should have 99+% success rate", func() { + By("test with initial cni manifest file") + common.ValidateTraffic(f, serverDeploymentBuilder, 99, "tcp") + targetManifestStr := fmt.Sprintf("Testing with Target CNI Manifest: %s", targetManifest) + By(targetManifestStr) + By("Applying Taget CNI Manifest") + common.ApplyCNIManifest(targetManifest) + common.ValidateTraffic(f, serverDeploymentBuilder, 99, "tcp") + }) + }) + + Context("when testing UDP traffic between client and server pods", func() { + BeforeEach(func() { + enableIPv4PrefixDelegation = "true" + }) + + //TODO : Add pod IP validation if IP belongs to prefix or SIP + //TODO : remove hardcoding from client/server count + It("should have 99+% success rate", func() { + By("test with initial cni manifest file") + common.ValidateTraffic(f, serverDeploymentBuilder, 99, "udp") + targetManifestStr := fmt.Sprintf("Testing with Target CNI Manifest: %s", targetManifest) + By(targetManifestStr) + By("Applying Taget CNI Manifest") + common.ApplyCNIManifest(targetManifest) + common.ValidateTraffic(f, serverDeploymentBuilder, 99, "udp") + }) + }) +}) diff --git a/test/integration/cni-upgrade-downgrade/upgrade_downgrade_suite_test.go b/test/integration/cni-upgrade-downgrade/upgrade_downgrade_suite_test.go new file mode 100644 index 00000000000..0250f9740a7 --- /dev/null +++ b/test/integration/cni-upgrade-downgrade/upgrade_downgrade_suite_test.go @@ -0,0 +1,116 @@ +package cni_upgrade_downgrade + +import ( + "fmt" + "testing" + + "github.com/aws/amazon-vpc-cni-k8s/test/framework" + k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "github.com/pkg/errors" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" +) + +const ( + InstanceTypeNodeLabelKey = "beta.kubernetes.io/instance-type" + DefaultManifestfile = "https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/master/config/master/aws-k8s-cni.yaml" +) + +var f *framework.Framework +var maxIPPerInterface int +var primaryNode v1.Node +var secondaryNode v1.Node +var instanceSecurityGroupID string +var vpcCIDRs []string +var initialManifest string +var targetManifest string + +func TestCNIPodNetworking(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "CNI Pod Networking Suite") +} + +var _ = BeforeSuite(func() { + f = framework.New(framework.GlobalOptions) + + By("creating test namespace") + f.K8sResourceManagers.NamespaceManager(). + CreateNamespace(utils.DefaultTestNamespace) + + By(fmt.Sprintf("getting the node with the node label key %s and value %s", + f.Options.NgNameLabelKey, f.Options.NgNameLabelVal)) + nodes, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, f.Options.NgNameLabelVal) + Expect(err).ToNot(HaveOccurred()) + + By("verifying more than 1 nodes are present for the test") + Expect(len(nodes.Items)).Should(BeNumerically(">", 1)) + + // Set the primary and secondary node for testing + primaryNode = nodes.Items[0] + secondaryNode = nodes.Items[1] + + // Get the node security group + instanceID := k8sUtils.GetInstanceIDFromNode(primaryNode) + primaryInstance, err := f.CloudServices.EC2().DescribeInstance(instanceID) + Expect(err).ToNot(HaveOccurred()) + + // This won't work if the first SG is only associated with the primary instance. + // Need a robust substring in the SGP name to identify node SGP + instanceSecurityGroupID = *primaryInstance.NetworkInterfaces[0].Groups[0].GroupId + + By("getting the instance type from node label " + InstanceTypeNodeLabelKey) + instanceType := primaryNode.Labels[InstanceTypeNodeLabelKey] + + By("getting the network interface details from ec2") + instanceOutput, err := f.CloudServices.EC2().DescribeInstanceType(instanceType) + Expect(err).ToNot(HaveOccurred()) + + // Subtract 2 for coredns pods if present, and both could be on same ENI + maxIPPerInterface = int(*instanceOutput[0].NetworkInfo.Ipv4AddressesPerInterface) - 2 + + By("describing the VPC to get the VPC CIDRs") + describeVPCOutput, err := f.CloudServices.EC2().DescribeVPC(f.Options.AWSVPCID) + Expect(err).ToNot(HaveOccurred()) + + for _, cidrBlockAssociationSet := range describeVPCOutput.Vpcs[0].CidrBlockAssociationSet { + vpcCIDRs = append(vpcCIDRs, *cidrBlockAssociationSet.CidrBlock) + } + + initialManifest = f.Options.InitialManifest + targetManifest = f.Options.TargetManifest + if len(targetManifest) == 0 { + err = errors.Errorf("Target Manifest file must be specified") + } + Expect(err).NotTo(HaveOccurred()) + + if len(initialManifest) == 0 { + initialManifest = DefaultManifestfile + } + + initialManifestStr := fmt.Sprintf("using initial cni manifest: %s", initialManifest) + targetManifestStr := fmt.Sprintf("using target cni manifest: %s", targetManifest) + + By(initialManifestStr) + By(targetManifestStr) + + // Set the WARM_ENI_TARGET to 0 to prevent all pods being scheduled on secondary ENI + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, "aws-node", "kube-system", + "aws-node", map[string]string{"WARM_IP_TARGET": "3", "WARM_ENI_TARGET": "0"}) +}) + +var _ = AfterSuite(func() { + By("deleting test namespace") + f.K8sResourceManagers.NamespaceManager(). + DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) + + k8sUtils.UpdateEnvVarOnDaemonSetAndWaitUntilReady(f, "aws-node", "kube-system", + "aws-node", + nil, + map[string]struct{}{ + "WARM_IP_TARGET": {}, + "WARM_ENI_TARGET": {}, + }) +}) diff --git a/test/integration/cni/pod_traffic_test_PD_enabled.go b/test/integration/cni/pod_traffic_test_PD_enabled.go index efd0af7f4b7..4a716fd625d 100644 --- a/test/integration/cni/pod_traffic_test_PD_enabled.go +++ b/test/integration/cni/pod_traffic_test_PD_enabled.go @@ -14,22 +14,16 @@ package cni import ( - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/agent" "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/integration/common" . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" ) var _ = Describe("Test pod networking with prefix delegation enabled", func() { var ( - // The Pod labels for client and server in order to retrieve the - // client and server Pods belonging to a Deployment/Jobs - labelKey = "app" - serverPodLabelVal = "server-pod" - clientPodLabelVal = "client-pod" serverDeploymentBuilder *manifest.DeploymentBuilder // Value for the Environment variable ENABLE_PREFIX_DELEGATION enableIPv4PrefixDelegation string @@ -61,22 +55,7 @@ var _ = Describe("Test pod networking with prefix delegation enabled", func() { //TODO : Add pod IP validation if IP belongs to prefix or SIP //TODO : remove hardcoding from client/server count It("should have 99+% success rate", func() { - trafficTester := agent.TrafficTest{ - Framework: f, - TrafficServerDeploymentBuilder: serverDeploymentBuilder, - ServerPort: 2273, - ServerProtocol: "tcp", - ClientCount: 20, - ServerCount: 20, - ServerPodLabelKey: labelKey, - ServerPodLabelVal: serverPodLabelVal, - ClientPodLabelKey: labelKey, - ClientPodLabelVal: clientPodLabelVal, - } - - successRate, err := trafficTester.TestTraffic() - Expect(err).ToNot(HaveOccurred()) - Expect(successRate).Should(BeNumerically(">=", float64(99))) + common.ValidateTraffic(f, serverDeploymentBuilder, 99, "tcp") }) }) @@ -88,22 +67,7 @@ var _ = Describe("Test pod networking with prefix delegation enabled", func() { //TODO : Add pod IP validation if IP belongs to prefix or SIP //TODO : remove hardcoding from client/server count It("should have 99+% success rate", func() { - trafficTester := agent.TrafficTest{ - Framework: f, - TrafficServerDeploymentBuilder: serverDeploymentBuilder, - ServerPort: 2273, - ServerProtocol: "udp", - ClientCount: 20, - ServerCount: 20, - ServerPodLabelKey: labelKey, - ServerPodLabelVal: serverPodLabelVal, - ClientPodLabelKey: labelKey, - ClientPodLabelVal: clientPodLabelVal, - } - - successRate, err := trafficTester.TestTraffic() - Expect(err).ToNot(HaveOccurred()) - Expect(successRate).Should(BeNumerically(">=", float64(99))) + common.ValidateTraffic(f, serverDeploymentBuilder, 99, "udp") }) }) }) diff --git a/test/integration/common/util.go b/test/integration/common/util.go index ea11f04814a..9252ae4fded 100644 --- a/test/integration/common/util.go +++ b/test/integration/common/util.go @@ -1,10 +1,15 @@ package common import ( + "bytes" "fmt" + "io" + "os" + "os/exec" "github.com/aws/amazon-vpc-cni-k8s/test/agent/pkg/input" "github.com/aws/amazon-vpc-cni-k8s/test/framework" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/agent" "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" "github.com/aws/aws-sdk-go/service/ec2" @@ -15,6 +20,14 @@ import ( type TestType int +var ( + // The Pod labels for client and server in order to retrieve the + // client and server Pods belonging to a Deployment/Jobs + labelKey = "app" + serverPodLabelVal = "server-pod" + clientPodLabelVal = "client-pod" +) + const ( NetworkingTearDownSucceeds TestType = iota NetworkingTearDownFails @@ -146,6 +159,21 @@ func GetPodsOnPrimaryAndSecondaryInterface(node coreV1.Node, return interfaceToPodList } +func GetTrafficTestConfig(f *framework.Framework, protocol string, serverDeploymentBuilder *manifest.DeploymentBuilder, clientCount int, serverCount int) agent.TrafficTest { + return agent.TrafficTest{ + Framework: f, + TrafficServerDeploymentBuilder: serverDeploymentBuilder, + ServerPort: 2273, + ServerProtocol: protocol, + ClientCount: clientCount, + ServerCount: serverCount, + ServerPodLabelKey: labelKey, + ServerPodLabelVal: serverPodLabelVal, + ClientPodLabelKey: labelKey, + ClientPodLabelVal: clientPodLabelVal, + } +} + func IsPrimaryENI(nwInterface *ec2.InstanceNetworkInterface, instanceIPAddr *string) bool { for _, privateIPAddress := range nwInterface.PrivateIpAddresses { if *privateIPAddress.PrivateIpAddress == *instanceIPAddr { @@ -154,3 +182,21 @@ func IsPrimaryENI(nwInterface *ec2.InstanceNetworkInterface, instanceIPAddr *str } return false } + +func ApplyCNIManifest(filepath string) { + var stdoutBuf, stderrBuf bytes.Buffer + By(fmt.Sprintf("applying manifest: %s", filepath)) + cmd := exec.Command("kubectl", "apply", "-f", filepath) + cmd.Stdout = io.MultiWriter(os.Stdout, &stdoutBuf) + cmd.Stderr = io.MultiWriter(os.Stderr, &stderrBuf) + err := cmd.Run() + Expect(err).NotTo(HaveOccurred()) +} + +func ValidateTraffic(f *framework.Framework, serverDeploymentBuilder *manifest.DeploymentBuilder, succesRate float64, protocol string) { + trafficTester := GetTrafficTestConfig(f, protocol, serverDeploymentBuilder, 20, 20) + successRate, err := trafficTester.TestTraffic() + Expect(err).ToNot(HaveOccurred()) + Expect(successRate).Should(BeNumerically(">=", succesRate)) + +}