From 6a15a84cae6576cba1c3302964431a26c435d07c Mon Sep 17 00:00:00 2001 From: Abhinav Pathak <31713659+abhipth@users.noreply.github.com> Date: Tue, 2 Nov 2021 07:31:22 -0700 Subject: [PATCH] add canary test entrypoint script (#1717) * add canary test entrypoint script * only run the linux tests on linux nodes * add details about the type of tests in README * run tests only on the latest addon version --- scripts/run-canary-test.sh | 122 ++++++++++++++++++ test/README.md | 26 ++++ .../resources/k8s/manifest/deployment.go | 4 +- test/framework/resources/k8s/manifest/job.go | 8 ++ test/framework/resources/k8s/manifest/pod.go | 2 +- test/integration-new/cni/pod_traffic_test.go | 4 +- .../cni/service_connectivity_test.go | 2 +- .../integration-new/ipamd/eni_ip_leak_test.go | 2 +- 8 files changed, 163 insertions(+), 7 deletions(-) create mode 100755 scripts/run-canary-test.sh create mode 100644 test/README.md diff --git a/scripts/run-canary-test.sh b/scripts/run-canary-test.sh new file mode 100755 index 0000000000..533776d1c2 --- /dev/null +++ b/scripts/run-canary-test.sh @@ -0,0 +1,122 @@ +#!/bin/bash + +# The script runs amazon-vpc-cni Canary tests on the default +# addon version and then runs smoke test on the latest addon version. 
+
+set -e
+
+SECONDS=0 # bash built-in timer; read at the end of the script to report the elapsed time
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+INTEGRATION_TEST_DIR="$SCRIPT_DIR/../test/integration-new"
+VPC_CNI_ADDON_NAME="vpc-cni"
+
+echo "Running Canary tests for amazon-vpc-cni-k8s with the following variables
+KUBE_CONFIG_PATH: $KUBE_CONFIG_PATH
+CLUSTER_NAME: $CLUSTER_NAME
+REGION: $REGION
+ENDPOINT: $ENDPOINT"
+
+if [[ -n "${ENDPOINT}" ]]; then
+    ENDPOINT_FLAG="--endpoint $ENDPOINT" # expanded unquoted below so it splits into flag + value, or vanishes when unset
+fi
+
+function load_cluster_details() { # Sets VPC_ID and K8S_VERSION from the describe-cluster output.
+    echo "loading cluster details $CLUSTER_NAME"
+    DESCRIBE_CLUSTER_OP=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" $ENDPOINT_FLAG)
+    VPC_ID=$(echo "$DESCRIBE_CLUSTER_OP" | jq -r '.cluster.resourcesVpcConfig.vpcId')
+    K8S_VERSION=$(echo "$DESCRIBE_CLUSTER_OP" | jq .cluster.version -r)
+}
+
+function load_addon_details() { # Sets LATEST_ADDON_VERSION and DEFAULT_ADDON_VERSION for K8S_VERSION.
+    echo "loading $VPC_CNI_ADDON_NAME addon details"
+    DESCRIBE_ADDON_VERSIONS=$(aws eks describe-addon-versions $ENDPOINT_FLAG --region "$REGION" --addon-name "$VPC_CNI_ADDON_NAME" --kubernetes-version "$K8S_VERSION")
+    # The first listed version is taken as the latest; the default is the one flagged defaultVersion.
+    LATEST_ADDON_VERSION=$(echo "$DESCRIBE_ADDON_VERSIONS" | jq '.addons[0].addonVersions[0].addonVersion' -r)
+    DEFAULT_ADDON_VERSION=$(echo "$DESCRIBE_ADDON_VERSIONS" | jq -r '.addons[].addonVersions[] | select(.compatibilities[0].defaultVersion == true) | .addonVersion')
+}
+
+function wait_for_addon_status() { # $1: status to wait for ("DELETED", or an addon status such as "ACTIVE").
+    local expected_status=$1
+    # A deleted addon has no status to read, so describe-addon failing is the signal that it is gone.
+    if [ "$expected_status" = "DELETED" ]; then
+        while aws eks describe-addon $ENDPOINT_FLAG --region "$REGION" --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME" >/dev/null; do
+            echo "addon is still not deleted"
+            sleep 5
+        done
+        echo "addon deleted"
+        return
+    fi
+    # Poll every 5 seconds until the reported status matches; there is no timeout.
+    while true
+    do
+        STATUS=$(aws eks describe-addon $ENDPOINT_FLAG --region "$REGION" --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME" | jq -r '.addon.status')
+        if [ "$STATUS" = "$expected_status" ]; then
+            echo "addon status matches expected status"
+            return
+        fi
+        echo "addon status is not equal to $expected_status"
+        sleep 5
+    done
+}
+
+function install_add_on() { # $1: addon version to install; any other installed version is deleted first.
+    local new_addon_version=$1 current_addon_version
+
+    if DESCRIBE_ADDON=$(aws eks describe-addon $ENDPOINT_FLAG --region "$REGION" --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME"); then
+        current_addon_version=$(echo "$DESCRIBE_ADDON" | jq '.addon.addonVersion' -r)
+        if [ "$new_addon_version" != "$current_addon_version" ]; then
+            echo "deleting the $current_addon_version to install $new_addon_version"
+            aws eks delete-addon $ENDPOINT_FLAG --region "$REGION" --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME"
+            wait_for_addon_status "DELETED"
+        else
+            echo "addon version $current_addon_version already installed"
+            patch_aws_node_maxunavialable
+            return
+        fi
+    fi
+
+    echo "installing addon $new_addon_version"
+    aws eks create-addon $ENDPOINT_FLAG --region "$REGION" --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME" --resolve-conflicts OVERWRITE --addon-version "$new_addon_version"
+    patch_aws_node_maxunavialable
+    wait_for_addon_status "ACTIVE"
+}
+
+function patch_aws_node_maxunavialable() {
+    # Patch aws-node so that any update to it rolls out in parallel, for faster overall test execution
+    kubectl patch ds -n kube-system aws-node -p '{"spec":{"updateStrategy":{"rollingUpdate":{"maxUnavailable": "100%"}}}}'
+}
+
+function run_ginkgo_test() { # $1: ginkgo focus string; runs the cni suite, then the ipamd suite.
+    local focus=$1
+    echo "Running ginkgo tests with focus: $focus"
+    (cd "$INTEGRATION_TEST_DIR/cni" && ginkgo --focus="$focus" -v --timeout 20m --failOnPending -- --cluster-kubeconfig="$KUBE_CONFIG_PATH" --cluster-name="$CLUSTER_NAME" --aws-region="$REGION" --aws-vpc-id="$VPC_ID" --ng-name-label-key="kubernetes.io/os" --ng-name-label-val="linux")
+    (cd "$INTEGRATION_TEST_DIR/ipamd" && ginkgo --focus="$focus" -v --timeout 10m --failOnPending -- --cluster-kubeconfig="$KUBE_CONFIG_PATH" --cluster-name="$CLUSTER_NAME" --aws-region="$REGION" --aws-vpc-id="$VPC_ID" --ng-name-label-key="kubernetes.io/os" --ng-name-label-val="linux")
+}
+
+load_cluster_details
+load_addon_details
+
+# TODO: v1.7.5 restarts continuously if IMDS goes out of sync, the issue is mitigated
+# 
from v1.8.0 onwards; once the default addon is updated to v1.8.0+ we should uncomment +# the following code. See: https://github.com/aws/amazon-vpc-cni-k8s/issues/1340 + +# Run a more comprehensive test on the default addon version. CANARY focused tests +# cover basic functionality plus tests that could detect issues with dependencies +# early on. +#echo "Running Canary tests on the default addon version" +#install_add_on "$DEFAULT_ADDON_VERSION" +#run_ginkgo_test "CANARY" + +# Run smoke test on the latest addon version. Smoke tests contain a subset of the tests +# used in Canary tests. +#echo "Running Smoke tests on the latest addon version" +#install_add_on "$LATEST_ADDON_VERSION" +#run_ginkgo_test "SMOKE" + +# TODO: Remove the following code once v1.8.0+ is made the default addon version +echo "Running Canary tests on the latest addon version" +install_add_on "$LATEST_ADDON_VERSION" +run_ginkgo_test "CANARY" + + +echo "all tests ran successfully in $(($SECONDS / 60)) minutes and $(($SECONDS % 60)) seconds" \ No newline at end of file diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000000..2f8f2a6927 --- /dev/null +++ b/test/README.md @@ -0,0 +1,26 @@ +## amazon-vpc-cni-k8s Test Framework +The test framework consists of integration and e2e tests written with the Ginkgo framework. They are invoked manually, or through a collection of bash scripts run by GitHub Workflows and Prow (not publicly available). + +### Types of Tests + +#### Pull Request Tests +Run on each Pull Request to verify that the new code changes don't introduce any regression. Because the entire test suite can take a long time, only the integration tests are run. + +#### Nightly Integration Tests +Run the entire test suite every night against the current GitHub build to catch regressions. + +#### Canary Tests +Canary tests run frequently, multiple times a day, on a live production environment. 
Given that a full run of all integration tests spans hours, we can only run a limited set of tests covering the most important features, along with tests that have dependencies like Load Balancer Service creation. These test runs are not publicly accessible at this moment. + +Ginkgo Focus: [CANARY] + +#### Smoke Tests +Smoke tests provide a fail-early mechanism by failing the run if basic functionality doesn't work. They can be used as a prerequisite for running the much longer Integration tests. + +Ginkgo Focus: [SMOKE] + + +#### Work In Progress +- Run Upstream Conformance tests as part of Nightly Integration tests. +- Run All Integration/e2e tests as part of Nightly Integration tests. +- Run all Integration tests on each Pull Request. diff --git a/test/framework/resources/k8s/manifest/deployment.go b/test/framework/resources/k8s/manifest/deployment.go index c821d38dd8..d72fad03f2 100644 --- a/test/framework/resources/k8s/manifest/deployment.go +++ b/test/framework/resources/k8s/manifest/deployment.go @@ -43,7 +43,7 @@ func NewBusyBoxDeploymentBuilder() *DeploymentBuilder { replicas: 10, container: NewBusyBoxContainerBuilder().Build(), labels: map[string]string{"role": "test"}, - nodeSelector: map[string]string{}, + nodeSelector: map[string]string{"kubernetes.io/os": "linux"}, terminationGracePeriod: 0, } } @@ -53,7 +53,7 @@ func NewDefaultDeploymentBuilder() *DeploymentBuilder { namespace: utils.DefaultTestNamespace, terminationGracePeriod: 0, labels: map[string]string{"role": "test"}, - nodeSelector: map[string]string{}, + nodeSelector: map[string]string{"kubernetes.io/os": "linux"}, } } diff --git a/test/framework/resources/k8s/manifest/job.go b/test/framework/resources/k8s/manifest/job.go index 540ba5a5f9..5a7d01d3a8 100644 --- a/test/framework/resources/k8s/manifest/job.go +++ b/test/framework/resources/k8s/manifest/job.go @@ -30,6 +30,7 @@ type JobBuilder struct { terminationGracePeriod int nodeName string hostNetwork bool + nodeSelector map[string]string } func 
NewDefaultJobBuilder() *JobBuilder { @@ -39,6 +40,7 @@ func NewDefaultJobBuilder() *JobBuilder { parallelism: 1, terminationGracePeriod: 0, labels: map[string]string{}, + nodeSelector: map[string]string{"kubernetes.io/os": "linux"}, } } @@ -47,6 +49,11 @@ func (j *JobBuilder) Name(name string) *JobBuilder { return j } +func (j *JobBuilder) NodeSelector(selectorKey string, selectorVal string) *JobBuilder { + j.nodeSelector[selectorKey] = selectorVal + return j +} + func (j *JobBuilder) Namespace(namespace string) *JobBuilder { j.namespace = namespace return j @@ -100,6 +107,7 @@ func (j *JobBuilder) Build() *batchV1.Job { TerminationGracePeriodSeconds: aws.Int64(int64(j.terminationGracePeriod)), RestartPolicy: v1.RestartPolicyNever, NodeName: j.nodeName, + NodeSelector: j.nodeSelector, }, }, }, diff --git a/test/framework/resources/k8s/manifest/pod.go b/test/framework/resources/k8s/manifest/pod.go index 915c6a9634..f0b0feac62 100644 --- a/test/framework/resources/k8s/manifest/pod.go +++ b/test/framework/resources/k8s/manifest/pod.go @@ -40,7 +40,7 @@ func NewDefaultPodBuilder() *PodBuilder { labels: map[string]string{}, terminationGracePeriod: 0, restartPolicy: v1.RestartPolicyNever, - nodeSelector: map[string]string{}, + nodeSelector: map[string]string{"kubernetes.io/os": "linux"}, } } diff --git a/test/integration-new/cni/pod_traffic_test.go b/test/integration-new/cni/pod_traffic_test.go index 1d27c6689e..7014ddac61 100644 --- a/test/integration-new/cni/pod_traffic_test.go +++ b/test/integration-new/cni/pod_traffic_test.go @@ -187,7 +187,7 @@ var _ = Describe("test pod networking", func() { }) }) - Context("when establishing UDP connection from tester to server", func() { + Context("[CANARY][SMOKE] when establishing UDP connection from tester to server", func() { BeforeEach(func() { serverPort = 2273 protocol = ec2.ProtocolUdp @@ -225,7 +225,7 @@ var _ = Describe("test pod networking", func() { }) }) - Context("when establishing TCP connection from tester to 
server", func() { + Context("[CANARY][SMOKE] when establishing TCP connection from tester to server", func() { BeforeEach(func() { serverPort = 2273 diff --git a/test/integration-new/cni/service_connectivity_test.go b/test/integration-new/cni/service_connectivity_test.go index 610b6fe20c..3a7a7d923e 100644 --- a/test/integration-new/cni/service_connectivity_test.go +++ b/test/integration-new/cni/service_connectivity_test.go @@ -34,7 +34,7 @@ const ( ) // Verifies connectivity to deployment behind different service types -var _ = Describe("test service connectivity", func() { +var _ = Describe("[CANARY] test service connectivity", func() { var err error // Deployment running the http server diff --git a/test/integration-new/ipamd/eni_ip_leak_test.go b/test/integration-new/ipamd/eni_ip_leak_test.go index db8f958a15..4c82146a36 100644 --- a/test/integration-new/ipamd/eni_ip_leak_test.go +++ b/test/integration-new/ipamd/eni_ip_leak_test.go @@ -17,7 +17,7 @@ const ( HOST_POD_LABEL_VAL = "host" ) -var _ = Describe("ENI/IP Leak Test", func() { +var _ = Describe("[CANARY] ENI/IP Leak Test", func() { Context("ENI/IP Released on Pod Deletion", func() { BeforeEach(func() { By("creating test namespace")