Skip to content

Commit

Permalink
add canary test entrypoint script (#1717)
Browse files Browse the repository at this point in the history
* add canary test entrypoint script

* only run the linux tests on linux nodes

* add details about the type of tests in README

* run tests only on the latest addon version
  • Loading branch information
abhipth authored Nov 2, 2021
1 parent 219f453 commit 6a15a84
Show file tree
Hide file tree
Showing 8 changed files with 163 additions and 7 deletions.
122 changes: 122 additions & 0 deletions scripts/run-canary-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/bin/bash

# The script runs amazon-vpc-cni Canary tests on the default
# addon version and then runs smoke test on the latest addon version.

# Abort on the first unhandled command failure.
set -e

# SECONDS is bash's built-in elapsed-time counter; zero it so the summary at
# the end of the script reports the duration of this run.
SECONDS=0

# Absolute directory of this script, used to locate the integration tests.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"
INTEGRATION_TEST_DIR="${SCRIPT_DIR}/../test/integration-new"
VPC_CNI_ADDON_NAME="vpc-cni"

# Print the environment-provided inputs the run depends on.
printf '%s\n' \
  "Running Canary tests for amazon-vpc-cni-k8s with the following variables" \
  "KUBE_CONFIG_PATH: $KUBE_CONFIG_PATH" \
  "CLUSTER_NAME: $CLUSTER_NAME" \
  "REGION: $REGION" \
  "ENDPOINT: $ENDPOINT"

# ENDPOINT_FLAG is only assigned when a custom endpoint is requested. It is a
# plain string that callers expand unquoted so it splits into two arguments.
[[ -z "${ENDPOINT}" ]] || ENDPOINT_FLAG="--endpoint $ENDPOINT"

# Describes the EKS cluster once and caches the fields later steps read.
# Globals:
#   CLUSTER_NAME, REGION, ENDPOINT_FLAG (read)
#   DESCRIBE_CLUSTER_OP, VPC_ID, K8S_VERSION (written)
function load_cluster_details() {
  printf '%s\n' "loading cluster details $CLUSTER_NAME"

  # ENDPOINT_FLAG is intentionally unquoted: it is either empty or
  # "--endpoint <url>" and must split into separate arguments.
  DESCRIBE_CLUSTER_OP=$(
    aws eks describe-cluster \
      --name "$CLUSTER_NAME" \
      --region "$REGION" \
      $ENDPOINT_FLAG
  )

  VPC_ID=$(jq -r '.cluster.resourcesVpcConfig.vpcId' <<< "$DESCRIBE_CLUSTER_OP")
  K8S_VERSION=$(jq -r '.cluster.version' <<< "$DESCRIBE_CLUSTER_OP")
}

# Looks up the addon versions published for the cluster's Kubernetes version.
# Globals:
#   VPC_CNI_ADDON_NAME, K8S_VERSION, ENDPOINT_FLAG (read)
#   DESCRIBE_ADDON_VERSIONS, LATEST_ADDON_VERSION, DEFAULT_ADDON_VERSION (written)
function load_addon_details() {
  echo "loading $VPC_CNI_ADDON_NAME addon details"

  # Pass the same endpoint override as every other "aws eks" call in this
  # script so the version list comes from the endpoint the cluster lives on.
  # ENDPOINT_FLAG is intentionally unquoted: it is either empty or
  # "--endpoint <url>" and must split into separate arguments.
  DESCRIBE_ADDON_VERSIONS=$(aws eks describe-addon-versions $ENDPOINT_FLAG --addon-name "$VPC_CNI_ADDON_NAME" --kubernetes-version "$K8S_VERSION")

  # The first entry of the first addon is taken as the latest version
  # (presumably the API lists newest first - TODO confirm).
  LATEST_ADDON_VERSION=$(echo "$DESCRIBE_ADDON_VERSIONS" | jq -r '.addons[0].addonVersions[0].addonVersion')
  # The default version is the one whose first compatibility entry is flagged
  # with defaultVersion == true.
  DEFAULT_ADDON_VERSION=$(echo "$DESCRIBE_ADDON_VERSIONS" | jq -r '.addons[].addonVersions[] | select(.compatibilities[0].defaultVersion == true) | .addonVersion')
}

# Blocks until the vpc-cni addon reaches the expected state, polling every
# 5 seconds. There is no timeout; the caller's job timeout is the only bound.
# Globals:
#   CLUSTER_NAME, VPC_CNI_ADDON_NAME, ENDPOINT_FLAG (read)
#   STATUS (written with the last polled status)
# Arguments:
#   $1 - expected status. "DELETED" is special-cased: it waits until
#        describe-addon starts failing. Any other value is compared against
#        the .addon.status field of the describe-addon output.
function wait_for_addon_status() {
  local expected_status=$1

  if [[ "$expected_status" == "DELETED" ]]; then
    # Test the exit status of describe-addon itself. Wrapping the call in
    # $(...) would execute its JSON output as a command, which always fails
    # and ends the wait immediately. stdout is discarded; stderr stays
    # visible so the reason for the failure can be inspected.
    # ENDPOINT_FLAG is intentionally unquoted so it splits into arguments.
    while aws eks describe-addon $ENDPOINT_FLAG --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME" > /dev/null; do
      echo "addon is still not deleted"
      sleep 5
    done
    echo "addon deleted"
    return
  fi

  while true; do
    STATUS=$(aws eks describe-addon $ENDPOINT_FLAG --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME" | jq -r '.addon.status')
    if [[ "$STATUS" == "$expected_status" ]]; then
      echo "addon status matches expected status"
      return
    fi
    echo "addon status is not equal to $expected_status"
    sleep 5
  done
}

# Ensures the requested vpc-cni addon version is installed on the cluster.
# If a different version is installed it is deleted first; if the requested
# version is already installed only the aws-node patch is re-applied.
# Globals:
#   CLUSTER_NAME, VPC_CNI_ADDON_NAME, ENDPOINT_FLAG (read)
#   DESCRIBE_ADDON (written)
# Arguments:
#   $1 - addon version to install
# Returns:
#   non-zero when the requested version is empty/"null" or the currently
#   installed version cannot be determined.
function install_add_on() {
  local new_addon_version=$1
  local current_addon_version

  # jq -r prints "null" for a missing field; refuse such a target before the
  # existing addon is deleted for a version that cannot be installed.
  if [[ -z "$new_addon_version" || "$new_addon_version" == "null" ]]; then
    echo "no valid addon version to install: '$new_addon_version'" >&2
    return 1
  fi

  # describe-addon fails when no addon is installed, which skips straight to
  # the install step. ENDPOINT_FLAG is intentionally unquoted so it splits.
  if DESCRIBE_ADDON=$(aws eks describe-addon $ENDPOINT_FLAG --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME"); then
    # Assign separately from "local" so a jq failure is not masked; an empty
    # version would otherwise look like a mismatch and delete the addon.
    if ! current_addon_version=$(echo "$DESCRIBE_ADDON" | jq -r '.addon.addonVersion'); then
      echo "unable to determine the installed addon version" >&2
      return 1
    fi

    if [[ "$new_addon_version" == "$current_addon_version" ]]; then
      echo "addon version $current_addon_version already installed"
      patch_aws_node_maxunavialable
      return
    fi

    echo "deleting the $current_addon_version to install $new_addon_version"
    aws eks delete-addon $ENDPOINT_FLAG --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME"
    wait_for_addon_status "DELETED"
  fi

  echo "installing addon $new_addon_version"
  aws eks create-addon $ENDPOINT_FLAG --cluster-name "$CLUSTER_NAME" --addon-name "$VPC_CNI_ADDON_NAME" --resolve-conflicts OVERWRITE --addon-version "$new_addon_version"
  patch_aws_node_maxunavialable
  wait_for_addon_status "ACTIVE"
}

# Raises the aws-node DaemonSet's rolling-update maxUnavailable to 100%.
function patch_aws_node_maxunavialable() {
  # Letting every aws-node pod be replaced at once makes any aws-node update
  # roll out in parallel, which shortens the overall test run.
  local rollout_patch='{"spec":{"updateStrategy":{"rollingUpdate":{"maxUnavailable": "100%"}}}}'

  kubectl patch ds -n kube-system aws-node -p "$rollout_patch"
}

# Runs the cni and ipamd ginkgo suites, restricted to specs matching a focus.
# Globals:
#   INTEGRATION_TEST_DIR, KUBE_CONFIG_PATH, CLUSTER_NAME, REGION, VPC_ID (read)
# Arguments:
#   $1 - ginkgo focus expression (e.g. "CANARY" or "SMOKE")
function run_ginkgo_test() {
  local focus=$1
  local suite_spec suite_dir suite_timeout

  # Arguments after "--" are identical for both suites; the node-group label
  # pair kubernetes.io/os=linux is passed through to the test binaries.
  local -a suite_args=(
    --cluster-kubeconfig="$KUBE_CONFIG_PATH"
    --cluster-name="$CLUSTER_NAME"
    --aws-region="$REGION"
    --aws-vpc-id="$VPC_ID"
    --ng-name-label-key="kubernetes.io/os"
    --ng-name-label-val="linux"
  )

  echo "Running ginkgo tests with focus: $focus"

  # Each entry is "<suite directory>:<ginkgo timeout>".
  for suite_spec in "cni:20m" "ipamd:10m"; do
    suite_dir=${suite_spec%%:*}
    suite_timeout=${suite_spec##*:}
    # Subshell keeps the directory change from leaking into the caller.
    (
      cd "$INTEGRATION_TEST_DIR/$suite_dir" \
        && ginkgo --focus="$focus" -v --timeout "$suite_timeout" --failOnPending -- "${suite_args[@]}"
    )
  done
}

# Resolve the cluster's VPC / Kubernetes version, then the addon versions
# available for that Kubernetes version.
load_cluster_details
load_addon_details

# TODO: v1.7.5 restarts continuously if IMDS goes out of sync. The issue is
# mitigated from v1.8.0 onwards; once the default addon version is updated to
# v1.8.0+ the commented-out code below should be re-enabled.
# See: https://github.com/aws/amazon-vpc-cni-k8s/issues/1340

# Run the more comprehensive tests on the default addon version. CANARY
# focused tests cover basic functionality plus tests that can detect issues
# with dependencies early on.
#echo "Running Canary tests on the default addon version"
#install_add_on "$DEFAULT_ADDON_VERSION"
#run_ginkgo_test "CANARY"

# Run smoke tests on the latest addon version. Smoke tests are a subset of the
# tests used in the Canary run.
#echo "Running Smoke tests on the latest addon version"
#install_add_on "$LATEST_ADDON_VERSION"
#run_ginkgo_test "SMOKE"

# TODO: Remove the following code once v1.8.0+ is made the default addon version
printf '%s\n' "Running Canary tests on the latest addon version"
install_add_on "$LATEST_ADDON_VERSION"
run_ginkgo_test "CANARY"


# SECONDS was zeroed at the top of the script, so it holds the run duration.
printf 'all tests ran successfully in %d minutes and %d seconds\n' \
  "$((SECONDS / 60))" "$((SECONDS % 60))"
26 changes: 26 additions & 0 deletions test/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
## amazon-vpc-cni-k8s Test Framework
The test framework consists of integration and e2e tests written with the Ginkgo framework. The tests can be invoked manually, or through a collection of bash scripts run by GitHub Workflows and Prow (not publicly available).

### Types of Tests

#### Pull Request Tests
Run on each Pull Request to verify that the new code changes don't introduce any regression. Because the entire test suite can take a long time to run, only the integration tests are run.

#### Nightly Integration Tests
Runs the entire test suite every night using the current GitHub build to catch regression.

#### Canary Tests
Canary tests run frequently, multiple times a day, on a live production environment. Because the full set of integration tests takes hours to run, only a limited set of tests covering the most important features is run, along with tests that have external dependencies, such as Load Balancer Service creation. These test runs are not publicly accessible at this moment.

Ginkgo Focus: [CANARY]

#### Smoke Tests
Smoke tests provide a fail-early mechanism by failing the run if basic functionality doesn't work. They can be used as a prerequisite for running the much longer Integration tests.

Ginkgo Focus: [SMOKE]


#### Work In Progress
- Run Upstream Conformance tests as part of Nightly Integration tests.
- Run All Integration/e2e tests as part of Nightly Integration tests.
- Run all Integration tests on each Pull Request.
4 changes: 2 additions & 2 deletions test/framework/resources/k8s/manifest/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func NewBusyBoxDeploymentBuilder() *DeploymentBuilder {
replicas: 10,
container: NewBusyBoxContainerBuilder().Build(),
labels: map[string]string{"role": "test"},
nodeSelector: map[string]string{},
nodeSelector: map[string]string{"kubernetes.io/os": "linux"},
terminationGracePeriod: 0,
}
}
Expand All @@ -53,7 +53,7 @@ func NewDefaultDeploymentBuilder() *DeploymentBuilder {
namespace: utils.DefaultTestNamespace,
terminationGracePeriod: 0,
labels: map[string]string{"role": "test"},
nodeSelector: map[string]string{},
nodeSelector: map[string]string{"kubernetes.io/os": "linux"},
}
}

Expand Down
8 changes: 8 additions & 0 deletions test/framework/resources/k8s/manifest/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type JobBuilder struct {
terminationGracePeriod int
nodeName string
hostNetwork bool
nodeSelector map[string]string
}

func NewDefaultJobBuilder() *JobBuilder {
Expand All @@ -39,6 +40,7 @@ func NewDefaultJobBuilder() *JobBuilder {
parallelism: 1,
terminationGracePeriod: 0,
labels: map[string]string{},
nodeSelector: map[string]string{"kubernetes.io/os": "linux"},
}
}

Expand All @@ -47,6 +49,11 @@ func (j *JobBuilder) Name(name string) *JobBuilder {
return j
}

func (j *JobBuilder) NodeSelector(selectorKey string, selectorVal string) *JobBuilder {
j.nodeSelector[selectorKey] = selectorVal
return j
}

func (j *JobBuilder) Namespace(namespace string) *JobBuilder {
j.namespace = namespace
return j
Expand Down Expand Up @@ -100,6 +107,7 @@ func (j *JobBuilder) Build() *batchV1.Job {
TerminationGracePeriodSeconds: aws.Int64(int64(j.terminationGracePeriod)),
RestartPolicy: v1.RestartPolicyNever,
NodeName: j.nodeName,
NodeSelector: j.nodeSelector,
},
},
},
Expand Down
2 changes: 1 addition & 1 deletion test/framework/resources/k8s/manifest/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func NewDefaultPodBuilder() *PodBuilder {
labels: map[string]string{},
terminationGracePeriod: 0,
restartPolicy: v1.RestartPolicyNever,
nodeSelector: map[string]string{},
nodeSelector: map[string]string{"kubernetes.io/os": "linux"},
}
}

Expand Down
4 changes: 2 additions & 2 deletions test/integration-new/cni/pod_traffic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ var _ = Describe("test pod networking", func() {
})
})

Context("when establishing UDP connection from tester to server", func() {
Context("[CANARY][SMOKE] when establishing UDP connection from tester to server", func() {
BeforeEach(func() {
serverPort = 2273
protocol = ec2.ProtocolUdp
Expand Down Expand Up @@ -225,7 +225,7 @@ var _ = Describe("test pod networking", func() {
})
})

Context("when establishing TCP connection from tester to server", func() {
Context("[CANARY][SMOKE] when establishing TCP connection from tester to server", func() {

BeforeEach(func() {
serverPort = 2273
Expand Down
2 changes: 1 addition & 1 deletion test/integration-new/cni/service_connectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ const (
)

// Verifies connectivity to deployment behind different service types
var _ = Describe("test service connectivity", func() {
var _ = Describe("[CANARY] test service connectivity", func() {
var err error

// Deployment running the http server
Expand Down
2 changes: 1 addition & 1 deletion test/integration-new/ipamd/eni_ip_leak_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const (
HOST_POD_LABEL_VAL = "host"
)

var _ = Describe("ENI/IP Leak Test", func() {
var _ = Describe("[CANARY] ENI/IP Leak Test", func() {
Context("ENI/IP Released on Pod Deletion", func() {
BeforeEach(func() {
By("creating test namespace")
Expand Down

0 comments on commit 6a15a84

Please sign in to comment.