From 5900884df57da594d346bb3df369096480c34f49 Mon Sep 17 00:00:00 2001 From: Saylor Berman Date: Tue, 5 Mar 2024 15:25:58 -0700 Subject: [PATCH] Automate longevity test Problem: NFR tests are a burden to run manually, taking a lot of time and effort. Solution: Automate the longevity test to make it easier and faster for a developer to run this test. This test will be run separately from the other NFR tests, due to the fact that it is long-lived. It should not be run in the pipeline. There is still a manual step of collecting dashboard results. Also separated out functional and nfr tests in the Makefile and README to better separate the two types of tests. These changes force NFR tests to be run in a GKE environment. --- .github/workflows/nfr.yml | 4 +- .gitignore | 3 + .yamllint.yaml | 2 +- tests/Makefile | 78 ++++++--- tests/README.md | 95 +++++++++-- tests/framework/results.go | 9 ++ tests/longevity/longevity.md | 151 ------------------ .../longevity}/1.0.0/1.0.0.md | 0 .../longevity}/1.0.0/cpu.png | Bin .../longevity}/1.0.0/memory.png | Bin .../longevity}/1.0.0/reload-time.png | Bin .../longevity}/1.0.0/reloads.png | Bin .../longevity}/1.0.0/stub-status.png | Bin .../longevity}/1.1.0/1.1.0.md | 0 .../longevity}/1.1.0/cpu.png | Bin .../longevity}/1.1.0/memory.png | Bin .../longevity}/1.1.0/reload-time.png | Bin .../longevity}/1.1.0/reloads.png | Bin .../longevity}/1.1.0/stub-status.png | Bin tests/scripts/create-gke-cluster.sh | 3 +- tests/scripts/remote-scripts/install-deps.sh | 2 +- tests/scripts/remote-scripts/run-nfr-tests.sh | 19 +++ tests/scripts/run-tests-gcp-vm.sh | 16 +- tests/scripts/sync-files-to-vm.sh | 9 ++ tests/suite/dataplane_perf_test.go | 2 +- tests/suite/longevity_test.go | 137 ++++++++++++++++ .../manifests/longevity}/cafe-routes.yaml | 0 .../manifests/longevity}/cafe-secret.yaml | 0 .../manifests/longevity}/cafe.yaml | 0 .../manifests/longevity}/cronjob.yaml | 10 +- .../manifests/longevity}/gateway.yaml | 0 .../manifests/longevity}/prom.yaml 
| 2 +- tests/suite/sample_test.go | 2 +- tests/suite/scripts/longevity-wrk.sh | 9 ++ tests/suite/system_suite_test.go | 60 +++++-- tests/suite/upgrade_test.go | 17 +- 36 files changed, 391 insertions(+), 239 deletions(-) delete mode 100644 tests/longevity/longevity.md rename tests/{longevity/results => results/longevity}/1.0.0/1.0.0.md (100%) rename tests/{longevity/results => results/longevity}/1.0.0/cpu.png (100%) rename tests/{longevity/results => results/longevity}/1.0.0/memory.png (100%) rename tests/{longevity/results => results/longevity}/1.0.0/reload-time.png (100%) rename tests/{longevity/results => results/longevity}/1.0.0/reloads.png (100%) rename tests/{longevity/results => results/longevity}/1.0.0/stub-status.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/1.1.0.md (100%) rename tests/{longevity/results => results/longevity}/1.1.0/cpu.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/memory.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/reload-time.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/reloads.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/stub-status.png (100%) create mode 100644 tests/scripts/remote-scripts/run-nfr-tests.sh create mode 100755 tests/scripts/sync-files-to-vm.sh create mode 100644 tests/suite/longevity_test.go rename tests/{longevity/manifests => suite/manifests/longevity}/cafe-routes.yaml (100%) rename tests/{longevity/manifests => suite/manifests/longevity}/cafe-secret.yaml (100%) rename tests/{longevity/manifests => suite/manifests/longevity}/cafe.yaml (100%) rename tests/{longevity/manifests => suite/manifests/longevity}/cronjob.yaml (86%) rename tests/{longevity/manifests => suite/manifests/longevity}/gateway.yaml (100%) rename tests/{longevity/manifests => suite/manifests/longevity}/prom.yaml (79%) create mode 100755 tests/suite/scripts/longevity-wrk.sh diff --git a/.github/workflows/nfr.yml 
b/.github/workflows/nfr.yml index dabc986878..8e435b654e 100644 --- a/.github/workflows/nfr.yml +++ b/.github/workflows/nfr.yml @@ -144,9 +144,9 @@ jobs: working-directory: ./tests run: | if ${{ inputs.test_label != 'all' }}; then - sed -i '/^GINKGO_LABEL=/s/=.*/="${{ inputs.test_label }}"/' "scripts/vars.env" && make run-tests-on-vm; + sed -i '/^GINKGO_LABEL=/s/=.*/="${{ inputs.test_label }}"/' "scripts/vars.env" && make nfr-test; else - make run-tests-on-vm; + make nfr-test; fi - name: Cleanup diff --git a/.gitignore b/.gitignore index 81ad399f73..a87ca2ab36 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,6 @@ internal/mode/static/nginx/modules/coverage # Credential files **/gha-creds-*.json + +# SSH config files +*.ssh diff --git a/.yamllint.yaml b/.yamllint.yaml index 20470b80df..478262b7dd 100644 --- a/.yamllint.yaml +++ b/.yamllint.yaml @@ -41,7 +41,7 @@ rules: .github/ deploy/manifests/nginx-gateway.yaml deploy/manifests/crds - tests/longevity/manifests/cronjob.yaml + tests/suite/manifests/longevity/cronjob.yaml .goreleaser.yml new-line-at-end-of-file: enable new-lines: enable diff --git a/tests/Makefile b/tests/Makefile index 6b32e47d07..a561b7ea87 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -32,6 +32,10 @@ help: Makefile ## Display this help create-kind-cluster: ## Create a kind cluster cd .. && make create-kind-cluster +.PHONY: delete-kind-cluster +delete-kind-cluster: ## Delete kind cluster + kind delete cluster + .PHONY: build-images build-images: ## Build NGF and NGINX images cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images @@ -48,46 +52,70 @@ load-images: ## Load NGF and NGINX images on configured kind cluster load-images-with-plus: ## Load NGF and NGINX Plus images on configured kind cluster cd .. 
&& make PREFIX=$(PREFIX) TAG=$(TAG) load-images-with-plus -test: ## Run the system tests against your default k8s cluster - go test -v ./suite $(GINKGO_FLAGS) -args --gateway-api-version=$(GW_API_VERSION) \ - --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ - --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) \ - --pull-policy=$(PULL_POLICY) --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) \ - --is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) +.PHONY: setup-gcp-and-run-tests +setup-gcp-and-run-tests: create-gke-router create-and-setup-vm run-tests-on-vm ## Create and setup a GKE router and GCP VM for tests and run the functional tests -.PHONY: delete-kind-cluster -delete-kind-cluster: ## Delete kind cluster - kind delete cluster +.PHONY: setup-gcp-and-run-nfr-tests +setup-gcp-and-run-nfr-tests: create-gke-router create-and-setup-vm nfr-test ## Create and setup a GKE router and GCP VM for tests and run the NFR tests -.PHONY: run-tests-on-vm -run-tests-on-vm: ## Run the tests on a GCP VM - bash scripts/run-tests-gcp-vm.sh +.PHONY: create-gke-cluster +create-gke-cluster: ## Create a GKE cluster + bash scripts/create-gke-cluster.sh $(CI) .PHONY: create-and-setup-vm create-and-setup-vm: ## Create and setup a GCP VM for tests bash scripts/create-and-setup-gcp-vm.sh -.PHONY: cleanup-vm -cleanup-vm: ## Delete the test GCP VM and delete the firewall rule - bash scripts/cleanup-vm.sh - .PHONY: create-gke-router create-gke-router: ## Create a GKE router to allow egress traffic from private nodes (allows for external image pulls) bash scripts/create-gke-router.sh -.PHONY: cleanup-router -cleanup-router: ## Delete the GKE router - bash scripts/cleanup-router.sh +.PHONY: sync-files-to-vm +sync-files-to-vm: ## Syncs your local NGF files with the NGF repo on the VM + bash scripts/sync-files-to-vm.sh -.PHONY: setup-gcp-and-run-tests -setup-gcp-and-run-tests: create-gke-router 
create-and-setup-vm run-tests-on-vm ## Create and setup a GKE router and GCP VM for tests and run the tests +.PHONY: run-tests-on-vm +run-tests-on-vm: ## Run the functional tests on a GCP VM + bash scripts/run-tests-gcp-vm.sh + +.PHONY: nfr-test +nfr-test: ## Run the NFR tests on a GCP VM + bash scripts/run-tests-gcp-vm.sh true + +.PHONY: start-longevity-test +start-longevity-test: ## Start the longevity test to run for 4 days in GKE + START_LONGEVITY=true $(MAKE) nfr-test + +.PHONY: stop-longevity-test +stop-longevity-test: ## Stops the longevity test and collects results + STOP_LONGEVITY=true $(MAKE) nfr-test + +.PHONY: .vm-nfr-test +.vm-nfr-test: ## Runs the NFR tests on the GCP VM (called by `nfr-test`) + go test -v ./suite -ginkgo.label-filter "nfr" $(GINKGO_FLAGS) -ginkgo.v -args --gateway-api-version=$(GW_API_VERSION) \ + --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ + --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) \ + --pull-policy=$(PULL_POLICY) --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) \ + --is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) + +.PHONY: test +test: ## Runs the functional tests on your default k8s cluster + go test -v ./suite -ginkgo.label-filter "functional" $(GINKGO_FLAGS) -args --gateway-api-version=$(GW_API_VERSION) \ + --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ + --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) \ + --pull-policy=$(PULL_POLICY) --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) \ + --is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) .PHONY: cleanup-gcp cleanup-gcp: cleanup-router cleanup-vm delete-gke-cluster ## Cleanup all GCP resources -.PHONY: create-gke-cluster -create-gke-cluster: ## Create a GKE cluster - bash scripts/create-gke-cluster.sh $(CI) +.PHONY: cleanup-router +cleanup-router: 
## Delete the GKE router + bash scripts/cleanup-router.sh + +.PHONY: cleanup-vm +cleanup-vm: ## Delete the test GCP VM and delete the firewall rule + bash scripts/cleanup-vm.sh .PHONY: delete-gke-cluster delete-gke-cluster: ## Delete the GKE cluster @@ -95,4 +123,4 @@ delete-gke-cluster: ## Delete the GKE cluster .PHONY: add-local-ip-to-cluster add-local-ip-to-cluster: ## Add local IP to the GKE cluster master-authorized-networks - bash scripts/add-local-ip-to-cluster.sh + bash scripts/add-local-ip-auth-networks.sh diff --git a/tests/README.md b/tests/README.md index 07a8ea141e..9c81467dd6 100644 --- a/tests/README.md +++ b/tests/README.md @@ -4,19 +4,22 @@ The tests in this directory are meant to be run on a live Kubernetes environment are similar to the existing [conformance tests](../conformance/README.md), but will verify things such as: - NGF-specific functionality -- Non-Functional requirements testing (such as performance, scale, etc.) +- Non-Functional requirements (NFR) testing (such as performance, scale, etc.) When running locally, the tests create a port-forward from your NGF Pod to localhost using a port chosen by the test framework. Traffic is sent over this port. If running on a GCP VM targeting a GKE cluster, the tests will create an internal LoadBalancer service which will receive the test traffic. +**Important**: NFR tests can only be run on a GKE cluster. + Directory structure is as follows: - `framework`: contains utility functions for running the tests -- `suite`: contains the test files - `results`: contains the results files +- `scripts`: contains scripts used to set up the environment and run the tests +- `suite`: contains the test files -**Note**: Existing NFR tests will be migrated into this testing `suite` and results stored in the `results` directory. +> Note: Existing NFR tests will be migrated into this testing `suite` and results stored in the `results` directory. 
## Prerequisites @@ -24,13 +27,13 @@ Directory structure is as follows: - Docker. - Golang. -If running the tests on a VM (`make create-vm-and-run-tests` or `make run-tests-on-vm`): +If running NFR tests, or running functional tests in GKE: - The [gcloud CLI](https://cloud.google.com/sdk/docs/install) - A GKE cluster (if `master-authorized-networks` is enabled, please set `ADD_VM_IP_AUTH_NETWORKS=true` in your vars.env file) - Access to GCP Service Account with Kubernetes admin permissions -**Note**: all commands in steps below are executed from the `tests` directory +> Note: all commands in steps below are executed from the `tests` directory ```shell make @@ -52,9 +55,14 @@ delete-kind-cluster Delete kind cluster help Display this help load-images-with-plus Load NGF and NGINX Plus images on configured kind cluster load-images Load NGF and NGINX images on configured kind cluster -run-tests-on-vm Run the tests on a GCP VM -setup-gcp-and-run-tests Create and setup a GKE router and GCP VM for tests and run the tests -test Run the system tests against your default k8s cluster +nfr-test Run the NFR tests on a GCP VM +run-tests-on-vm Run the functional tests on a GCP VM +setup-gcp-and-run-nfr-tests Create and setup a GKE router and GCP VM for tests and run the NFR tests +setup-gcp-and-run-tests Create and setup a GKE router and GCP VM for tests and run the functional tests +start-longevity-test Start the longevity test to run for 4 days in GKE +stop-longevity-test Stops the longevity test and collects results +sync-files-to-vm Syncs your local NGF files with the NGF repo on the VM +test Runs the functional tests on your default k8s cluster ``` **Note:** The following variables are configurable when running the below `make` commands: @@ -78,6 +86,8 @@ test Run the system tests against your default k8s clu This can be done in a cloud provider of choice, or locally using `kind`. +**Important**: NFR tests can only be run on a GKE cluster. 
+ To create a local `kind` cluster: ```makefile @@ -128,7 +138,7 @@ make build-images-with-plus load-images-with-plus TAG=$(whoami) ## Step 3 - Run the tests -### 3a - Run the tests locally +### 3a - Run the functional tests locally ```makefile make test TAG=$(whoami) @@ -142,9 +152,9 @@ make test TAG=$(whoami) PLUS_ENABLED=true ### 3b - Run the tests on a GKE cluster from a GCP VM -This step only applies if you would like to run the tests on a GKE cluster from a GCP based VM. +This step only applies if you are running the NFR tests, or would like to run the functional tests on a GKE cluster from a GCP based VM. -Before running the below `make` command, copy the `scripts/vars.env-example` file to `scripts/vars.env` and populate the +Before running the below `make` commands, copy the `scripts/vars.env-example` file to `scripts/vars.env` and populate the required env vars. `GKE_SVC_ACCOUNT` needs to be the name of a service account that has Kubernetes admin permissions. In order to run the tests in GCP, you need a few things: @@ -153,30 +163,81 @@ In order to run the tests in GCP, you need a few things: - this assumes that your GKE cluster is using private nodes. If using public nodes, you don't need this. - GCP VM and firewall rule to send ingress traffic to GKE +To just set up the VM with no router (this will not run the tests): + +```makefile +make create-and-setup-vm +``` + +Otherwise, you can set up the VM, router, and run the tests with a single command. See the options in the sections below. + +By default, the tests run using the version of NGF that was `git cloned` during the setup. 
If you want to make +incremental changes and copy your local changes to the VM to test, you can run + +```makefile +make sync-files-to-vm +``` + +#### Functional Tests + To set up the GCP environment with the router and VM and then run the tests, run the following command: ```makefile make setup-gcp-and-run-tests ``` -If you just need a VM and no router (this will not run the tests): +To use an existing VM to run the tests, run the following ```makefile -make create-and-setup-vm +make run-tests-on-vm +``` + +#### NFR tests + +To set up the GCP environment with the router and VM and then run the tests, run the following command: + + +```makefile +make setup-gcp-and-run-nfr-tests ``` To use an existing VM to run the tests, run the following ```makefile -make run-tests-on-vm +make nfr-test ``` +##### Longevity testing + +This test is run on its own (and also not in a pipeline) due to its long-running nature. It will run for 4 days before +the tester must collect the results and complete the test. + +To start the longevity test, set up your VM (`create-and-setup-vm`) and run + +```makefile +make start-longevity-test +``` + +> Note: If you want to re-run the longevity test, you need to clear out the `cafe.example.com` entry from the `/etc/hosts` file on your VM. + +You can verify the test is working by checking nginx logs to see traffic flow, and check that the cronjob is running and redeploying apps. + +To complete the longevity test and collect results, first visit the [GCP Monitoring Dashboards](https://console.cloud.google.com/monitoring/dashboards) page and select the `NGF Longevity Test` dashboard. Take PNG screenshots of each chart for the time period in which your test ran, and save those to be added to the results file. + +Next, run: + +```makefile +make stop-longevity-test +``` + +This will tear down the test and collect results into a file, where you can add the PNGs of the dashboard. 
+ ### Common test amendments -To run all tests with the label "performance", use the GINKGO_LABEL variable: +To run all tests with the label "my-label", use the GINKGO_LABEL variable: ```makefile -make test TAG=$(whoami) GINKGO_LABEL=performance +make test TAG=$(whoami) GINKGO_LABEL=my-label ``` or to pass a specific flag, e.g. run a specific test, use the GINKGO_FLAGS variable: @@ -185,6 +246,8 @@ or to pass a specific flag, e.g. run a specific test, use the GINKGO_FLAGS varia make test TAG=$(whoami) GINKGO_FLAGS='-ginkgo.focus "writes the system info to a results file"' ``` +> Note: if filtering on NFR tests (or functional tests on GKE), set the filter in the appropriate field in your `vars.env` file. + If you are running the tests in GCP, add your required label/ flags to `scripts/var.env`. You can also modify the tests code for a similar outcome. To run a specific test, you can "focus" it by adding the `F` diff --git a/tests/framework/results.go b/tests/framework/results.go index 5ea944563b..429dd40e6e 100644 --- a/tests/framework/results.go +++ b/tests/framework/results.go @@ -77,6 +77,15 @@ func WriteResults(resultsFile *os.File, metrics *Metrics) error { return reporter.Report(resultsFile) } +// WriteContent writes basic content to the results file. +func WriteContent(resultsFile *os.File, content string) error { + if _, err := fmt.Fprintln(resultsFile, content); err != nil { + return err + } + + return nil +} + // NewCSVEncoder returns a vegeta CSV encoder. func NewCSVEncoder(w io.Writer) vegeta.Encoder { return vegeta.NewCSVEncoder(w) diff --git a/tests/longevity/longevity.md b/tests/longevity/longevity.md deleted file mode 100644 index 1271678cca..0000000000 --- a/tests/longevity/longevity.md +++ /dev/null @@ -1,151 +0,0 @@ -# Longevity Test - -This document describes how we test NGF for longevity. 
- - - -- [Longevity Test](#longevity-test) - - [Goals](#goals) - - [Test Environment](#test-environment) - - [Steps](#steps) - - [Start](#start) - - [Check the Test is Running Correctly](#check-the-test-is-running-correctly) - - [End](#end) - - [Analyze](#analyze) - - [Results](#results) - - - -## Goals - -- Ensure that NGF successfully processes both control plane and data plane transactions over a period of time much - greater than in our other tests. -- Catch bugs that could only appear over a period of time (like resource leaks). - -## Test Environment - -- A Kubernetes cluster with 3 nodes on GKE - - Node: e2-medium (2 vCPU, 4GB memory) - - Enabled GKE logging. - - Enabled GKE Cloud monitoring with managed Prometheus service, with enabled: - - system. - - kube state - pods, deployments. -- Tester VMs on Google Cloud: - - Configuration: - - Debian - - Install packages: tmux, wrk - - Location - same zone as the Kubernetes cluster. - - First VM - for HTTP traffic - - Second VM - for sending HTTPs traffic -- NGF - - Deployment with 1 replica - - Exposed via a Service with type LoadBalancer, private IP - - Gateway, two listeners - HTTP and HTTPs - - Two apps: - - Coffee - 3 replicas - - Tea - 3 replicas - - Two HTTPRoutes - - Coffee (HTTP) - - Tea (HTTPS) - -## Steps - -### Start - -Test duration - 4 days. - -1. Create a Kubernetes cluster on GKE. -2. Deploy NGF. -3. Expose NGF via a LoadBalancer Service with `"networking.gke.io/load-balancer-type":"Internal"` annotation to - allocate an internal load balancer. -4. Apply the manifests which will: - 1. Deploy the coffee and tea backends. - 2. Configure HTTP and HTTPS listeners on the Gateway. - 3. Expose coffee via HTTP listener and tea via HTTPS listener. - 4. Create two CronJobs to re-rollout backends: - 1. Coffee - every minute for an hour every 6 hours - 2. Tea - every minute for an hour every 6 hours, 3 hours apart from coffee. - 5. 
Configure Prometheus on GKE to pick up NGF metrics (NB: Ensure that the `app.kubernetes.io/name` label matches - your NGF deployment). - - ```shell - kubectl apply -f files - ``` - -5. In Tester VMs, update `/etc/hosts` to have an entry with the External IP of the NGF Service (`10.128.0.10` in this - case): - - ```text - 10.128.0.10 cafe.example.com - ``` - -6. In Tester VMs, start a tmux session (this is needed so that even if you disconnect from the VM, any launched command - will keep running): - - ```shell - tmux - ``` - -7. In First VM, start wrk for 4 days for coffee via HTTP: - - ```shell - wrk -t2 -c100 -d96h http://cafe.example.com/coffee - ``` - -8. In Second VM, start wrk for 4 days for tea via HTTPS: - - ```shell - wrk -t2 -c100 -d96h https://cafe.example.com/tea - ``` - -Notes: - -- The updated coffee and tea backends in cafe.yaml include extra configuration for zero time upgrades, so that - wrk in Tester VMs doesn't get 502 from NGF. Based on https://learnk8s.io/graceful-shutdown - -### Check the Test is Running Correctly - -Check that you don't see any errors: - -1. Check that GKE exports NGF pod logs to Google Cloud Operations Logging and Prometheus metrics to Google Cloud - Monitoring. -2. Check that traffic is flowing - look at the access logs of NGINX in Google Cloud Operations Logging. -3. Check that CronJob can run. - - ```shell - kubectl create job --from=cronjob/coffee-rollout-mgr coffee-test - kubectl create job --from=cronjob/tea-rollout-mgr tea-test - ``` - -In case of errors, double check if you prepared the environment and launched the test correctly. - -### End - -- Remove CronJobs. - -## Analyze - -- Traffic - - Tester VMs (clients) - - As wrk stop, they will print output upon termination. To connect to the tmux session with wrk, - run `tmux attach -t 0` - - Check for errors, latency, RPS -- Logs - - Check the logs for errors in Google Cloud Operations Logging. - - NGF - - NGINX -- Check metrics in Google Cloud Monitoring. 
- - NGF - - CPU usage - - NGINX - - NGF - - Memory usage - - NGINX - - NGF - - NGINX metrics - - Reloads - -## Results - -- [1.0.0](results/1.0.0/1.0.0.md) -- [1.1.0](results/1.1.0/1.1.0.md) diff --git a/tests/longevity/results/1.0.0/1.0.0.md b/tests/results/longevity/1.0.0/1.0.0.md similarity index 100% rename from tests/longevity/results/1.0.0/1.0.0.md rename to tests/results/longevity/1.0.0/1.0.0.md diff --git a/tests/longevity/results/1.0.0/cpu.png b/tests/results/longevity/1.0.0/cpu.png similarity index 100% rename from tests/longevity/results/1.0.0/cpu.png rename to tests/results/longevity/1.0.0/cpu.png diff --git a/tests/longevity/results/1.0.0/memory.png b/tests/results/longevity/1.0.0/memory.png similarity index 100% rename from tests/longevity/results/1.0.0/memory.png rename to tests/results/longevity/1.0.0/memory.png diff --git a/tests/longevity/results/1.0.0/reload-time.png b/tests/results/longevity/1.0.0/reload-time.png similarity index 100% rename from tests/longevity/results/1.0.0/reload-time.png rename to tests/results/longevity/1.0.0/reload-time.png diff --git a/tests/longevity/results/1.0.0/reloads.png b/tests/results/longevity/1.0.0/reloads.png similarity index 100% rename from tests/longevity/results/1.0.0/reloads.png rename to tests/results/longevity/1.0.0/reloads.png diff --git a/tests/longevity/results/1.0.0/stub-status.png b/tests/results/longevity/1.0.0/stub-status.png similarity index 100% rename from tests/longevity/results/1.0.0/stub-status.png rename to tests/results/longevity/1.0.0/stub-status.png diff --git a/tests/longevity/results/1.1.0/1.1.0.md b/tests/results/longevity/1.1.0/1.1.0.md similarity index 100% rename from tests/longevity/results/1.1.0/1.1.0.md rename to tests/results/longevity/1.1.0/1.1.0.md diff --git a/tests/longevity/results/1.1.0/cpu.png b/tests/results/longevity/1.1.0/cpu.png similarity index 100% rename from tests/longevity/results/1.1.0/cpu.png rename to tests/results/longevity/1.1.0/cpu.png diff --git 
a/tests/longevity/results/1.1.0/memory.png b/tests/results/longevity/1.1.0/memory.png similarity index 100% rename from tests/longevity/results/1.1.0/memory.png rename to tests/results/longevity/1.1.0/memory.png diff --git a/tests/longevity/results/1.1.0/reload-time.png b/tests/results/longevity/1.1.0/reload-time.png similarity index 100% rename from tests/longevity/results/1.1.0/reload-time.png rename to tests/results/longevity/1.1.0/reload-time.png diff --git a/tests/longevity/results/1.1.0/reloads.png b/tests/results/longevity/1.1.0/reloads.png similarity index 100% rename from tests/longevity/results/1.1.0/reloads.png rename to tests/results/longevity/1.1.0/reloads.png diff --git a/tests/longevity/results/1.1.0/stub-status.png b/tests/results/longevity/1.1.0/stub-status.png similarity index 100% rename from tests/longevity/results/1.1.0/stub-status.png rename to tests/results/longevity/1.1.0/stub-status.png diff --git a/tests/scripts/create-gke-cluster.sh b/tests/scripts/create-gke-cluster.sh index 20e7c08bcf..1e05db4d4b 100644 --- a/tests/scripts/create-gke-cluster.sh +++ b/tests/scripts/create-gke-cluster.sh @@ -14,7 +14,8 @@ gcloud container clusters create ${GKE_CLUSTER_NAME} \ --service-account ${GKE_NODES_SERVICE_ACCOUNT} \ --enable-private-nodes \ --master-ipv4-cidr 172.16.${ip_random_digit}.32/28 \ - --metadata=block-project-ssh-keys=TRUE + --metadata=block-project-ssh-keys=TRUE \ + --monitoring=SYSTEM,POD,DEPLOYMENT # Add current IP to GKE master control node access, if this script is not invoked during a CI run. 
if [ "${IS_CI}" = "false" ]; then diff --git a/tests/scripts/remote-scripts/install-deps.sh b/tests/scripts/remote-scripts/install-deps.sh index 371f75ff62..1196a1f21c 100644 --- a/tests/scripts/remote-scripts/install-deps.sh +++ b/tests/scripts/remote-scripts/install-deps.sh @@ -4,7 +4,7 @@ set -e source ~/vars.env -sudo apt-get -y update && sudo apt-get -y install git make kubectl google-cloud-sdk-gke-gcloud-auth-plugin jq gnuplot && \ +sudo apt-get -y update && sudo apt-get -y install git make kubectl google-cloud-sdk-gke-gcloud-auth-plugin jq gnuplot rsync wrk && \ curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash && \ export GO_VERSION=$(curl -sSL "https://golang.org/dl/?mode=json" | jq -r '.[0].version') && \ wget https://go.dev/dl/${GO_VERSION}.linux-amd64.tar.gz && \ diff --git a/tests/scripts/remote-scripts/run-nfr-tests.sh b/tests/scripts/remote-scripts/run-nfr-tests.sh new file mode 100644 index 0000000000..10b4c1ea06 --- /dev/null +++ b/tests/scripts/remote-scripts/run-nfr-tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e + +source ~/vars.env + +echo "export PATH=$PATH:/usr/local/go/bin" >> $HOME/.profile && . 
$HOME/.profile + +if [ "$START_LONGEVITY" == "true" ]; then + GINKGO_LABEL="longevity-setup" +elif [ "$STOP_LONGEVITY" == "true" ]; then + GINKGO_LABEL="longevity-teardown" +fi + +cd nginx-gateway-fabric/tests && make .vm-nfr-test TAG=${TAG} PREFIX=${PREFIX} NGINX_PREFIX=${NGINX_PREFIX} NGINX_PLUS_PREFIX=${NGINX_PLUS_PREFIX} PLUS_ENABLED=${PLUS_ENABLED} GINKGO_LABEL=${GINKGO_LABEL} GINKGO_FLAGS=${GINKGO_FLAGS} PULL_POLICY=Always GW_SERVICE_TYPE=LoadBalancer GW_SVC_GKE_INTERNAL=true NGF_VERSION=${NGF_VERSION} + +if [ "$START_LONGEVITY" == "true" ]; then + suite/scripts/longevity-wrk.sh +fi diff --git a/tests/scripts/run-tests-gcp-vm.sh b/tests/scripts/run-tests-gcp-vm.sh index 2a407bfa19..521cd078ae 100644 --- a/tests/scripts/run-tests-gcp-vm.sh +++ b/tests/scripts/run-tests-gcp-vm.sh @@ -2,10 +2,22 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +NFR=${1:-false} + source scripts/vars.env +SCRIPT=run-tests.sh +if [ "${NFR}" = "true" ]; then + SCRIPT=run-nfr-tests.sh +fi + gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} ${SCRIPT_DIR}/vars.env username@${RESOURCE_NAME}:~ -gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} username@${RESOURCE_NAME} --command="bash -s" < ${SCRIPT_DIR}/remote-scripts/run-tests.sh +gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} username@${RESOURCE_NAME} \ + --command="export START_LONGEVITY=${START_LONGEVITY} &&\ + export STOP_LONGEVITY=${STOP_LONGEVITY} &&\ + bash -s" < ${SCRIPT_DIR}/remote-scripts/${SCRIPT} -gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} --recurse username@${RESOURCE_NAME}:~/nginx-gateway-fabric/tests/results . +if [ "${NFR}" = "true" ]; then + gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} --recurse username@${RESOURCE_NAME}:~/nginx-gateway-fabric/tests/results . 
+fi diff --git a/tests/scripts/sync-files-to-vm.sh b/tests/scripts/sync-files-to-vm.sh new file mode 100755 index 0000000000..c7862c2054 --- /dev/null +++ b/tests/scripts/sync-files-to-vm.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +source scripts/vars.env + +NGF_DIR=$(dirname "$PWD") + +gcloud compute config-ssh --ssh-config-file ngf-gcp.ssh > /dev/null + +rsync -ave 'ssh -F ngf-gcp.ssh' ${NGF_DIR} username@${RESOURCE_NAME}.${GKE_CLUSTER_ZONE}.${GKE_PROJECT}:~ diff --git a/tests/suite/dataplane_perf_test.go b/tests/suite/dataplane_perf_test.go index 09f7a67482..9af85a1b27 100644 --- a/tests/suite/dataplane_perf_test.go +++ b/tests/suite/dataplane_perf_test.go @@ -17,7 +17,7 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) -var _ = Describe("Dataplane performance", Ordered, Label("performance"), func() { +var _ = Describe("Dataplane performance", Ordered, Label("nfr", "performance"), func() { files := []string{ "dp-perf/coffee.yaml", "dp-perf/gateway.yaml", diff --git a/tests/suite/longevity_test.go b/tests/suite/longevity_test.go new file mode 100644 index 0000000000..4542c2a84f --- /dev/null +++ b/tests/suite/longevity_test.go @@ -0,0 +1,137 @@ +package suite + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/nginxinc/nginx-gateway-fabric/tests/framework" +) + +// Longevity test is an NFR test, but does not include the "nfr" label. It needs to run on its own, +// outside of the scope of the other NFR tests. This is because it's a long-term test whose environment +// shouldn't be torn down. 
+var _ = Describe("Longevity", Label("longevity-setup", "longevity-teardown"), func() {
+	var (
+		files = []string{
+			"longevity/cafe.yaml",
+			"longevity/cafe-secret.yaml",
+			"longevity/gateway.yaml",
+			"longevity/cafe-routes.yaml",
+			"longevity/cronjob.yaml",
+		}
+		promFile = []string{
+			"longevity/prom.yaml",
+		}
+
+		ns = &core.Namespace{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "longevity",
+			},
+		}
+
+		labelFilter = GinkgoLabelFilter()
+	)
+
+	BeforeEach(func() {
+		if !strings.Contains(labelFilter, "longevity") {
+			Skip("skipping longevity test unless 'longevity' label is explicitly defined when running")
+		}
+	})
+
+	It("sets up the longevity test", Label("longevity-setup"), func() {
+		if !strings.Contains(labelFilter, "longevity-setup") {
+			Skip("'longevity-setup' label not specified; skipping...")
+		}
+
+		Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed())
+		Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed())
+		Expect(resourceManager.ApplyFromFiles(promFile, ngfNamespace)).To(Succeed())
+		Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed())
+	})
+
+	It("collects results", Label("longevity-teardown"), func() {
+		if !strings.Contains(labelFilter, "longevity-teardown") {
+			Skip("'longevity-teardown' label not specified; skipping...")
+		}
+
+		resultsDir, err := framework.CreateResultsDir("longevity", version)
+		Expect(err).ToNot(HaveOccurred())
+
+		filename := filepath.Join(resultsDir, fmt.Sprintf("%s.md", version))
+		resultsFile, err := framework.CreateResultsFile(filename)
+		Expect(err).ToNot(HaveOccurred())
+		defer resultsFile.Close()
+
+		Expect(framework.WriteSystemInfoToFile(resultsFile, clusterInfo, *plusEnabled)).To(Succeed())
+
+		// gather wrk output (read from coffee.txt and tea.txt in the user's home directory)
+		homeDir, err := os.UserHomeDir()
+		Expect(err).ToNot(HaveOccurred())
+
+		Expect(framework.WriteContent(resultsFile, "\n## Traffic\n")).To(Succeed())
+		writeTrafficResults(resultsFile, homeDir, "coffee.txt", "HTTP")
+		writeTrafficResults(resultsFile, homeDir, "tea.txt", "HTTPS")
+
+		// gather any error logs from both containers of the first ready NGF Pod
+		names, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout)
+		Expect(err).ToNot(HaveOccurred())
+
+		Expect(framework.WriteContent(resultsFile, "\n## Error Logs\n")).To(Succeed())
+		writeErrorLogs(resultsFile, names[0], "nginx-gateway")
+		writeErrorLogs(resultsFile, names[0], "nginx")
+
+		Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed())
+		Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed())
+	})
+})
+
+// writeTrafficResults appends the contents of homeDir/filename to the results file, labeled with testname.
+func writeTrafficResults(resultsFile *os.File, homeDir, filename, testname string) {
+	content, err := os.ReadFile(filepath.Join(homeDir, filename))
+	Expect(err).ToNot(HaveOccurred())
+
+	formattedContent := fmt.Sprintf("%s:\n\n```text\n%s```\n", testname, string(content))
+	Expect(framework.WriteContent(resultsFile, formattedContent)).To(Succeed())
+}
+
+// writeErrorLogs appends the log lines of the pod's container that isError matches to the results file.
+func writeErrorLogs(resultsFile *os.File, pod, container string) {
+	logReq := clientGoClient.CoreV1().Pods(ngfNamespace).GetLogs(pod, &core.PodLogOptions{Container: container})
+
+	ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout)
+	defer cancel()
+
+	logs, err := logReq.Stream(ctx)
+	Expect(err).ToNot(HaveOccurred())
+	defer logs.Close()
+
+	buf := new(bytes.Buffer)
+	_, err = io.Copy(buf, logs)
+	Expect(err).ToNot(HaveOccurred())
+
+	Expect(framework.WriteContent(resultsFile, fmt.Sprintf("\n### %s\n", container))).To(Succeed())
+	scanner := bufio.NewScanner(buf)
+	for scanner.Scan() {
+		if line := scanner.Text(); isError(line) {
+			Expect(framework.WriteContent(resultsFile, line)).To(Succeed())
+		}
+	}
+	Expect(scanner.Err()).ToNot(HaveOccurred())
+}
+
+// isError reports whether line contains "error", "warn", or "emerg" (case-sensitive substring match).
+func isError(line string) bool {
+	return strings.Contains(line, "error") || strings.Contains(line, "warn") || strings.Contains(line, "emerg")
+}
diff --git a/tests/longevity/manifests/cafe-routes.yaml b/tests/suite/manifests/longevity/cafe-routes.yaml
similarity index 100%
rename from
tests/longevity/manifests/cafe-routes.yaml
rename to tests/suite/manifests/longevity/cafe-routes.yaml
diff --git a/tests/longevity/manifests/cafe-secret.yaml b/tests/suite/manifests/longevity/cafe-secret.yaml
similarity index 100%
rename from tests/longevity/manifests/cafe-secret.yaml
rename to tests/suite/manifests/longevity/cafe-secret.yaml
diff --git a/tests/longevity/manifests/cafe.yaml b/tests/suite/manifests/longevity/cafe.yaml
similarity index 100%
rename from tests/longevity/manifests/cafe.yaml
rename to tests/suite/manifests/longevity/cafe.yaml
diff --git a/tests/longevity/manifests/cronjob.yaml b/tests/suite/manifests/longevity/cronjob.yaml
similarity index 86%
rename from tests/longevity/manifests/cronjob.yaml
rename to tests/suite/manifests/longevity/cronjob.yaml
index 234ff903d8..1f7511cf35 100644
--- a/tests/longevity/manifests/cronjob.yaml
+++ b/tests/suite/manifests/longevity/cronjob.yaml
@@ -2,13 +2,11 @@ apiVersion: v1
 kind: ServiceAccount
 metadata:
   name: rollout-mgr
-  namespace: default
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
 metadata:
   name: rollout-mgr
-  namespace: default
 rules:
 - apiGroups:
   - "apps"
@@ -21,7 +19,6 @@ apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
 metadata:
   name: rollout-mgr
-  namespace: default
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: Role
@@ -29,13 +26,11 @@ roleRef:
 subjects:
 - kind: ServiceAccount
   name: rollout-mgr
-  namespace: default
 ---
 apiVersion: batch/v1
 kind: CronJob
 metadata:
   name: coffee-rollout-mgr
-  namespace: default
 spec:
   schedule: "* */6 * * *" # every minute every 6 hours
   jobTemplate:
@@ -58,14 +53,13 @@ spec:
                 -H "Authorization: Bearer $TOKEN" \
                 -H "Content-type: application/merge-patch+json" \
                 --data-raw "{\"spec\": {\"template\": {\"metadata\": {\"annotations\": {\"kubectl.kubernetes.io/restartedAt\": \"$RESTARTED_AT\"}}}}}" \
-                "https://kubernetes/apis/apps/v1/namespaces/default/deployments/coffee?fieldManager=kubectl-rollout" 2>&1
+                "https://kubernetes.default/apis/apps/v1/namespaces/longevity/deployments/coffee?fieldManager=kubectl-rollout" 2>&1
           restartPolicy: OnFailure
 ---
 apiVersion: batch/v1
 kind: CronJob
 metadata:
   name: tea-rollout-mgr
-  namespace: default
 spec:
   schedule: "* 3,9,15,21 * * *" # every minute every 6 hours, 3 hours apart from coffee
   jobTemplate:
@@ -88,5 +82,5 @@ spec:
                 -H "Authorization: Bearer $TOKEN" \
                 -H "Content-type: application/merge-patch+json" \
                 --data-raw "{\"spec\": {\"template\": {\"metadata\": {\"annotations\": {\"kubectl.kubernetes.io/restartedAt\": \"$RESTARTED_AT\"}}}}}" \
-                "https://kubernetes/apis/apps/v1/namespaces/default/deployments/tea?fieldManager=kubectl-rollout" 2>&1
+                "https://kubernetes.default/apis/apps/v1/namespaces/longevity/deployments/tea?fieldManager=kubectl-rollout" 2>&1
           restartPolicy: OnFailure
diff --git a/tests/longevity/manifests/gateway.yaml b/tests/suite/manifests/longevity/gateway.yaml
similarity index 100%
rename from tests/longevity/manifests/gateway.yaml
rename to tests/suite/manifests/longevity/gateway.yaml
diff --git a/tests/longevity/manifests/prom.yaml b/tests/suite/manifests/longevity/prom.yaml
similarity index 79%
rename from tests/longevity/manifests/prom.yaml
rename to tests/suite/manifests/longevity/prom.yaml
index e5d35fae72..24de26577e 100644
--- a/tests/longevity/manifests/prom.yaml
+++ b/tests/suite/manifests/longevity/prom.yaml
@@ -6,7 +6,7 @@ metadata:
 spec:
   selector:
     matchLabels:
-      app.kubernetes.io/name: nginx-gateway
+      app.kubernetes.io/name: nginx-gateway-fabric
   endpoints:
   - port: metrics
     interval: 30s
diff --git a/tests/suite/sample_test.go b/tests/suite/sample_test.go
index 0e6ce59f16..3996c67646 100644
--- a/tests/suite/sample_test.go
+++ b/tests/suite/sample_test.go
@@ -14,7 +14,7 @@ import (
 	"github.com/nginxinc/nginx-gateway-fabric/tests/framework"
 )
 
-var _ = Describe("Basic test example", func() {
+var _ = Describe("Basic test example", Label("functional"), func() {
 	files := []string{
 		"hello/hello.yaml",
"hello/gateway.yaml", diff --git a/tests/suite/scripts/longevity-wrk.sh b/tests/suite/scripts/longevity-wrk.sh new file mode 100755 index 0000000000..20cf551d50 --- /dev/null +++ b/tests/suite/scripts/longevity-wrk.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +SVC_IP=$(kubectl -n nginx-gateway get svc ngf-test-nginx-gateway-fabric -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + +echo "${SVC_IP} cafe.example.com" | sudo tee -a /etc/hosts + +nohup wrk -t2 -c100 -d96h http://cafe.example.com/coffee &> ~/coffee.txt & + +nohup wrk -t2 -c100 -d96h https://cafe.example.com/tea &> ~/tea.txt & diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 8d2af38b57..a46b08d8ba 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -21,6 +21,7 @@ import ( k8sRuntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" ctlr "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -60,6 +61,7 @@ var ( //go:embed manifests/* manifests embed.FS k8sClient client.Client + clientGoClient kubernetes.Interface // used for getting Pod logs resourceManager framework.ResourceManager portForwardStopCh = make(chan struct{}, 1) portFwdPort int @@ -68,6 +70,7 @@ var ( address string version string clusterInfo framework.ClusterInfo + skipNFRTests bool ) const ( @@ -79,6 +82,7 @@ type setupConfig struct { chartPath string gwAPIVersion string deploy bool + nfr bool } func setup(cfg setupConfig, extraInstallArgs ...string) { @@ -100,6 +104,9 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { k8sClient, err = client.New(k8sConfig, options) Expect(err).ToNot(HaveOccurred()) + clientGoClient, err = kubernetes.NewForConfig(k8sConfig) + Expect(err).ToNot(HaveOccurred()) + timeoutConfig = framework.DefaultTimeoutConfig() resourceManager = framework.ResourceManager{ K8sClient: k8sClient, @@ -110,6 
+117,24 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { clusterInfo, err = resourceManager.GetClusterInfo() Expect(err).ToNot(HaveOccurred()) + if cfg.nfr && !clusterInfo.IsGKE { + skipNFRTests = true + Skip("NFR tests can only run in GKE") + } + + if cfg.nfr && *serviceType != "LoadBalancer" { + skipNFRTests = true + Skip("GW_SERVICE_TYPE must be 'LoadBalancer' for NFR tests") + } + + if *versionUnderTest != "" { + version = *versionUnderTest + } else if *imageTag != "" { + version = *imageTag + } else { + version = "edge" + } + if !cfg.deploy { return } @@ -131,14 +156,6 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { installCfg.ImagePullPolicy = *imagePullPolicy } - if *versionUnderTest != "" { - version = *versionUnderTest - } else if *imageTag != "" { - version = *imageTag - } else { - version = "edge" - } - output, err := framework.InstallGatewayAPI(k8sClient, cfg.gwAPIVersion, *k8sVersion) Expect(err).ToNot(HaveOccurred(), string(output)) @@ -209,10 +226,13 @@ var _ = BeforeSuite(func() { deploy: true, } - // If we are running the upgrade test only, then skip the initial deployment. - // The upgrade test will deploy its own version of NGF. 
-	suiteConfig, _ := GinkgoConfiguration()
-	if suiteConfig.LabelFilter == "upgrade" {
+	labelFilter := GinkgoLabelFilter()
+	cfg.nfr = isNFR(labelFilter)
+
+	// Skip deployment if:
+	// - running upgrade test (this test will deploy its own version)
+	// - running longevity teardown (deployment will already exist)
+	if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") {
 		cfg.deploy = false
 	}
 
@@ -220,5 +240,23 @@ var _ = BeforeSuite(func() {
 })
 
 var _ = AfterSuite(func() {
-	teardown()
+	// skipNFRTests is set by setup() when it skips an NFR run; skip here as well instead of tearing down.
+	if skipNFRTests {
+		Skip("")
+	}
+
+	// A "longevity-setup" run must leave its deployment in place, so teardown is skipped for it.
+	labelFilter := GinkgoLabelFilter()
+	if !strings.Contains(labelFilter, "longevity-setup") {
+		teardown()
+	}
 })
+
+// isNFR reports whether labelFilter contains one of the substrings "nfr", "longevity", "performance" or
+// "upgrade". Matching is by substring, so "longevity-setup" and "longevity-teardown" both count.
+func isNFR(labelFilter string) bool {
+	return strings.Contains(labelFilter, "nfr") ||
+		strings.Contains(labelFilter, "longevity") ||
+		strings.Contains(labelFilter, "performance") ||
+		strings.Contains(labelFilter, "upgrade")
+}
diff --git a/tests/suite/upgrade_test.go b/tests/suite/upgrade_test.go
index 3fa71bcc6d..401b8d9a91 100644
--- a/tests/suite/upgrade_test.go
+++ b/tests/suite/upgrade_test.go
@@ -26,7 +26,7 @@ import (
 // This test installs the latest released version of NGF, then upgrades to the edge version (or dev version).
 // During the upgrade, traffic is continuously sent to ensure no downtime.
 // We also check that the leader election lease has been updated, and that Gateway updates are processed.
-var _ = Describe("Upgrade testing", Label("upgrade"), func() { +var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { var ( files = []string{ "ngf-upgrade/cafe.yaml", @@ -44,20 +44,9 @@ var _ = Describe("Upgrade testing", Label("upgrade"), func() { valuesFile = "manifests/ngf-upgrade/values.yaml" resultsFile *os.File resultsDir string - skipped bool ) BeforeEach(func() { - if !clusterInfo.IsGKE { - skipped = true - Skip("Upgrade tests can only run in GKE") - } - - if *serviceType != "LoadBalancer" { - skipped = true - Skip("GW_SERVICE_TYPE must be 'LoadBalancer' for upgrade tests") - } - // this test is unique in that it needs to install the previous version of NGF, // so we need to uninstall the version installed at the suite level, then install the custom version teardown() @@ -84,10 +73,6 @@ var _ = Describe("Upgrade testing", Label("upgrade"), func() { }) AfterEach(func() { - if skipped { - Skip("") - } - Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) resultsFile.Close()