From a60a2774cd098d1db16c43b0dba85e8d3bfe4206 Mon Sep 17 00:00:00 2001
From: Lucian Petrut <lpetrut@cloudbasesolutions.com>
Date: Fri, 14 Feb 2025 20:01:16 +0200
Subject: [PATCH] Perf test improvements (#249)

* Perf test improvements

* collect inspection reports even if the job succeeds
  * the k8s-dqlite logs can help us understand if the expected
    code paths were reached or if there's excessive logging
* multiple kube-burner iterations
  * helps us determine if an error is transient or if dqlite was
    compromised
  * longer test duration, covering periodic jobs such as compaction

* 15 kube-burner iterations by default

* Fix typo
---
 .github/workflows/performance.yaml          |  6 ++--
 test/performance/tests/test_util/config.py  |  3 +-
 test/performance/tests/test_util/metrics.py | 39 +++++++++++++++------
 3 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml
index a1670356..f864cf91 100644
--- a/.github/workflows/performance.yaml
+++ b/.github/workflows/performance.yaml
@@ -77,6 +77,7 @@ jobs:
           TEST_INSPECTION_REPORTS_DIR: ${{ github.workspace }}/inspection-reports
           TEST_METRICS_DIR: ${{ github.workspace }}/test/performance/results/head
           TEST_RUN_NAME: head
+          TEST_KUBE_BURNER_ITERATIONS: 15
         run: |
           cd test/performance
           mkdir -p ./results/head
@@ -89,6 +90,7 @@ jobs:
           TEST_INSPECTION_REPORTS_DIR: ${{ github.workspace }}/inspection-reports
           TEST_METRICS_DIR: ${{ github.workspace }}/test/performance/results/base-code
           TEST_RUN_NAME: base-code
+          TEST_KUBE_BURNER_ITERATIONS: 15
         run: |
           cd test/performance 
           mkdir -p ./results/base-code
@@ -113,12 +115,12 @@ jobs:
           name: performance-results
           path: ${{ github.workspace }}/test/performance/results
       - name: Prepare inspection reports
-        if: failure()
+        if: always()
         run: |
           tar -czvf inspection-reports.tar.gz -C ${{ github.workspace }} inspection-reports
           echo "artifact_name=inspection-reports" | sed 's/:/-/g' >> $GITHUB_ENV
       - name: Upload inspection report artifact
-        if: failure()
+        if: always()
         uses: actions/upload-artifact@v4
         with:
           name: ${{ env.artifact_name }}
diff --git a/test/performance/tests/test_util/config.py b/test/performance/tests/test_util/config.py
index 3b9bef29..02e6e788 100644
--- a/test/performance/tests/test_util/config.py
+++ b/test/performance/tests/test_util/config.py
@@ -41,9 +41,10 @@
     os.getenv("TEST_KUBE_BURNER_URL")
     or "https://github.com/kube-burner/kube-burner/releases/download/v1.2/kube-burner-1.2-Linux-x86_64.tar.gz"
 )
-
 # Global kube-burner invocation timeout.
 KUBE_BURNER_TIMEOUT = os.getenv("TEST_KUBE_BURNER_TIMEOUT") or "10m"
+# The number of kube-burner invocations.
+KUBE_BURNER_ITERATIONS = int(os.getenv("TEST_KUBE_BURNER_ITERATIONS") or 15)
 
 # FLAVOR is the flavour to use for running the performance tests.
 FLAVOR = os.getenv("TEST_FLAVOR") or ""
diff --git a/test/performance/tests/test_util/metrics.py b/test/performance/tests/test_util/metrics.py
index 69ee05ab..e168bd3b 100644
--- a/test/performance/tests/test_util/metrics.py
+++ b/test/performance/tests/test_util/metrics.py
@@ -136,17 +136,34 @@ def configure_kube_burner(instance: harness.Instance):
     )
 
 
-def run_kube_burner(instance: harness.Instance):
+def run_kube_burner(
+    instance: harness.Instance, iterations: int = config.KUBE_BURNER_ITERATIONS
+):
     """Copies kubeconfig and runs kube-burner on the instance."""
     instance.exec(["mkdir", "-p", "/root/.kube"])
     instance.exec(["k8s", "config", ">", "/root/.kube/config"])
-    instance.exec(
-        [
-            "/root/kube-burner",
-            "init",
-            "--timeout",
-            config.KUBE_BURNER_TIMEOUT,
-            "-c",
-            "/root/api-intensive.yaml",
-        ]
-    )
+
+    raised_exc = None
+    for iteration in range(1, iterations + 1):
+        LOG.info("Starting kube-burner iteration %s of %s.", iteration, iterations)
+        try:
+            instance.exec(
+                [
+                    "/root/kube-burner",
+                    "init",
+                    "--timeout",
+                    config.KUBE_BURNER_TIMEOUT,
+                    "-c",
+                    "/root/api-intensive.yaml",
+                ]
+            )
+        except Exception as ex:
+            # We'll continue the loop even after encountering failures
+            # in order to determine if this is a transient failure or if the
+            # dqlite service was completely compromised (e.g. deadlock or crash).
+            LOG.exception("kube-burner job failed, continuing...")
+            raised_exc = ex
+
+    # Re-raise the most recent failure, if any (earlier ones are only logged).
+    if raised_exc:
+        raise raised_exc