From a60a2774cd098d1db16c43b0dba85e8d3bfe4206 Mon Sep 17 00:00:00 2001
From: Lucian Petrut
Date: Fri, 14 Feb 2025 20:01:16 +0200
Subject: [PATCH] Perf test improvements (#249)

* Perf test improvements
* collect inspection reports even if the job succeeds
* the k8s-dqlite logs can help us understand if the expected code paths were reached or if there's excessive logging
* multiple kube-burner iterations
* helps us determine if an error is transient or if dqlite was compromised
* longer test duration, covering periodic jobs such as compaction
* 15 kube-burner iterations by default
* Fix typo
---
 .github/workflows/performance.yaml          |  6 ++--
 test/performance/tests/test_util/config.py  |  3 +-
 test/performance/tests/test_util/metrics.py | 39 +++++++++++++++------
 3 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml
index a1670356..f864cf91 100644
--- a/.github/workflows/performance.yaml
+++ b/.github/workflows/performance.yaml
@@ -77,6 +77,7 @@ jobs:
           TEST_INSPECTION_REPORTS_DIR: ${{ github.workspace }}/inspection-reports
           TEST_METRICS_DIR: ${{ github.workspace }}/test/performance/results/head
           TEST_RUN_NAME: head
+          TEST_KUBE_BURNER_ITERATIONS: 15
         run: |
           cd test/performance
           mkdir -p ./results/head
@@ -89,6 +90,7 @@ jobs:
           TEST_INSPECTION_REPORTS_DIR: ${{ github.workspace }}/inspection-reports
           TEST_METRICS_DIR: ${{ github.workspace }}/test/performance/results/base-code
           TEST_RUN_NAME: base-code
+          TEST_KUBE_BURNER_ITERATIONS: 15
         run: |
           cd test/performance
           mkdir -p ./results/base-code
@@ -113,12 +115,12 @@ jobs:
           name: performance-results
           path: ${{ github.workspace }}/test/performance/results
       - name: Prepare inspection reports
-        if: failure()
+        if: always()
         run: |
           tar -czvf inspection-reports.tar.gz -C ${{ github.workspace }} inspection-reports
           echo "artifact_name=inspection-reports" | sed 's/:/-/g' >> $GITHUB_ENV
       - name: Upload inspection report artifact
-        if: failure()
+        if: always()
         uses: actions/upload-artifact@v4
         with:
           name: ${{ env.artifact_name }}
diff --git a/test/performance/tests/test_util/config.py b/test/performance/tests/test_util/config.py
index 3b9bef29..02e6e788 100644
--- a/test/performance/tests/test_util/config.py
+++ b/test/performance/tests/test_util/config.py
@@ -41,9 +41,10 @@
     os.getenv("TEST_KUBE_BURNER_URL")
     or "https://github.com/kube-burner/kube-burner/releases/download/v1.2/kube-burner-1.2-Linux-x86_64.tar.gz"
 )
-
 # Global kube-burner invocation timeout.
 KUBE_BURNER_TIMEOUT = os.getenv("TEST_KUBE_BURNER_TIMEOUT") or "10m"
+# The number of kube-burner invocations.
+KUBE_BURNER_ITERATIONS = int(os.getenv("TEST_KUBE_BURNER_ITERATIONS") or 15)
 
 # FLAVOR is the flavour to use for running the performance tests.
 FLAVOR = os.getenv("TEST_FLAVOR") or ""
diff --git a/test/performance/tests/test_util/metrics.py b/test/performance/tests/test_util/metrics.py
index 69ee05ab..e168bd3b 100644
--- a/test/performance/tests/test_util/metrics.py
+++ b/test/performance/tests/test_util/metrics.py
@@ -136,17 +136,34 @@ def configure_kube_burner(instance: harness.Instance):
     )
 
 
-def run_kube_burner(instance: harness.Instance):
+def run_kube_burner(
+    instance: harness.Instance, iterations: int = config.KUBE_BURNER_ITERATIONS
+):
     """Copies kubeconfig and runs kube-burner on the instance."""
     instance.exec(["mkdir", "-p", "/root/.kube"])
     instance.exec(["k8s", "config", ">", "/root/.kube/config"])
-    instance.exec(
-        [
-            "/root/kube-burner",
-            "init",
-            "--timeout",
-            config.KUBE_BURNER_TIMEOUT,
-            "-c",
-            "/root/api-intensive.yaml",
-        ]
-    )
+
+    raised_exc = None
+    for iteration in range(iterations):
+        LOG.info("Starting kube-burner iteration %s of %s.", iteration, iterations)
+        try:
+            instance.exec(
+                [
+                    "/root/kube-burner",
+                    "init",
+                    "--timeout",
+                    config.KUBE_BURNER_TIMEOUT,
+                    "-c",
+                    "/root/api-intensive.yaml",
+                ]
+            )
+        except Exception as ex:
+            # We'll continue the loop even after encountering failures
+            # in order to determine if this is a transient failure or if the
+            # dqlite service was completely compromised (e.g. deadlock or crash).
+            LOG.exception("kube-burner job failed, continuing...")
+            raised_exc = ex
+
+    # Raise encountered exceptions, if any.
+    if raised_exc:
+        raise raised_exc