From c8b04affe9890c3911b18b30bd5d57582be86ef0 Mon Sep 17 00:00:00 2001 From: Radhika Lakhtakia Date: Mon, 24 Nov 2025 19:06:09 +0000 Subject: [PATCH] Add decode heavy benchmark e2e test to github actions. --- .github/workflows/e2e-decode-heavy-gke.yaml | 280 ++++++++++++++++++ .../inference-perf/templates/job.yaml | 28 +- .../single-workload/decode-heavy-values.yaml | 2 +- 3 files changed, 303 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/e2e-decode-heavy-gke.yaml diff --git a/.github/workflows/e2e-decode-heavy-gke.yaml b/.github/workflows/e2e-decode-heavy-gke.yaml new file mode 100644 index 000000000..30ba1c585 --- /dev/null +++ b/.github/workflows/e2e-decode-heavy-gke.yaml @@ -0,0 +1,280 @@ +name: GKE Decode Heavy Test + +on: + # Runs with a PR comment /run-gke-decode-heavy + issue_comment: + types: [created] + workflow_dispatch: + inputs: + pr_or_branch: + description: 'Pull-request number or branch name to test' + required: true + default: 'main' + type: string + +permissions: + contents: read + +jobs: + deploy_and_validate: + if: > + github.event_name == 'workflow_dispatch' || + ( + github.event_name == 'issue_comment' && + github.event.issue.pull_request && + github.event.issue.pull_request.base.ref == 'main' && + contains(github.event.comment.body, '/run-gke-decode-heavy') + && + ( + github.event.comment.author_association == 'OWNER' || + github.event.comment.author_association == 'MEMBER' || + github.event.comment.author_association == 'COLLABORATOR' + ) + ) + name: Test on ${{ matrix.accelerator.name }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + max-parallel: 1 + matrix: + accelerator: + - name: GPU + + env: + GCP_PROJECT_ID: llm-d-scale + GKE_CLUSTER_NAME: llm-d-e2e-us-east5 + GKE_CLUSTER_ZONE: us-east5 + NAMESPACE: igw-decode-heavy + GATEWAY: gke-l7-regional-external-managed + GATEWAY_TYPE: gke + PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number || github.event.number || 'actions' }} + 
HF_TOKEN: ${{ secrets.HF_TOKEN }} + MODEL: meta-llama/Llama-3.1-8B-Instruct + GSA_EMAIL: ${{ secrets.GCS_WORKLOAD_SA }} + GCS_BUCKET: igw-e2e-benchmark-results + KSA_NAME: igw-e2e-benchmark-sa + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Determine if pr_or_branch is a PR number + id: check_pr + env: + PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch }} + shell: bash + run: | + echo "PR_OR_BRANCH=${PR_OR_BRANCH:-actions}" >> "$GITHUB_ENV" + if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then + echo "is_pr=true" >> "$GITHUB_OUTPUT" + elif [[ "${{ github.event_name }}" = "pull_request" ]]; then + echo "PR_OR_BRANCH=${{ github.event.pull_request.number }}" >> $GITHUB_ENV + echo "is_pr=true" >> "$GITHUB_OUTPUT" + else + echo "is_pr=false" >> "$GITHUB_OUTPUT" + fi + + - name: Fetch and checkout PR + if: steps.check_pr.outputs.is_pr == 'true' + run: | + git fetch origin pull/"$PR_OR_BRANCH"/head:pr-"$PR_OR_BRANCH" + git checkout pr-"$PR_OR_BRANCH" + + - name: Checkout branch + if: steps.check_pr.outputs.is_pr == 'false' + run: git checkout "$PR_OR_BRANCH" + + - name: Authenticate to Google Cloud + id: auth + uses: google-github-actions/auth@b7593ed2efd1c1617e1b0254da33b86225adb2a5 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} + + - name: Set up gcloud CLI and kubectl + uses: google-github-actions/setup-gcloud@cb1e50a9932213ecece00a606661ae9ca44f3397 + with: + project_id: ${{ env.GCP_PROJECT_ID }} + install_components: 'kubectl,gke-gcloud-auth-plugin' + + - name: Get GKE credentials + run: | + gcloud container clusters get-credentials "${{ env.GKE_CLUSTER_NAME }}" --zone "${{ env.GKE_CLUSTER_ZONE }}" + + - name: Create namespace + run: | + kubectl create namespace "${NAMESPACE}" || echo "Namespace already exists" + + - name: Create hf-token secret + run: | + kubectl create secret generic hf-token \ + --from-literal="token=${{ secrets.HF_TOKEN }}" \ + --namespace "${NAMESPACE}" \ + --dry-run=client -o yaml | 
kubectl apply -f - + + - name: Create and Annotate KSA for Workload Identity + run: | + kubectl create serviceaccount $KSA_NAME --namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f - + kubectl annotate serviceaccount $KSA_NAME \ + iam.gke.io/gcp-service-account=$GSA_EMAIL \ + --overwrite \ + --namespace "${NAMESPACE}" + + - name: Deploy Model Server and CRDs + run: | + cd config/manifests/vllm + echo "Deploying Model Server..." + kubectl apply -f gpu-deployment.yaml -n ${NAMESPACE} | tee ~/igw-decode-heavy-deployment.log + echo "Installing CRDs" + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.1.0/manifests.yaml + echo "---------------------------------------" >> ~/igw-decode-heavy-deployment.log + + - name: Deploy InferencePool and Endpoint Picker Extension + run: | + export IGW_CHART_VERSION=v1.1.0 + helm install vllm-llama3-8b-instruct \ + --namespace $NAMESPACE \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_TYPE \ + --version $IGW_CHART_VERSION \ + oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool | tee ~/igw-decode-heavy-deployment.log + echo "---------------------------------------" >> ~/igw-decode-heavy-deployment.log + + - name: Wait for all pods to be ready + run: | + kubectl wait pod \ + --for=condition=Ready \ + --all \ + -n "${NAMESPACE}" \ + --timeout=25m + echo "✅ All pods are ready." + kubectl get pods -n "${NAMESPACE}" + + - name: Deploy Gateway + run: | + GATEWAY_NAME=inference-gateway + kubectl delete httproute llm-route -n ${NAMESPACE} --ignore-not-found + kubectl delete gateway ${GATEWAY_NAME} -n ${NAMESPACE} --ignore-not-found + echo "Deploying Gateway..." 
+ kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/gateway.yaml -n ${NAMESPACE} | tee ~/igw-decode-heavy-deployment.log + echo "Deploying HTTPRoute..." + kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/httproute.yaml -n ${NAMESPACE} | tee ~/igw-decode-heavy-deployment.log + echo "---------------------------------------" >> ~/igw-decode-heavy-deployment.log + + - name: Wait for gateway to be ready + run: | + GATEWAY_NAME=inference-gateway + kubectl wait gateway/${GATEWAY_NAME} \ + --for=condition=Programmed=True \ + -n "${NAMESPACE}" \ + --timeout=500s + echo "✅ Gateway is ready." + kubectl get gateway -n "${NAMESPACE}" + + - name: Show deployment status + run: | + echo "=== Deployments ===" + kubectl get deployments -n "${NAMESPACE}" + echo "" + echo "=== Pods ===" + kubectl get pods -n "${NAMESPACE}" + echo "" + echo "=== Services ===" + kubectl get svc -n "${NAMESPACE}" + echo "" + echo "=== Helm releases ===" + helm list -n "${NAMESPACE}" || true + echo "" + echo "=== Inference Pools ===" + kubectl get inferencepools -n "${NAMESPACE}" || true + echo "" + echo "=== HTTPRoutes ===" + kubectl get httproutes -n "${NAMESPACE}" -o yaml || true + echo "" + echo "=== Gateway ===" + kubectl get Gateway -n "${NAMESPACE}" || true + echo "" + + - name: Verify installation and run validation test + run: | + cd .github/scripts/e2e + ./e2e-validate.sh -n "${NAMESPACE}" -v -m ${MODEL} + + - name: Run benchmarking test + run: | + TIMESTAMP=$(date +"%Y-%m-%d-%H-%M-%S") + cd benchmarking/single-workload + host="${GATEWAY_HOST:-$(kubectl get gateway -n "$NAMESPACE" \ + -o jsonpath='{.items[0].status.addresses[0].value}' 2>/dev/null || true)}" + if [[ -z "$host" ]]; then + echo "Error: could not discover a Gateway address in namespace '$NAMESPACE'." 
>&2 + exit 1 + fi + port=80 + svc_host="${host}:${port}" + helm install decode-heavy-benchmark ../inference-perf/ -f decode-heavy-values.yaml \ + --namespace "${NAMESPACE}" \ + --create-namespace \ + --set hfToken="${HF_TOKEN}" \ + --set "config.server.base_url=http://${svc_host}" \ + --set "job.serviceAccountName=$KSA_NAME" \ + --set "job.image.tag=v0.2.0" \ + --set "config.storage.google_cloud_storage.bucket_name=${GCS_BUCKET}" \ + --set "config.storage.google_cloud_storage.path=${NAMESPACE}/${TIMESTAMP}" \ + --set "gcsPath=gs://${GCS_BUCKET}/datasets/infinity_instruct.json" \ + --set "config.data.path=/gcsDataset/gcs-dataset.json" \ + --set-string 'job.resources.limits.nvidia\.com/gpu=1' + + - name: Wait for benchmarking job to finish + run: | + job_name=decode-heavy-benchmark-inference-perf-job + TIMEOUT_DURATION="7200s" + if ! kubectl wait --for=condition=complete job/"$job_name" -n "$NAMESPACE" --timeout="$TIMEOUT_DURATION"; then + echo "Error: Benchmark job $job_name did not complete successfully within $TIMEOUT_DURATION." >&2 + echo "--- Job Description ---" >&2 + kubectl describe job "$job_name" -n "$NAMESPACE" >&2 + echo "--- Pod Logs (Last 50 lines) ---" >&2 + kubectl logs -l job-name="$job_name" -n "$NAMESPACE" --all-containers=true --tail 50 >&2 + exit 1 + fi + echo "✅ Benchmarking Job Completed." + + - name: Collect and upload Kubernetes pod logs + if: always() + run: | + mkdir -p pod-logs-inference-decode-heavy + cd pod-logs-inference-decode-heavy + echo "Fetching ${NAMESPACE} pods log..." + kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" \ + | xargs -I{} sh -c 'kubectl logs --all-containers=true -n "${NAMESPACE}" {} > "{}.log" 2>&1' + echo "Fetching ${NAMESPACE} pods descriptions..." + kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" \ + | xargs -I{} sh -c 'kubectl describe pod -n "${NAMESPACE}" {} > "{}-describe.log" 2>&1' + mv ~/igw-decode-heavy-deployment.log . 
|| true + mv ~/install-deps.log . || true + + - name: Upload pod logs as artifact + uses: actions/upload-artifact@v4 + if: always() + with: + name: igw-pod-logs-inference-decode-heavy-${{ matrix.accelerator.name }} + path: pod-logs-inference-decode-heavy + + - name: Send Google Chat notification on failure + if: failure() + uses: SimonScholz/google-chat-action@3b3519e5102dba8aa5046fd711c4b553586409bb + with: + webhookUrl: ${{ secrets.GOOGLE_CHAT_WEBHOOK }} + jobStatus: ${{ job.status }} + title: '${{ github.workflow }} - ${{ matrix.accelerator.name }}' + + - name: Cleanup deployment + if: always() + run: | + GATEWAY_NAME=inference-gateway + helm uninstall vllm-llama3-8b-instruct -n ${NAMESPACE} --ignore-not-found + helm uninstall decode-heavy-benchmark -n ${NAMESPACE} --ignore-not-found + kubectl delete httproute llm-route -n ${NAMESPACE} --ignore-not-found + kubectl delete gateway ${GATEWAY_NAME} -n ${NAMESPACE} --ignore-not-found \ No newline at end of file diff --git a/benchmarking/inference-perf/templates/job.yaml b/benchmarking/inference-perf/templates/job.yaml index b581537db..3ca102ba1 100644 --- a/benchmarking/inference-perf/templates/job.yaml +++ b/benchmarking/inference-perf/templates/job.yaml @@ -23,19 +23,19 @@ spec: initContainers: - name: fetch-gcs-dataset image: google/cloud-sdk:latest - command: ["sh", "-c", "gsutil cp {{ .Values.gcsPath }} /dataset/gcs-dataset.json"] + command: ["sh", "-c", "gsutil cp {{ .Values.gcsPath }} /gcsDataset/gcs-dataset.json"] volumeMounts: - - name: dataset-volume - mountPath: /dataset + - name: gcs-dataset-volume + mountPath: /gcsDataset {{- end }} {{- if .Values.s3Path}} initContainers: - name: fetch-s3-dataset image: google/cloud-sdk:latest - command: ["sh", "-c", "aws s3 cp s3://{{ .Values.s3Path }} /dataset/s3-dataset.json"] + command: ["sh", "-c", "aws s3 cp s3://{{ .Values.s3Path }} /s3Dataset/s3-dataset.json"] volumeMounts: - - name: dataset-volume - mountPath: /dataset + - name: s3-dataset-volume + mountPath: 
/s3Dataset
 {{- end }}
       containers:
         - name: inference-perf-container
@@ -58,9 +58,25 @@ spec:
             - name: config-volume
               mountPath: {{ include "inference-perf.configMount" . }}
               readOnly: true
+          {{- if .Values.gcsPath}}
+            - name: gcs-dataset-volume
+              mountPath: /gcsDataset
+          {{- end }}
+          {{- if .Values.s3Path}}
+            - name: s3-dataset-volume
+              mountPath: /s3Dataset
+          {{- end }}
           resources:
             {{- toYaml .Values.job.resources | nindent 12 }}
       volumes:
         - name: config-volume
           configMap:
             name: {{ include "inference-perf.fullname" . }}-config
+{{- if .Values.gcsPath}}
+        - name: gcs-dataset-volume
+          emptyDir: {}
+{{- end }}
+{{- if .Values.s3Path}}
+        - name: s3-dataset-volume
+          emptyDir: {}
+{{- end }}
diff --git a/benchmarking/single-workload/decode-heavy-values.yaml b/benchmarking/single-workload/decode-heavy-values.yaml
index a5811e021..aa69b25a8 100644
--- a/benchmarking/single-workload/decode-heavy-values.yaml
+++ b/benchmarking/single-workload/decode-heavy-values.yaml
@@ -20,7 +20,7 @@ logLevel: INFO
 # A GCS bucket path that points to the dataset file.
 # The file will be copied from this path to the local file system
 # at /dataset/dataset.json for use during the run.
-# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /dataset/dataset.json.
+# NOTE: For this dataset to be used, config.data.path must also be explicitly set to /gcsDataset/gcs-dataset.json.
 gcsPath: ""
 
 # An S3 bucket path that points to the dataset file.