From 14171038e9b23907638dd3228da95e1669026b3b Mon Sep 17 00:00:00 2001
From: Karan Goel <karangoel@google.com>
Date: Wed, 17 Sep 2025 14:53:46 +0000
Subject: [PATCH 1/7] Add a batched auto tune script

Signed-off-by: Karan Goel <karangoel@google.com>
---
 benchmarks/auto_tune/README.md          |  67 +++++++++++++
 benchmarks/auto_tune/batch_auto_tune.sh | 124 ++++++++++++++++++++++++
 2 files changed, 191 insertions(+)
 create mode 100755 benchmarks/auto_tune/batch_auto_tune.sh

diff --git a/benchmarks/auto_tune/README.md b/benchmarks/auto_tune/README.md
index 3aa988aac254..bf57424091ba 100644
--- a/benchmarks/auto_tune/README.md
+++ b/benchmarks/auto_tune/README.md
@@ -149,3 +149,70 @@ The script follows a systematic process to find the optimal parameters:
 4. **Track Best Result**: Throughout the process, the script tracks the parameter combination that has yielded the highest valid throughput so far.
 
 5. **Profile Collection**: For the best-performing run, the script saves the vLLM profiler output, which can be used for deep-dive performance analysis with tools like TensorBoard.
+
+## Batched `auto_tune`
+
+The `batch_auto_tune.sh` script allows you to run multiple `auto_tune.sh` experiments sequentially from a single configuration file. It iterates through a list of parameter sets, executes `auto_tune.sh` for each, and records the results back into the input file.
+
+### Prerequisites
+
+- **jq**: This script requires `jq` to parse the JSON configuration file.
+- **gcloud**: If you plan to upload results to Google Cloud Storage, the `gcloud` CLI must be installed and authenticated.
+
+### How to Run
+
+1.  **Create a JSON configuration file**: Create a file (e.g., `runs_config.json`) containing an array of JSON objects. Each object defines the parameters for a single `auto_tune.sh` run.
+
+2.  **Execute the script**:
+
+    ```bash
+    bash batch_auto_tune.sh <path_to_json_file> [gcs_upload_path]
+    ```
+
+    -   `<path_to_json_file>`: **Required.** Path to your JSON configuration file.
+    -   `[gcs_upload_path]`: **Optional.** A GCS path (e.g., `gs://my-bucket/benchmark-results`) where the detailed results and profiles for each run will be uploaded.
+
+### Configuration File
+
+The JSON configuration file should contain an array of objects. Each object's keys correspond to the configuration variables for `auto_tune.sh` (see the [Configuration table above](#configuration)). These keys will be converted to uppercase environment variables for each run.
+
+Here is an example `runs_config.json` with two benchmark configurations:
+
+```json
+[
+  {
+    "base": "/home/user",
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "system": "TPU",
+    "tp": 8,
+    "input_len": 128,
+    "output_len": 2048,
+    "max_model_len": 2300,
+    "num_seqs_list": "128 256",
+    "num_batched_tokens_list": "8192 16384"
+  },
+  {
+    "base": "/home/user",
+    "model": "meta-llama/Llama-3.1-70B-Instruct",
+    "system": "TPU",
+    "tp": 8,
+    "input_len": 4000,
+    "output_len": 16,
+    "max_model_len": 4096,
+    "num_seqs_list": "64 128",
+    "num_batched_tokens_list": "4096 8192",
+    "max_latency_allowed_ms": 500
+  }
+]
+```
+
+### Output
+
+The script modifies the input JSON file in place, adding the results of each run to the corresponding object. The following fields are added:
+
+-   `run_id`: A unique identifier for the run, derived from the timestamp.
+-   `status`: The outcome of the run (`SUCCESS`, `FAILURE`, or `WARNING_NO_RESULT_FILE`).
+-   `results`: The content of the `result.txt` file from the `auto_tune.sh` run.
+-   `gcs_results`: The GCS URL where the run's artifacts are stored (if a GCS path was provided).
+
+A summary of successful and failed runs is also printed to the console upon completion.
diff --git a/benchmarks/auto_tune/batch_auto_tune.sh b/benchmarks/auto_tune/batch_auto_tune.sh
new file mode 100755
index 000000000000..7351b0b0855b
--- /dev/null
+++ b/benchmarks/auto_tune/batch_auto_tune.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+INPUT_JSON="$1"
+GCS_PATH="$2" # Optional GCS path for uploading results for each run
+
+AUTOTUNE_SCRIPT="auto_tune.sh"
+
+if [[ -z "$INPUT_JSON" ]]; then
+  echo "Error: Input JSON file not provided."
+  echo "Usage: $0 <path_to_json_file> [gcs_upload_path]"
+  exit 1
+fi
+
+if [[ ! -f "$INPUT_JSON" ]]; then
+  echo "Error: File not found at '$INPUT_JSON'"
+  exit 1
+fi
+
+if ! command -v jq &> /dev/null; then
+    echo "Error: 'jq' command not found. Please install jq to process the JSON input."
+    exit 1
+fi
+
+if [[ -n "$GCS_PATH" ]] && ! command -v gcloud &> /dev/null; then
+    echo "Error: 'gcloud' command not found, but a GCS_PATH was provided."
+    exit 1
+fi
+
+SUCCESS_COUNT=0
+FAILURE_COUNT=0
+FAILED_RUNS=()
+SCRIPT_START_TIME=$(date +%s)
+
+json_content=$(cat "$INPUT_JSON")
+num_runs=$(echo "$json_content" | jq 'length')
+
+echo "Found $num_runs benchmark configurations in $INPUT_JSON."
+echo "Starting benchmark runs..."
+echo "--------------------------------------------------"
+
+for i in $(seq 0 $(($num_runs - 1))); do
+  run_object=$(echo "$json_content" | jq ".[$i]")
+
+  RUN_START_TIME=$(date +%s)
+  ENV_VARS_ARRAY=()
+  # Turn every JSON key into an upper-cased env var; --arg keeps the key out of the jq program text
+  while IFS= read -r key; do
+    value=$(jq -r --arg k "$key" '.[$k]' <<<"$run_object")
+    var_name=$(printf '%s' "$key" | tr '[:lower:]' '[:upper:]' | tr -cd 'A-Z0-9_')
+    ENV_VARS_ARRAY+=("${var_name}=${value}")
+  done < <(jq -r 'keys_unsorted[]' <<<"$run_object")
+
+  echo "Executing run #$((i+1))/$num_runs with parameters: ${ENV_VARS_ARRAY[*]}"
+
+  # Run auto_tune.sh through a real pipe so tee has flushed the log before it is read back below
+  RUN_OUTPUT_FILE=$(mktemp)
+  if env "${ENV_VARS_ARRAY[@]}" bash "$AUTOTUNE_SCRIPT" 2>&1 | tee -a "$RUN_OUTPUT_FILE"; [[ ${PIPESTATUS[0]} -eq 0 ]]; then
+    STATUS="SUCCESS"
+    ((SUCCESS_COUNT++))
+  else
+    STATUS="FAILURE"
+    ((FAILURE_COUNT++))
+    FAILED_RUNS+=("Run #$((i+1)): $(jq -c . <<<"$run_object")")
+  fi
+
+  RUN_OUTPUT=$(<"$RUN_OUTPUT_FILE")
+  rm "$RUN_OUTPUT_FILE"
+
+  # Parse the last RESULT_FILE= line (-f2- keeps any '=' inside the path) and optionally upload to GCS
+  RUN_ID=""
+  RESULTS=""
+  GCS_RESULTS_URL=""
+  if [[ "$STATUS" == "SUCCESS" ]]; then
+    RESULT_FILE_PATH=$(echo "$RUN_OUTPUT" | grep 'RESULT_FILE=' | tail -n 1 | cut -d'=' -f2- | tr -s '/' || true)
+
+    if [[ -n "$RESULT_FILE_PATH" && -f "$RESULT_FILE_PATH" ]]; then
+      RESULT_DIR=$(dirname "$RESULT_FILE_PATH")
+      RUN_ID=$(basename "$RESULT_DIR")
+      RESULTS=$(cat "$RESULT_FILE_PATH")
+
+      if [[ -n "$GCS_PATH" ]]; then
+        GCS_RESULTS_URL="${GCS_PATH%/}/${RUN_ID}" # tolerate a trailing slash on the GCS path
+        echo "Uploading results to GCS..."
+        if gcloud storage rsync --recursive "$RESULT_DIR/" "$GCS_RESULTS_URL"; then
+          echo "GCS upload successful."
+        else
+          echo "Warning: GCS upload failed for RUN_ID $RUN_ID."
+        fi
+      fi
+    else
+      echo "Warning: Could not find result file for a successful run."
+      STATUS="WARNING_NO_RESULT_FILE"
+    fi
+  fi
+
+  # Add the results back into the JSON object for this run
+  json_content=$(echo "$json_content" | jq --argjson i "$i" --arg run_id "$RUN_ID" --arg status "$STATUS" --arg results "$RESULTS" --arg gcs_results "$GCS_RESULTS_URL" \
+    '.[$i] += {run_id: $run_id, status: $status, results: $results, gcs_results: $gcs_results}')
+
+  RUN_END_TIME=$(date +%s)
+  echo "Run finished in $((RUN_END_TIME - RUN_START_TIME)) seconds. Status: $STATUS"
+  echo "--------------------------------------------------"
+
+  # Save intermediate progress back to the file
+  echo "$json_content" > "$INPUT_JSON"
+
+done
+
+SCRIPT_END_TIME=$(date +%s)
+echo "All benchmark runs completed in $((SCRIPT_END_TIME - SCRIPT_START_TIME)) seconds."
+echo
+echo "====================== SUMMARY ======================"
+echo "Successful runs: $SUCCESS_COUNT"
+echo "Failed runs:     $FAILURE_COUNT"
+echo "==================================================="
+
+if [[ $FAILURE_COUNT -gt 0 ]]; then
+  echo "Details of failed runs (see JSON file for full parameters):"
+  for failed in "${FAILED_RUNS[@]}"; do
+    echo "  - $failed"
+  done
+fi
+
+echo "Updated results have been saved to '$INPUT_JSON'."

From 0b2cd2b12aedc69e23509c818b31e9230c40f5c3 Mon Sep 17 00:00:00 2001
From: Karan Goel <3261985+karan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 07:59:11 -0700
Subject: [PATCH 2/7] Update benchmarks/auto_tune/batch_auto_tune.sh

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Karan Goel <3261985+karan@users.noreply.github.com>
---
 benchmarks/auto_tune/batch_auto_tune.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/auto_tune/batch_auto_tune.sh b/benchmarks/auto_tune/batch_auto_tune.sh
index 7351b0b0855b..78de598624bf 100755
--- a/benchmarks/auto_tune/batch_auto_tune.sh
+++ b/benchmarks/auto_tune/batch_auto_tune.sh
@@ -102,7 +102,7 @@ for i in $(seq 0 $(($num_runs - 1))); do
   echo "--------------------------------------------------"
 
   # Save intermediate progress back to the file
-  echo "$json_content" > "$INPUT_JSON"
+  echo "$json_content" > "$INPUT_JSON.tmp" && mv "$INPUT_JSON.tmp" "$INPUT_JSON"
 
 done
 

From 4e04aa143f33cd5db51d3773bf734e4ccf2f7432 Mon Sep 17 00:00:00 2001
From: Karan Goel <3261985+karan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 07:59:37 -0700
Subject: [PATCH 3/7] Update benchmarks/auto_tune/batch_auto_tune.sh

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Karan Goel <3261985+karan@users.noreply.github.com>
---
 benchmarks/auto_tune/batch_auto_tune.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/benchmarks/auto_tune/batch_auto_tune.sh b/benchmarks/auto_tune/batch_auto_tune.sh
index 78de598624bf..329dd3e04cd0 100755
--- a/benchmarks/auto_tune/batch_auto_tune.sh
+++ b/benchmarks/auto_tune/batch_auto_tune.sh
@@ -3,7 +3,8 @@
 INPUT_JSON="$1"
 GCS_PATH="$2" # Optional GCS path for uploading results for each run
 
-AUTOTUNE_SCRIPT="auto_tune.sh"
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+AUTOTUNE_SCRIPT="$SCRIPT_DIR/auto_tune.sh"
 
 if [[ -z "$INPUT_JSON" ]]; then
   echo "Error: Input JSON file not provided."

From 1069544b4fca2e8974025f3a651841e8b6a23da0 Mon Sep 17 00:00:00 2001
From: Karan Goel <3261985+karan@users.noreply.github.com>
Date: Wed, 17 Sep 2025 07:59:48 -0700
Subject: [PATCH 4/7] Update benchmarks/auto_tune/batch_auto_tune.sh

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Karan Goel <3261985+karan@users.noreply.github.com>
---
 benchmarks/auto_tune/batch_auto_tune.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/benchmarks/auto_tune/batch_auto_tune.sh b/benchmarks/auto_tune/batch_auto_tune.sh
index 329dd3e04cd0..57ef20daf6b7 100755
--- a/benchmarks/auto_tune/batch_auto_tune.sh
+++ b/benchmarks/auto_tune/batch_auto_tune.sh
@@ -33,7 +33,10 @@ FAILED_RUNS=()
 SCRIPT_START_TIME=$(date +%s)
 
 json_content=$(cat "$INPUT_JSON")
-num_runs=$(echo "$json_content" | jq 'length')
+if ! num_runs=$(echo "$json_content" | jq 'length'); then
+  echo "Error: Invalid JSON in $INPUT_JSON. 'jq' failed to get array length." >&2
+  exit 1
+fi
 
 echo "Found $num_runs benchmark configurations in $INPUT_JSON."
 echo "Starting benchmark runs..."

From 5c68c3a29e28d79c319677c248451c7220a194fb Mon Sep 17 00:00:00 2001
From: Karan Goel <karangoel@google.com>
Date: Wed, 17 Sep 2025 15:49:38 +0000
Subject: [PATCH 5/7] Fix markdown linting issues with list items

Signed-off-by: Karan Goel <karangoel@google.com>
---
 benchmarks/auto_tune/README.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/benchmarks/auto_tune/README.md b/benchmarks/auto_tune/README.md
index bf57424091ba..d841162b820a 100644
--- a/benchmarks/auto_tune/README.md
+++ b/benchmarks/auto_tune/README.md
@@ -161,16 +161,16 @@ The `batch_auto_tune.sh` script allows you to run multiple `auto_tune.sh` experi
 
 ### How to Run
 
-1.  **Create a JSON configuration file**: Create a file (e.g., `runs_config.json`) containing an array of JSON objects. Each object defines the parameters for a single `auto_tune.sh` run.
+1. **Create a JSON configuration file**: Create a file (e.g., `runs_config.json`) containing an array of JSON objects. Each object defines the parameters for a single `auto_tune.sh` run.
 
-2.  **Execute the script**:
+2. **Execute the script**:
 
     ```bash
     bash batch_auto_tune.sh <path_to_json_file> [gcs_upload_path]
     ```
 
-    -   `<path_to_json_file>`: **Required.** Path to your JSON configuration file.
-    -   `[gcs_upload_path]`: **Optional.** A GCS path (e.g., `gs://my-bucket/benchmark-results`) where the detailed results and profiles for each run will be uploaded.
+    - `<path_to_json_file>`: **Required.** Path to your JSON configuration file.
+    - `[gcs_upload_path]`: **Optional.** A GCS path (e.g., `gs://my-bucket/benchmark-results`) where the detailed results and profiles for each run will be uploaded.
 
 ### Configuration File
 
@@ -210,9 +210,9 @@ Here is an example `runs_config.json` with two benchmark configurations:
 
 The script modifies the input JSON file in place, adding the results of each run to the corresponding object. The following fields are added:
 
--   `run_id`: A unique identifier for the run, derived from the timestamp.
--   `status`: The outcome of the run (`SUCCESS`, `FAILURE`, or `WARNING_NO_RESULT_FILE`).
--   `results`: The content of the `result.txt` file from the `auto_tune.sh` run.
--   `gcs_results`: The GCS URL where the run's artifacts are stored (if a GCS path was provided).
+- `run_id`: A unique identifier for the run, derived from the timestamp.
+- `status`: The outcome of the run (`SUCCESS`, `FAILURE`, or `WARNING_NO_RESULT_FILE`).
+- `results`: The content of the `result.txt` file from the `auto_tune.sh` run.
+- `gcs_results`: The GCS URL where the run's artifacts are stored (if a GCS path was provided).
 
 A summary of successful and failed runs is also printed to the console upon completion.

From 5f70152f03f2170e5d85fc90712b559c230e87d6 Mon Sep 17 00:00:00 2001
From: Karan Goel <karangoel@google.com>
Date: Wed, 17 Sep 2025 18:34:01 +0000
Subject: [PATCH 6/7] Add a note about local results

Signed-off-by: Karan Goel <karangoel@google.com>
---
 benchmarks/auto_tune/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/auto_tune/README.md b/benchmarks/auto_tune/README.md
index d841162b820a..79af3ac124c1 100644
--- a/benchmarks/auto_tune/README.md
+++ b/benchmarks/auto_tune/README.md
@@ -170,7 +170,7 @@ The `batch_auto_tune.sh` script allows you to run multiple `auto_tune.sh` experi
     ```
 
     - `<path_to_json_file>`: **Required.** Path to your JSON configuration file.
-    - `[gcs_upload_path]`: **Optional.** A GCS path (e.g., `gs://my-bucket/benchmark-results`) where the detailed results and profiles for each run will be uploaded.
+    - `[gcs_upload_path]`: **Optional.** A GCS path (e.g., `gs://my-bucket/benchmark-results`) where the detailed results and profiles for each run will be uploaded. If omitted, nothing is uploaded and the results remain only on the local filesystem (look for `RESULT_FILE=/path/to/results/file.txt` in the log).
 
 ### Configuration File
 

From fa088f7384111a602ed027c7ffa3db178720f259 Mon Sep 17 00:00:00 2001
From: Karan Goel <karangoel@google.com>
Date: Wed, 17 Sep 2025 21:53:55 +0000
Subject: [PATCH 7/7] Update readme for auto_tune

Signed-off-by: Karan Goel <karangoel@google.com>
---
 benchmarks/auto_tune/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/auto_tune/README.md b/benchmarks/auto_tune/README.md
index 79af3ac124c1..d1bdb4c43f10 100644
--- a/benchmarks/auto_tune/README.md
+++ b/benchmarks/auto_tune/README.md
@@ -183,7 +183,7 @@ Here is an example `runs_config.json` with two benchmark configurations:
   {
     "base": "/home/user",
     "model": "meta-llama/Llama-3.1-8B-Instruct",
-    "system": "TPU",
+    "system": "TPU",
     "tp": 8,
     "input_len": 128,
     "output_len": 2048,
@@ -194,7 +194,7 @@ Here is an example `runs_config.json` with two benchmark configurations:
   {
     "base": "/home/user",
     "model": "meta-llama/Llama-3.1-70B-Instruct",
-    "system": "TPU",
+    "system": "GPU",
     "tp": 8,
     "input_len": 4000,
     "output_len": 16,