feat: update benchmarking and deploy utils (#2933)

hhzhang16 · web-flow · commit 09c7b73caf8e · 2025-09-08T22:53:55.000-04:00
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -61,6 +61,11 @@ The benchmarking framework supports:
 - Customizable concurrency levels (configurable via CONCURRENCIES env var), sequence lengths, and models
 - Automated performance plot generation with custom labels
 
+**Sequential GPU Usage:**
+- Models are deployed and benchmarked **sequentially**, not in parallel
+- Each deployment gets exclusive access to all available GPUs during its benchmark run
+- Ensures accurate performance measurements and fair comparison across configurations
+
 **Supported Backends:**
 - DynamoGraphDeployments
 - External HTTP endpoints (for comparison with non-Dynamo backends)
diff --git a/benchmarks/benchmark.sh b/benchmarks/benchmark.sh
@@ -11,7 +11,7 @@ DYNAMO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
 
 # Configuration - all set via command line arguments
 NAMESPACE=""
-MODEL="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+MODEL="Qwen/Qwen3-0.6B"
 ISL=2000
 STD=10
 OSL=256
@@ -46,7 +46,7 @@ REQUIRED:
 
 OPTIONS:
     -h, --help                    Show this help message
-    -m, --model MODEL             Model name for GenAI-Perf configuration and logging (default: deepseek-ai/DeepSeek-R1-Distill-Llama-8B)
+    -m, --model MODEL             Model name for GenAI-Perf configuration and logging (default: Qwen/Qwen3-0.6B)
                                   NOTE: This must match the model configured in your deployment manifests and the model deployed in any endpoints.
     -i, --isl LENGTH              Input sequence length (default: $ISL)
     -s, --std STDDEV              Input sequence standard deviation (default: $STD)
diff --git a/benchmarks/profiler/deploy/profile_sla_job.yaml b/benchmarks/profiler/deploy/profile_sla_job.yaml
@@ -29,9 +29,9 @@ spec:
         command: ["python", "-m", "benchmarks.profiler.profile_sla"]
         args:
           - --config
-          - /workspace/configs/disagg.yaml
+          - /data/configs/disagg.yaml
           - --output-dir
-          - /workspace/profiling_results
+          - /data/profiling_results
           - --namespace
           - ${NAMESPACE}
           - --backend
@@ -50,15 +50,10 @@ spec:
           - "20"
         volumeMounts:
           - name: output-volume
-            mountPath: /workspace/profiling_results
-          - name: configs
-            mountPath: /workspace/configs
+            mountPath: /data
       restartPolicy: Never
       volumes:
         - name: output-volume
           persistentVolumeClaim:
             claimName: dynamo-pvc
-        - name: configs
-          persistentVolumeClaim:
-            claimName: dynamo-pvc
   backoffLimit: 0
diff --git a/benchmarks/profiler/utils/__init__.py b/benchmarks/profiler/utils/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/benchmarks/utils/benchmark.py b/benchmarks/utils/benchmark.py
@@ -54,17 +54,17 @@ def main() -> int:
         help="Input in format <label>=<manifest_path_or_endpoint>. Can be specified multiple times for comparisons.",
     )
     parser.add_argument("--namespace", required=True, help="Kubernetes namespace")
-    parser.add_argument("--isl", type=int, default=200, help="Input sequence length")
+    parser.add_argument("--isl", type=int, default=2000, help="Input sequence length")
     parser.add_argument(
         "--std",
         type=int,
         default=10,
         help="Input sequence standard deviation",
     )
-    parser.add_argument("--osl", type=int, default=200, help="Output sequence length")
+    parser.add_argument("--osl", type=int, default=256, help="Output sequence length")
     parser.add_argument(
         "--model",
-        default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        default="Qwen/Qwen3-0.6B",
         help="Model name",
     )
     parser.add_argument(
diff --git a/components/backends/sglang/deploy/disagg_planner.yaml b/components/backends/sglang/deploy/disagg_planner.yaml
@@ -48,7 +48,7 @@ spec:
       pvc:
         create: false
         name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
-        mountPoint: /workspace/profiling_results
+        mountPoint: /data/profiling_results
       extraPodSpec:
         mainContainer:
           image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
@@ -62,7 +62,7 @@ spec:
               --environment=kubernetes
               --backend=sglang
               --adjustment-interval=60
-              --profile-results-dir=/workspace/profiling_results
+              --profile-results-dir=/data/profiling_results
     Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
       dynamoNamespace: dynamo
       componentType: frontend
diff --git a/components/backends/vllm/deploy/disagg_planner.yaml b/components/backends/vllm/deploy/disagg_planner.yaml
@@ -48,7 +48,7 @@ spec:
       pvc:
         create: false
         name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
-        mountPoint: /workspace/profiling_results
+        mountPoint: /data/profiling_results
       extraPodSpec:
         mainContainer:
           image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
@@ -62,7 +62,7 @@ spec:
               --environment=kubernetes
               --backend=vllm
               --adjustment-interval=60
-              --profile-results-dir=/workspace/profiling_results
+              --profile-results-dir=/data/profiling_results
     Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
       dynamoNamespace: vllm-disagg-planner
       componentType: frontend
diff --git a/deploy/utils/README.md b/deploy/utils/README.md
@@ -88,23 +88,36 @@ These scripts interact with the Persistent Volume Claim (PVC) that stores config
 
 ```bash
 # The profiling job reads your DGD config from the PVC
-python3 deploy/utils/inject_manifest.py \
+# IMPORTANT: All paths must start with /data/ for security reasons
+python3 -m deploy.utils.inject_manifest \
   --namespace $NAMESPACE \
   --src ./my-disagg.yaml \
-  --dest /configs/disagg.yaml
+  --dest /data/configs/disagg.yaml
 ```
 
 **Download benchmark/profiling results:**
 
 ```bash
 # After benchmarking or profiling completes, download results
-python3 deploy/utils/download_pvc_results.py \
+python3 -m deploy.utils.download_pvc_results \
   --namespace $NAMESPACE \
   --output-dir ./pvc_files \
-  --folder /results \
+  --folder /data/results \
   --no-config   # optional: skip *.yaml/*.yml in the download
 ```
 
+#### Path Requirements
+
+**Important**: The PVC is mounted at `/data` in the access pod for security reasons. All destination paths must start with `/data/`.
+
+**Common path patterns:**
+- `/data/configs/` - Configuration files (DGD manifests)
+- `/data/results/` - Benchmark results
+- `/data/profiling_results/` - Profiling data
+- `/data/benchmarking/` - Benchmarking artifacts
+
+**User-friendly error messages**: If you forget the `/data/` prefix, the script will show a helpful error message with the correct path and example commands.
+
 #### Next Steps
 
 For complete benchmarking workflows:
diff --git a/deploy/utils/download_pvc_results.py b/deploy/utils/download_pvc_results.py
@@ -23,7 +23,7 @@
 
 Usage:
     python3 download_pvc_results.py --namespace <namespace> --output-dir <local_directory> \
-        --folder </absolute/folder/in/pvc> [--no-config]
+        --folder /data/<folder/in/pvc> [--no-config]
 """
 
 import argparse
@@ -36,7 +36,7 @@
     from deploy.utils.kubernetes import (
         check_kubectl_access,
         cleanup_access_pod,
-        deploy_access_pod,
+        ensure_clean_access_pod,
         run_command,
     )
 except ModuleNotFoundError:
@@ -46,7 +46,7 @@
     from deploy.utils.kubernetes import (
         check_kubectl_access,
         cleanup_access_pod,
-        deploy_access_pod,
+        ensure_clean_access_pod,
         run_command,
     )
 
@@ -182,19 +182,30 @@ def main():
     parser.add_argument(
         "--folder",
         required=True,
-        help="Absolute folder path in the PVC to download, e.g. /profiling_results or /benchmarking_results",
+        help="Absolute folder path in the PVC to download, must start with /data/, e.g. /data/profiling_results or /data/benchmarking_results",
     )
 
     args = parser.parse_args()
 
+    # Validate folder path starts with /data/
+    if not args.folder.startswith("/data/"):
+        print("❌ Error: Folder path must start with '/data/'")
+        print(f"   Provided: {args.folder}")
+        print("   Quick Fix: Add '/data/' prefix to your path")
+        print("   Examples:")
+        print("     /profiling_results → /data/profiling_results")
+        print("     /benchmarking_results → /data/benchmarking_results")
+        print("     /configs → /data/configs")
+        sys.exit(1)
+
     print("📥 PVC Results Download")
     print("=" * 40)
 
     # Validate inputs
     check_kubectl_access(args.namespace)
 
     # Deploy access pod
-    pod_name = deploy_access_pod(args.namespace)
+    pod_name = ensure_clean_access_pod(args.namespace)
     try:
         # List and download files
         files = list_pvc_contents(args.namespace, pod_name, args.folder, args.no_config)
diff --git a/deploy/utils/inject_manifest.py b/deploy/utils/inject_manifest.py
@@ -21,12 +21,15 @@
 Copies any Kubernetes manifest file into the PVC for later use by jobs.
 Both the source manifest path and destination path in the PVC are required.
 
+IMPORTANT: The PVC is mounted at /data in the access pod for security reasons.
+All destination paths must start with '/data/'.
+
 Usage:
     python3 inject_manifest.py --namespace <namespace> --src <local_manifest.yaml> --dest <absolute_path_in_pvc>
 
 Examples:
-    python3 inject_manifest.py --namespace <ns> --src ./my-disagg.yaml --dest /configs/disagg.yaml
-    python3 inject_manifest.py --namespace <ns> --src ./my-agg.yaml    --dest /configs/agg.yaml
+    python3 inject_manifest.py --namespace <ns> --src ./disagg.yaml --dest /data/configs/disagg.yaml
+    python3 inject_manifest.py --namespace <ns> --src ./my-data.yaml    --dest /data/custom/path/data.yaml
 """
 
 import argparse
@@ -37,7 +40,7 @@
     PVC_ACCESS_POD_NAME,
     check_kubectl_access,
     cleanup_access_pod,
-    deploy_access_pod,
+    ensure_clean_access_pod,
     run_command,
 )
 
@@ -100,16 +103,39 @@ def main():
     parser.add_argument(
         "--dest",
         required=True,
-        help="Absolute target path in PVC (e.g., /profiling_results/agg.yaml)",
+        help="Absolute target path in PVC (must start with /data/, e.g., /data/configs/agg.yaml)",
     )
 
     args = parser.parse_args()
 
-    # Validate target_path to prevent directory traversal
-    if not args.dest.startswith("/"):
-        print(
-            "ERROR: Target path must be an absolute path inside the PVC (start with '/')."
-        )
+    # Validate target_path to prevent directory traversal and ensure it's within PVC
+    if not args.dest.startswith("/data/"):
+        print("=" * 60)
+        print("❌ ERROR: Invalid target path")
+        print("=" * 60)
+        print("The PVC is mounted at /data in the access pod.")
+        print("All paths must start with '/data/' for security reasons.")
+        print("")
+        print("💡 QUICK FIX:")
+        if args.dest.startswith("/"):
+            # Suggest the fix
+            suggested_path = f"/data{args.dest}"
+            print(f"  Change: {args.dest}")
+            print(f"  To:     {suggested_path}")
+            print("")
+            print("📝 Example commands:")
+            print("  python3 -m deploy.utils.inject_manifest \\")
+            print(f"    --namespace {args.namespace} \\")
+            print(f"    --src {args.src} \\")
+            print(f"    --dest {suggested_path}")
+        else:
+            print(f"  Use: /data/{args.dest.lstrip('/')}")
+        print("")
+        print("🔍 Common patterns:")
+        print("  /configs/file.yaml     → /data/configs/file.yaml")
+        print("  /results/data.yaml     → /data/results/data.yaml")
+        print("  /profiling_results/... → /data/profiling_results/...")
+        print("=" * 60)
         sys.exit(1)
 
     if ".." in args.dest:
@@ -123,7 +149,7 @@ def main():
     check_kubectl_access(args.namespace)
 
     # Deploy access pod
-    deploy_access_pod(args.namespace)
+    ensure_clean_access_pod(args.namespace)
     try:
         # Copy manifest
         copy_manifest(args.namespace, args.src, args.dest)
diff --git a/deploy/utils/kubernetes.py b/deploy/utils/kubernetes.py
diff --git a/deploy/utils/manifests/pvc-access-pod.yaml b/deploy/utils/manifests/pvc-access-pod.yaml
diff --git a/deploy/utils/setup_k8s_namespace.sh b/deploy/utils/setup_k8s_namespace.sh
diff --git a/docs/benchmarks/benchmarking.md b/docs/benchmarks/benchmarking.md
diff --git a/docs/benchmarks/pre_deployment_profiling.md b/docs/benchmarks/pre_deployment_profiling.md

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.`
	`2`	`+# SPDX-License-Identifier: Apache-2.0`