Skip to content

Commit 09c7b73

Browse files
authored
feat: update benchmarking and deploy utils (#2933)
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
1 parent 7dd872a commit 09c7b73

File tree

15 files changed

+209
-72
lines changed

15 files changed

+209
-72
lines changed

benchmarks/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ The benchmarking framework supports:
6161
- Customizable concurrency levels (configurable via CONCURRENCIES env var), sequence lengths, and models
6262
- Automated performance plot generation with custom labels
6363

64+
**Sequential GPU Usage:**
65+
- Models are deployed and benchmarked **sequentially**, not in parallel
66+
- Each deployment gets exclusive access to all available GPUs during its benchmark run
67+
- Sequential execution ensures accurate performance measurements and a fair comparison across configurations
68+
6469
**Supported Backends:**
6570
- DynamoGraphDeployments
6671
- External HTTP endpoints (for comparison with non-Dynamo backends)

benchmarks/benchmark.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ DYNAMO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
1111

1212
# Configuration - all set via command line arguments
1313
NAMESPACE=""
14-
MODEL="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
14+
MODEL="Qwen/Qwen3-0.6B"
1515
ISL=2000
1616
STD=10
1717
OSL=256
@@ -46,7 +46,7 @@ REQUIRED:
4646
4747
OPTIONS:
4848
-h, --help Show this help message
49-
-m, --model MODEL Model name for GenAI-Perf configuration and logging (default: deepseek-ai/DeepSeek-R1-Distill-Llama-8B)
49+
-m, --model MODEL Model name for GenAI-Perf configuration and logging (default: Qwen/Qwen3-0.6B)
5050
NOTE: This must match the model configured in your deployment manifests and the model deployed in any endpoints.
5151
-i, --isl LENGTH Input sequence length (default: $ISL)
5252
-s, --std STDDEV Input sequence standard deviation (default: $STD)

benchmarks/profiler/deploy/profile_sla_job.yaml

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ spec:
2929
command: ["python", "-m", "benchmarks.profiler.profile_sla"]
3030
args:
3131
- --config
32-
- /workspace/configs/disagg.yaml
32+
- /data/configs/disagg.yaml
3333
- --output-dir
34-
- /workspace/profiling_results
34+
- /data/profiling_results
3535
- --namespace
3636
- ${NAMESPACE}
3737
- --backend
@@ -50,15 +50,10 @@ spec:
5050
- "20"
5151
volumeMounts:
5252
- name: output-volume
53-
mountPath: /workspace/profiling_results
54-
- name: configs
55-
mountPath: /workspace/configs
53+
mountPath: /data
5654
restartPolicy: Never
5755
volumes:
5856
- name: output-volume
5957
persistentVolumeClaim:
6058
claimName: dynamo-pvc
61-
- name: configs
62-
persistentVolumeClaim:
63-
claimName: dynamo-pvc
6459
backoffLimit: 0
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0

benchmarks/utils/benchmark.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,17 @@ def main() -> int:
5454
help="Input in format <label>=<manifest_path_or_endpoint>. Can be specified multiple times for comparisons.",
5555
)
5656
parser.add_argument("--namespace", required=True, help="Kubernetes namespace")
57-
parser.add_argument("--isl", type=int, default=200, help="Input sequence length")
57+
parser.add_argument("--isl", type=int, default=2000, help="Input sequence length")
5858
parser.add_argument(
5959
"--std",
6060
type=int,
6161
default=10,
6262
help="Input sequence standard deviation",
6363
)
64-
parser.add_argument("--osl", type=int, default=200, help="Output sequence length")
64+
parser.add_argument("--osl", type=int, default=256, help="Output sequence length")
6565
parser.add_argument(
6666
"--model",
67-
default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
67+
default="Qwen/Qwen3-0.6B",
6868
help="Model name",
6969
)
7070
parser.add_argument(

components/backends/sglang/deploy/disagg_planner.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ spec:
4848
pvc:
4949
create: false
5050
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
51-
mountPoint: /workspace/profiling_results
51+
mountPoint: /data/profiling_results
5252
extraPodSpec:
5353
mainContainer:
5454
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
@@ -62,7 +62,7 @@ spec:
6262
--environment=kubernetes
6363
--backend=sglang
6464
--adjustment-interval=60
65-
--profile-results-dir=/workspace/profiling_results
65+
--profile-results-dir=/data/profiling_results
6666
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
6767
dynamoNamespace: dynamo
6868
componentType: frontend

components/backends/vllm/deploy/disagg_planner.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ spec:
4848
pvc:
4949
create: false
5050
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
51-
mountPoint: /workspace/profiling_results
51+
mountPoint: /data/profiling_results
5252
extraPodSpec:
5353
mainContainer:
5454
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
@@ -62,7 +62,7 @@ spec:
6262
--environment=kubernetes
6363
--backend=vllm
6464
--adjustment-interval=60
65-
--profile-results-dir=/workspace/profiling_results
65+
--profile-results-dir=/data/profiling_results
6666
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
6767
dynamoNamespace: vllm-disagg-planner
6868
componentType: frontend

deploy/utils/README.md

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,23 +88,36 @@ These scripts interact with the Persistent Volume Claim (PVC) that stores config
8888

8989
```bash
9090
# The profiling job reads your DGD config from the PVC
91-
python3 deploy/utils/inject_manifest.py \
91+
# IMPORTANT: The --dest path must start with /data/ for security reasons
92+
python3 -m deploy.utils.inject_manifest \
9293
--namespace $NAMESPACE \
9394
--src ./my-disagg.yaml \
94-
--dest /configs/disagg.yaml
95+
--dest /data/configs/disagg.yaml
9596
```
9697

9798
**Download benchmark/profiling results:**
9899

99100
```bash
100101
# After benchmarking or profiling completes, download results
101-
python3 deploy/utils/download_pvc_results.py \
102+
python3 -m deploy.utils.download_pvc_results \
102103
--namespace $NAMESPACE \
103104
--output-dir ./pvc_files \
104-
--folder /results \
105+
--folder /data/results \
105106
--no-config # optional: skip *.yaml/*.yml in the download
106107
```
107108

109+
#### Path Requirements
110+
111+
**Important**: The PVC is mounted at `/data` in the access pod for security reasons. Every PVC path passed to these scripts (`--dest` for `inject_manifest`, `--folder` for `download_pvc_results`) must start with `/data/`.
112+
113+
**Common path patterns:**
114+
- `/data/configs/` - Configuration files (DGD manifests)
115+
- `/data/results/` - Benchmark results
116+
- `/data/profiling_results/` - Profiling data
117+
- `/data/benchmarking/` - Benchmarking artifacts
118+
119+
**User-friendly error messages**: If you forget the `/data/` prefix, both scripts exit with an error message that explains the fix and lists example paths; `inject_manifest` additionally prints the corrected path for your input.
120+
108121
#### Next Steps
109122

110123
For complete benchmarking workflows:

deploy/utils/download_pvc_results.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
2424
Usage:
2525
python3 download_pvc_results.py --namespace <namespace> --output-dir <local_directory> \
26-
--folder </absolute/folder/in/pvc> [--no-config]
26+
--folder /data/<folder/in/pvc> [--no-config]
2727
"""
2828

2929
import argparse
@@ -36,7 +36,7 @@
3636
from deploy.utils.kubernetes import (
3737
check_kubectl_access,
3838
cleanup_access_pod,
39-
deploy_access_pod,
39+
ensure_clean_access_pod,
4040
run_command,
4141
)
4242
except ModuleNotFoundError:
@@ -46,7 +46,7 @@
4646
from deploy.utils.kubernetes import (
4747
check_kubectl_access,
4848
cleanup_access_pod,
49-
deploy_access_pod,
49+
ensure_clean_access_pod,
5050
run_command,
5151
)
5252

@@ -182,19 +182,30 @@ def main():
182182
parser.add_argument(
183183
"--folder",
184184
required=True,
185-
help="Absolute folder path in the PVC to download, e.g. /profiling_results or /benchmarking_results",
185+
help="Absolute folder path in the PVC to download, must start with /data/, e.g. /data/profiling_results or /data/benchmarking_results",
186186
)
187187

188188
args = parser.parse_args()
189189

190+
# Validate folder path starts with /data/
191+
if not args.folder.startswith("/data/"):
192+
print("❌ Error: Folder path must start with '/data/'")
193+
print(f" Provided: {args.folder}")
194+
print(" Quick Fix: Add '/data/' prefix to your path")
195+
print(" Examples:")
196+
print(" /profiling_results → /data/profiling_results")
197+
print(" /benchmarking_results → /data/benchmarking_results")
198+
print(" /configs → /data/configs")
199+
sys.exit(1)
200+
190201
print("📥 PVC Results Download")
191202
print("=" * 40)
192203

193204
# Validate inputs
194205
check_kubectl_access(args.namespace)
195206

196207
# Deploy access pod
197-
pod_name = deploy_access_pod(args.namespace)
208+
pod_name = ensure_clean_access_pod(args.namespace)
198209
try:
199210
# List and download files
200211
files = list_pvc_contents(args.namespace, pod_name, args.folder, args.no_config)

deploy/utils/inject_manifest.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,15 @@
2121
Copies any Kubernetes manifest file into the PVC for later use by jobs.
2222
Both the source manifest path and destination path in the PVC are required.
2323
24+
IMPORTANT: The PVC is mounted at /data in the access pod for security reasons.
25+
All destination paths must start with '/data/'.
26+
2427
Usage:
2528
python3 inject_manifest.py --namespace <namespace> --src <local_manifest.yaml> --dest /data/<path_in_pvc>
2629
2730
Examples:
28-
python3 inject_manifest.py --namespace <ns> --src ./my-disagg.yaml --dest /configs/disagg.yaml
29-
python3 inject_manifest.py --namespace <ns> --src ./my-agg.yaml --dest /configs/agg.yaml
31+
python3 inject_manifest.py --namespace <ns> --src ./disagg.yaml --dest /data/configs/disagg.yaml
32+
python3 inject_manifest.py --namespace <ns> --src ./my-data.yaml --dest /data/custom/path/data.yaml
3033
"""
3134

3235
import argparse
@@ -37,7 +40,7 @@
3740
PVC_ACCESS_POD_NAME,
3841
check_kubectl_access,
3942
cleanup_access_pod,
40-
deploy_access_pod,
43+
ensure_clean_access_pod,
4144
run_command,
4245
)
4346

@@ -100,16 +103,39 @@ def main():
100103
parser.add_argument(
101104
"--dest",
102105
required=True,
103-
help="Absolute target path in PVC (e.g., /profiling_results/agg.yaml)",
106+
help="Absolute target path in PVC (must start with /data/, e.g., /data/configs/agg.yaml)",
104107
)
105108

106109
args = parser.parse_args()
107110

108-
# Validate target_path to prevent directory traversal
109-
if not args.dest.startswith("/"):
110-
print(
111-
"ERROR: Target path must be an absolute path inside the PVC (start with '/')."
112-
)
111+
# Require the destination (args.dest) to be under the PVC mount point (/data/); paths containing '..' are rejected separately below
112+
if not args.dest.startswith("/data/"):
113+
print("=" * 60)
114+
print("❌ ERROR: Invalid target path")
115+
print("=" * 60)
116+
print("The PVC is mounted at /data in the access pod.")
117+
print("All paths must start with '/data/' for security reasons.")
118+
print("")
119+
print("💡 QUICK FIX:")
120+
if args.dest.startswith("/"):
121+
# Suggest the fix
122+
suggested_path = f"/data{args.dest}"
123+
print(f" Change: {args.dest}")
124+
print(f" To: {suggested_path}")
125+
print("")
126+
print("📝 Example commands:")
127+
print(" python3 -m deploy.utils.inject_manifest \\")
128+
print(f" --namespace {args.namespace} \\")
129+
print(f" --src {args.src} \\")
130+
print(f" --dest {suggested_path}")
131+
else:
132+
print(f" Use: /data/{args.dest.lstrip('/')}")
133+
print("")
134+
print("🔍 Common patterns:")
135+
print(" /configs/file.yaml → /data/configs/file.yaml")
136+
print(" /results/data.yaml → /data/results/data.yaml")
137+
print(" /profiling_results/... → /data/profiling_results/...")
138+
print("=" * 60)
113139
sys.exit(1)
114140

115141
if ".." in args.dest:
@@ -123,7 +149,7 @@ def main():
123149
check_kubectl_access(args.namespace)
124150

125151
# Deploy access pod
126-
deploy_access_pod(args.namespace)
152+
ensure_clean_access_pod(args.namespace)
127153
try:
128154
# Copy manifest
129155
copy_manifest(args.namespace, args.src, args.dest)

0 commit comments

Comments
 (0)