2 changes: 1 addition & 1 deletion benchmarks/profiler/deploy/profile_sla_job.yaml
@@ -29,7 +29,7 @@ spec:
command: ["python", "-m", "benchmarks.profiler.profile_sla"]
args:
- --config
- /data/configs/disagg.yaml
- ${DGD_CONFIG_FILE}
- --output-dir
- /data/profiling_results
- --namespace
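The `${DGD_CONFIG_FILE}` placeholder is rendered with `envsubst` when the job manifest is applied (Steps 3 and 4 of the profiling guide further down). A minimal sketch, assuming the config was injected to `/data/configs/disagg.yaml`:

```bash
# Sketch only: export the variable, then render and apply the job manifest
export DGD_CONFIG_FILE=/data/configs/disagg.yaml
envsubst < benchmarks/profiler/deploy/profile_sla_job.yaml | kubectl apply -f -
```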
2 changes: 1 addition & 1 deletion components/backends/sglang/deploy/disagg_planner.yaml
@@ -48,7 +48,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data/profiling_results
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
4 changes: 2 additions & 2 deletions components/backends/trtllm/deploy/disagg_planner.yaml
@@ -66,7 +66,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /workspace/profiling_results
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
@@ -82,7 +82,7 @@ spec:
- --environment=kubernetes
- --backend=trtllm
- --adjustment-interval=60
- --profile-results-dir=/workspace/profiling_results
- --profile-results-dir=/data/profiling_results
- --prometheus-port=9085
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: trtllm-disagg-planner
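The mount point moves from the results subdirectory to the PVC root, so both the injected configs and the profiling results are visible under one mount. An illustrative layout, not part of the diff, with directory names taken from paths used elsewhere in this PR:

```yaml
# Sketch of the shared PVC after this change (assumes the profiler job and the planner mount the same dynamo-pvc)
pvc:
  name: dynamo-pvc
  mountPoint: /data              # whole volume now visible at /data
# Expected contents of the volume:
#   /data/configs/disagg.yaml    # DGD manifest injected via deploy.utils.inject_manifest
#   /data/profiling_results/     # written by the profiler, read back via --profile-results-dir=/data/profiling_results
```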
4 changes: 3 additions & 1 deletion components/backends/vllm/deploy/disagg_planner.yaml
@@ -13,6 +13,8 @@ spec:
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}'
- name: DYNAMO_NAMESPACE
value: "vllm-disagg-planner"
- name: PROMETHEUS_PORT
value: "8000"
services:
Frontend:
dynamoNamespace: vllm-disagg-planner
@@ -48,7 +50,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data/profiling_results
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
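For readability, the single-line Prometheus value in the first hunk above expands to the following (same content, pretty-printed); the frontend target port matches the newly added PROMETHEUS_PORT value of 8000:

```json
{
  "Prometheus": {
    "global": { "scrape_interval": "5s" },
    "scrape_configs": [
      { "job_name": "prometheus", "static_configs": [{ "targets": ["localhost:9090"] }] },
      { "job_name": "frontend", "static_configs": [{ "targets": ["vllm-disagg-planner-frontend:8000"] }] }
    ]
  }
}
```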
6 changes: 6 additions & 0 deletions deploy/utils/setup_benchmarking_resources.sh
@@ -70,6 +70,12 @@ log "Applying benchmarking manifests to namespace $NAMESPACE"
export NAMESPACE # ensure envsubst can see it
for mf in "$(dirname "$0")/manifests"/*.yaml; do
if [[ -f "$mf" ]]; then
# Skip pvc-access-pod.yaml as it's managed by inject_manifest.py
if [[ "$(basename "$mf")" == "pvc-access-pod.yaml" ]]; then
log "Skipping $mf (managed by inject_manifest.py)"
continue
fi

if command -v envsubst >/dev/null 2>&1; then
envsubst < "$mf" | kubectl -n "$NAMESPACE" apply -f -
else
51 changes: 14 additions & 37 deletions docs/benchmarks/pre_deployment_profiling.md
@@ -100,31 +100,6 @@ pip install -r deploy/utils/requirements.txt

Use the injector utility to place your DGD manifest into the PVC. The profiling job will read the path you specify.

```bash
# Inject your disagg manifest
python3 -m deploy.utils.inject_manifest \
--namespace $NAMESPACE \
--src components/backends/vllm/deploy/disagg.yaml \
--dest /data/configs/disagg.yaml

# Set the docker image for the profiling job; any docker image that contains your script.
export DOCKER_IMAGE=nvcr.io/nvidia/dynamo:latest-vllm
```

### Configure container image (optional)

You have two options for configuring your profiling setup:

**Option A: Use pre-built image with custom config injection (recommended)**

Use the default pre-built image and inject custom configurations via PVC:

1. **Set the container image:**
```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag
```

2. **Inject your custom disagg configuration:**
```bash
# Use default disagg.yaml config
python3 -m deploy.utils.inject_manifest --namespace $NAMESPACE --src components/backends/vllm/deploy/disagg.yaml --dest /data/configs/disagg.yaml
@@ -138,16 +113,6 @@ Use the default pre-built image and inject custom configurations via PVC:

> **Note**: All paths must start with `/data/` for security reasons. If you forget this prefix, the script will show a helpful error message with the correct path.

3. **Set the config path for the profiling job:**
```bash
export DGD_CONFIG_FILE=/workspace/profiling_results/disagg.yaml # or your custom path
```

This approach allows you to:
- Customize DGD configurations without rebuilding container images
- Test different model configurations easily
- Version control your DGD configs alongside your code

> **Important**: For profiling, disagg configs should be run with Grove disabled by adding the annotation `nvidia.com/enable-grove: "false"` to avoid alpha Grove status issues.
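A minimal sketch of where that annotation sits in a DGD manifest; the `metadata.annotations` placement is an assumption, only the key and value come from the note above:

```yaml
# Illustrative only, not from this PR
metadata:
  annotations:
    nvidia.com/enable-grove: "false"   # disable Grove while profiling
```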

**Step 2: Set SLA target**
@@ -173,13 +138,25 @@ spec:
- <vllm/sglang>
```

**Step 3: Run profiling (required)**
**Step 3: Define the container image and config path**

1. **Set the container image:**
```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken)
```

2. **Set the config path for the profiling job:**
```bash
export DGD_CONFIG_FILE=/data/configs/disagg.yaml # should be the same path you set for --dest in Step 1
```

**Step 4: Run profiling (required)**

```bash
envsubst < benchmarks/profiler/deploy/profile_sla_job.yaml | kubectl apply -f -
```

**Step 4: Wait for profiling to complete**
**Step 5: Wait for profiling to complete**
```bash
kubectl get jobs -n $NAMESPACE
kubectl logs job/profile-sla -n $NAMESPACE
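# Optional, not in the original doc: block until the job finishes before reading results
kubectl wait --for=condition=complete job/profile-sla -n $NAMESPACE --timeout=4h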