@@ -82,25 +82,46 @@ kubectl create secret docker-registry nvcr-imagepullsecret \
8282# in the project's root folder
8383./container/build.sh --framework VLLM
8484# Tag and push to your container registry
85+ export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.2 # or your own Dynamo image
86+ # NOTE: DGD_CONFIG_FILE is the path of the config file inside the DOCKER_IMAGE container, not a path on your local machine
87+ export DGD_CONFIG_FILE=/workspace/components/backends/vllm/deploy/disagg.yaml # or your own disagg config file
8588```
8689
8790Replace the ` image ` within ` profile_sla_job.yaml ` with the tag of the image you pushed.
8891
89- ** Step 2: Run profiling (required)**
92+ **Step 2: Set SLA target**
93+
94+ Edit `$DYNAMO_HOME/benchmarks/profiler/deploy/profile_sla_job.yaml` to set the target ISL (input sequence length), OSL (output sequence length), TTFT (time to first token), and ITL (inter-token latency).
95+
96+ ``` yaml
97+ spec :
98+ template :
99+ spec :
100+ containers :
101+ - name : profile-sla
102+ args :
103+ - --isl
104+ - 3000 # average ISL is 3000 tokens
105+ - --osl
106+ - 150 # average OSL is 150 tokens
107+ - --ttft
108+ - 200 # target TTFT is 200ms
109+ - --itl
110+ - 20 # target ITL is 20ms
111+ ```
112+
113+ **Step 3: Run profiling (required)**
114+
90115```bash
91116cd $DYNAMO_HOME/benchmarks/profiler/deploy
92117envsubst < profiling_pvc.yaml | kubectl apply -f -
93118envsubst < profile_sla_sa.yaml | kubectl apply -f -
94119envsubst < profile_sla_rbac.yaml | kubectl apply -f -
95120envsubst < profile_sla_binding.yaml | kubectl apply -f -
96-
97- export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.3.2 # or your own image
98- # NOTE: DGD_CONFIG_FILE is pointing to the location of the config file inside DOCKER_IMAGE
99- export DGD_CONFIG_FILE=/workspace/components/backends/vllm/deploy/disagg.yaml # or your own disagg config file
100121envsubst < profile_sla_job.yaml | kubectl apply -f -
101122```
102123
103- ** Step 3 : Wait for profiling to complete**
124+ **Step 4: Wait for profiling to complete**
104125``` bash
105126kubectl get jobs -n $NAMESPACE
106127kubectl logs job/profile-sla -n $NAMESPACE
0 commit comments