Skip to content

Commit d934e41

Browse files
alec-flowers authored and
indrajit96 committed
refactor: standardize e2e tests across 3 frameworks (#2827)
Signed-off-by: alec-flowers <aflowers@nvidia.com>
Signed-off-by: Indrajit Bhosale <iamindrajitb@gmail.com>
1 parent 4a7d5ca commit d934e41

29 files changed

+1069
-933
lines changed

components/backends/vllm/launch/agg.sh

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,9 @@ set -e
55
trap 'echo Cleaning up...; kill 0' EXIT
66

77
# run ingress
8-
python -m dynamo.frontend &
8+
python -m dynamo.frontend --http-port=8000 &
99

1010
# run worker
1111
# --enforce-eager is added for quick deployment. for production use, need to remove this flag
12-
python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --connector none
12+
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
13+
python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --connector none

components/backends/vllm/launch/agg_lmcache.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ set -e
55
trap 'echo Cleaning up...; kill 0' EXIT
66

77
# run ingress
8-
python -m dynamo.frontend &
8+
python -m dynamo.frontend --http-port=8000 &
99

1010
# run worker with LMCache enabled
1111
ENABLE_LMCACHE=1 \

components/backends/vllm/launch/agg_router.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ set -e
55
trap 'echo Cleaning up...; kill 0' EXIT
66

77
# run ingress
8-
python -m dynamo.frontend --router-mode kv &
8+
python -m dynamo.frontend --router-mode kv --http-port=8000 &
99

1010
# run workers
1111
# --enforce-eager is added for quick deployment. for production use, need to remove this flag

components/backends/vllm/launch/dep.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ set -e
55
trap 'echo Cleaning up...; kill 0' EXIT
66

77
# run ingress
8-
python -m dynamo.frontend --router-mode kv &
8+
python -m dynamo.frontend --router-mode kv --http-port=8000 &
99

1010
# Data Parallel Attention / Expert Parallelism
1111
# Routing to DP workers managed by Dynamo

components/backends/vllm/launch/disagg.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ set -e
55
trap 'echo Cleaning up...; kill 0' EXIT
66

77
# run ingress
8-
python -m dynamo.frontend --router-mode kv &
8+
python -m dynamo.frontend --router-mode kv --http-port=8000 &
99

1010
# --enforce-eager is added for quick deployment. for production use, need to remove this flag
1111
CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager &

components/backends/vllm/launch/disagg_lmcache.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ set -e
55
trap 'echo Cleaning up...; kill 0' EXIT
66

77
# run ingress with KV router
8-
python -m dynamo.frontend --router-mode kv &
8+
python -m dynamo.frontend --router-mode kv --http-port=8000 &
99

1010
# run decode worker on GPU 0, without enabling LMCache
1111
CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B &

components/backends/vllm/launch/disagg_router.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ set -e
66
trap 'echo Cleaning up...; kill 0' EXIT
77

88
# run ingress
9-
python -m dynamo.frontend --router-mode kv &
9+
python -m dynamo.frontend --router-mode kv --http-port=8000 &
1010

1111
# routing will happen between the two decode workers
1212
# --enforce-eager is added for quick deployment. for production use, need to remove this flag

components/backends/vllm/launch/dsr1_dep.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ trap 'echo Cleaning up...; kill 0' EXIT
8383

8484
# run ingress if it's node 0
8585
if [ $NODE_RANK -eq 0 ]; then
86-
DYN_LOG=debug python -m dynamo.frontend --router-mode kv 2>&1 | tee $LOG_DIR/dsr1_dep_ingress.log &
86+
DYN_LOG=debug python -m dynamo.frontend --router-mode kv --http-port=8000 2>&1 | tee $LOG_DIR/dsr1_dep_ingress.log &
8787
fi
8888

8989
mkdir -p $LOG_DIR

examples/multimodal/launch/agg.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ else
5353
fi
5454

5555
# run ingress
56-
python -m dynamo.frontend &
56+
python -m dynamo.frontend --http-port=8000 &
5757

5858
# run processor
5959
python3 components/processor.py --model $MODEL_NAME --prompt-template "$PROMPT_TEMPLATE" &

examples/multimodal/launch/agg_llama.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ trap 'echo Cleaning up...; kill 0' EXIT
88
MODEL_NAME="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
99

1010
# run ingress
11-
python -m dynamo.frontend &
11+
python -m dynamo.frontend --http-port=8000 &
1212

1313
# run processor
1414
python3 components/processor.py --model $MODEL_NAME --prompt-template "<|image|>\n<prompt>" &

0 commit comments

Comments
 (0)