#!/bin/bash
set -xe

# Command-line handling for the accuracy-test driver.
#
# Supported options:
#   --kv_buffer_device <device>   Device used for the KV buffer (default: cuda).
#
# After this prologue two globals are set for the rest of the script:
#   KV_BUFFER_DEVICE  - the selected device name.
#   KV_CONFIG         - JSON string for the --kv-transfer-config argument.

#######################################
# Print the usage line to stderr.
#######################################
usage() {
  echo "Usage: $0 [--kv_buffer_device <cuda|cpu>]" >&2
}

#######################################
# Parse command line arguments.
# Globals:   KV_BUFFER_DEVICE (written)
# Arguments: the script's positional parameters ("$@")
# Exits:     1 on an unknown option or an option missing its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --kv_buffer_device)
        # Without this guard `shift 2` fails and `set -e` aborts the
        # script silently when the value is omitted.
        if [[ $# -lt 2 || -z "$2" ]]; then
          echo "Missing value for $1" >&2
          usage
          exit 1
        fi
        KV_BUFFER_DEVICE="$2"
        shift 2
        ;;
      *)
        echo "Unknown option $1" >&2
        usage
        exit 1
        ;;
    esac
  done
}

#######################################
# Build the kv-transfer-config JSON for a given buffer device.
# The "kv_buffer_device" key is only emitted when the device is not "cuda".
# Arguments: $1 - buffer device name
# Outputs:   the JSON document on stdout (no trailing newline)
#######################################
build_kv_config() {
  local device="$1"
  if [[ "$device" == "cuda" ]]; then
    printf '%s' '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
  else
    printf '{"kv_connector":"NixlConnector","kv_role":"kv_both","kv_buffer_device":"%s"}' "$device"
  fi
}

KV_BUFFER_DEVICE="cuda" # Default to cuda
parse_args "$@"

echo "Running accuracy tests with kv_buffer_device=$KV_BUFFER_DEVICE"

# Build the kv-transfer-config once
KV_CONFIG=$(build_kv_config "$KV_BUFFER_DEVICE")
28+
429# Models to run
530MODELS=(
631 " Qwen/Qwen3-0.6B"
@@ -79,7 +104,7 @@ run_tests_for_model() {
79104
80105 # Calculate port number (base port + instance number)
81106 PORT=$(( 8100 + i))
82- # Calculate side channel port. Avoid clash with with TP workers.
107+ # Calculate side channel port. Avoid clash with TP workers.
83108 SIDE_CHANNEL_PORT=$(( 5559 + i))
84109
85110 echo " Starting prefill instance $i on GPU $GPU_ID , port $PORT "
@@ -90,7 +115,7 @@ run_tests_for_model() {
90115 --enforce-eager \
91116 --gpu-memory-utilization 0.2 \
92117 --tensor-parallel-size $PREFILLER_TP_SIZE \
93- --kv-transfer-config '{ \" kv_connector \" : \" NixlConnector \" , \" kv_role \" : \" kv_both \" } '"
118+ --kv-transfer-config '$KV_CONFIG '"
94119
95120 if [ -n " $model_args " ]; then
96121 FULL_CMD=" $BASE_CMD $model_args "
@@ -122,7 +147,7 @@ run_tests_for_model() {
122147 --enforce-eager \
123148 --gpu-memory-utilization 0.2 \
124149 --tensor-parallel-size $DECODER_TP_SIZE \
125- --kv-transfer-config '{ \" kv_connector \" : \" NixlConnector \" , \" kv_role \" : \" kv_both \" } '"
150+ --kv-transfer-config '$KV_CONFIG '"
126151
127152 if [ -n " $model_args " ]; then
128153 FULL_CMD=" $BASE_CMD $model_args "
0 commit comments