From fe4c0a307249cbd87f73891f1644508ef546f4f7 Mon Sep 17 00:00:00 2001 From: Weida Hong Date: Tue, 26 Aug 2025 17:38:39 +0000 Subject: [PATCH] [Misc] Reduce initialization time of auto_tune While searching for the maximum available memory-utilization, every iteration in which the server crashes with an OOM error incurs the full 10-minute startup timeout. Detect that the server process has crashed and continue to the next iteration immediately. Signed-off-by: Weida Hong --- benchmarks/auto_tune/auto_tune.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmarks/auto_tune/auto_tune.sh b/benchmarks/auto_tune/auto_tune.sh index 82c20ffa6554..a9de3f0aa9b2 100644 --- a/benchmarks/auto_tune/auto_tune.sh +++ b/benchmarks/auto_tune/auto_tune.sh @@ -87,10 +87,15 @@ start_server() { VLLM_USE_V1=1 VLLM_SERVER_DEV_MODE=1 \ vllm serve "${common_args_array[@]}" > "$vllm_log" 2>&1 & fi + local server_pid=$! # wait for 10 minutes... server_started=0 for i in {1..60}; do + # This line checks whether the server is still alive or not, + # since that we should always have permission to send signal to the server process. + kill -0 $server_pid 2> /dev/null || break + RESPONSE=$(curl -s -X GET "http://0.0.0.0:8004/health" -w "%{http_code}" -o /dev/stdout) STATUS_CODE=$(echo "$RESPONSE" | tail -n 1) if [[ "$STATUS_CODE" -eq 200 ]]; then @@ -102,7 +107,7 @@ start_server() { done if (( ! server_started )); then - echo "server did not start within 10 minutes. Please check server log at $vllm_log". + echo "server did not start within 10 minutes or crashed. Please check server log at $vllm_log". return 1 else return 0