1+ # SPDX-License-Identifier: Apache-2.0
2+ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+ import threading
5+ import time
6+ from http import HTTPStatus
7+
8+ import pytest
9+ import requests
10+
11+ from tests .utils import RemoteOpenAIServer
12+
# Model identifier passed to RemoteOpenAIServer by the ``server`` fixture and
# sent as the ``model`` field of the completion request in the tests below.
MODEL_NAME = "Qwen/Qwen3-0.6B"
14+
15+
@pytest.fixture(scope="class")
def server():
    """Yield a ``RemoteOpenAIServer`` for ``MODEL_NAME``, shared per test class.

    The server object is created inside a ``with`` block, so its context
    manager exit runs once the tests of the requesting class are finished.
    """
    cli_flags = [
        "--enforce-eager",
        "--max-model-len",
        "100",
        "--gpu-memory-utilization",
        "0.8",
    ]

    with RemoteOpenAIServer(MODEL_NAME, cli_flags) as running_server:
        yield running_server
25+
26+
class TestHealth:
    """Tests for the server's ``health`` endpoint, idle and under load."""

    # Upper bound (seconds) on a single health request, so an unresponsive
    # server makes the test fail instead of blocking the run indefinitely.
    _HEALTH_TIMEOUT_S = 60

    def test_health_basic(self, server: RemoteOpenAIServer):
        """Test basic health check endpoint."""
        response = requests.get(server.url_for("health"),
                                timeout=self._HEALTH_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

    def test_health_with_generate(self, server: RemoteOpenAIServer):
        """Test health check with generate parameter."""
        response = requests.get(server.url_for("health"),
                                params={"generate": "true"},
                                timeout=self._HEALTH_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

    def test_health_with_running_query(self, server: RemoteOpenAIServer):
        """Test health check with generate parameter during a completion.

        A background thread sends a completion request while the main thread
        queries the health endpoint. Any exception raised in the background
        thread is re-raised here so that it fails the test.
        """
        generation_errors: list[Exception] = []
        start_event = threading.Event()
        done_event = threading.Event()

        def _run_generate() -> None:
            """Send one completion request, recording any exception."""
            try:
                client = server.get_client()
                start_event.set()
                client.completions.create(
                    model=MODEL_NAME,
                    prompt="Ping health endpoint",
                    max_tokens=50,
                    temperature=0.0,
                )
            except Exception as e:
                # Collected and re-raised by the main thread below; an
                # exception raised inside a thread would not fail the test.
                generation_errors.append(e)
            finally:
                done_event.set()

        generate_thread = threading.Thread(target=_run_generate, daemon=True)
        generate_thread.start()

        # Confirm the worker got as far as issuing its request *before*
        # probing health; otherwise the probe may not overlap the query.
        started = start_event.wait(timeout=10)
        if generation_errors:
            # Report the real failure rather than a generic "failed to start".
            raise generation_errors[0]
        assert started, "Generation thread failed to start"

        # start_event is set just before the request is sent, so give the
        # request a moment to reach the server.
        time.sleep(1)
        response = requests.get(server.url_for("health"),
                                params={"generate": "true"},
                                timeout=self._HEALTH_TIMEOUT_S)
        assert response.status_code == HTTPStatus.OK

        assert done_event.wait(timeout=300), "Generation thread did not finish"
        # done_event is set in the worker's ``finally``, so the thread is
        # about to exit; allow it a short grace period to do so.
        generate_thread.join(timeout=10)
        if generation_errors:
            raise generation_errors[0]
0 commit comments