@@ -59,6 +59,33 @@ def __init__(self, config: TRTLLMConfig, request):
5959 )
6060
6161
def run_trtllm_test_case(config: TRTLLMConfig, request) -> None:
    """Launch the deployment described by ``config`` and exercise its endpoints.

    For every ``(endpoint, response_handler)`` pair in ``config`` the matching
    request body is sent ``payload.repeat_count`` times and each response is
    passed to ``server_process.check_response``.

    Args:
        config: Deployment/test configuration. Reads ``endpoints``,
            ``response_handlers`` and ``timeout`` here; the rest is consumed
            by ``TRTLLMProcess`` and ``create_payload_for_config``.
        request: The pytest ``request`` fixture, forwarded to ``TRTLLMProcess``.

    Raises:
        AssertionError: If the number of endpoints and response handlers
            differ. Anything raised by the server process helpers propagates.
    """
    # Fail fast on a misconfigured case, before paying for server start-up.
    assert len(config.endpoints) == len(config.response_handlers), (
        f"{len(config.endpoints)} endpoints configured but "
        f"{len(config.response_handlers)} response handlers"
    )

    payload = create_payload_for_config(config)

    with TRTLLMProcess(config, request) as server_process:
        for endpoint, response_handler in zip(
            config.endpoints, config.response_handlers
        ):
            url = f"http://localhost:{server_process.port}/{endpoint}"

            # Only the chat endpoint takes the chat-shaped body; every other
            # endpoint receives the completions payload.
            request_body = (
                payload.payload_chat
                if endpoint == "v1/chat/completions"
                else payload.payload_completions
            )

            # The timeout budget is per endpoint and shared by all repeats.
            # A monotonic clock keeps it immune to wall-clock adjustments.
            start_time = time.monotonic()

            for _ in range(payload.repeat_count):
                elapsed = time.monotonic() - start_time

                response = server_process.send_request(
                    url, payload=request_body, timeout=config.timeout - elapsed
                )
                server_process.check_response(payload, response, response_handler)
87+
88+
6289# trtllm test configurations
6390trtllm_configs = {
6491 "aggregated" : TRTLLMConfig (
@@ -147,33 +174,9 @@ def test_deployment(trtllm_config_test, request, runtime_services):
147174 logger .info ("Starting test_deployment" )
148175
149176 config = trtllm_config_test
150- payload = create_payload_for_config (config )
151-
152177 logger .info (f"Using model: { config .model } " )
153178 logger .info (f"Script: { config .script_name } " )
154-
155- with TRTLLMProcess (config , request ) as server_process :
156- assert len (config .endpoints ) == len (config .response_handlers )
157- for endpoint , response_handler in zip (
158- config .endpoints , config .response_handlers
159- ):
160- url = f"http://localhost:{ server_process .port } /{ endpoint } "
161- start_time = time .time ()
162- elapsed = 0.0
163-
164- request_body = (
165- payload .payload_chat
166- if endpoint == "v1/chat/completions"
167- else payload .payload_completions
168- )
169-
170- for _ in range (payload .repeat_count ):
171- elapsed = time .time () - start_time
172-
173- response = server_process .send_request (
174- url , payload = request_body , timeout = config .timeout - elapsed
175- )
176- server_process .check_response (payload , response , response_handler )
179+ run_trtllm_test_case (config , request )
177180
178181
179182@pytest .mark .e2e
@@ -335,3 +338,34 @@ def log_output():
335338 except subprocess .TimeoutExpired :
336339 process .kill ()
337340 process .wait ()
341+
342+
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.trtllm_marker
@pytest.mark.slow
def test_chat_only_aggregated_with_test_logits_processor(
    request, runtime_services, monkeypatch
):
    """
    Chat-completions-only run of the aggregated deployment on Qwen3-0.6B with
    the test logits processor switched on.

    The config is derived from ``trtllm_configs["aggregated"]`` (same
    directory, script, delayed start and timeout) but restricted to the
    ``v1/chat/completions`` endpoint. Responses are validated through
    ``chat_completions_response_handler``; per the original author's note the
    response is expected to contain "Hello world" -- that check is not visible
    in this function, so confirm it against the handler/payload.
    """

    # Scope the HelloWorld logits processor to this test only.
    monkeypatch.setenv("DYNAMO_ENABLE_TEST_LOGITS_PROCESSOR", "1")

    aggregated = trtllm_configs["aggregated"]
    chat_only_settings = dict(
        name="aggregated_qwen_chatonly",
        directory=aggregated.directory,
        script_name=aggregated.script_name,  # agg.sh, per the original note
        marks=[],  # unused: this test is invoked directly, not parametrized
        endpoints=["v1/chat/completions"],
        response_handlers=[chat_completions_response_handler],
        model="Qwen/Qwen3-0.6B",
        delayed_start=aggregated.delayed_start,
        timeout=aggregated.timeout,
    )

    run_trtllm_test_case(TRTLLMConfig(**chat_only_settings), request)
0 commit comments