Skip to content

Commit ff6600e

Browse files
committed
chore: add e2e test
Signed-off-by: Bhuvan Agrawal <11240550+bhuvan002@users.noreply.github.com>
1 parent fc465c6 commit ff6600e

File tree

2 files changed

+66
-26
lines changed

2 files changed

+66
-26
lines changed

tests/serve/common.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
"""Common base classes and utilities for engine tests (vLLM, TRT-LLM, etc.)"""
55

6+
import os
67
from dataclasses import dataclass
78
from typing import Any, Callable, List
89

@@ -32,6 +33,11 @@ def create_payload_for_config(config: EngineConfig) -> Payload:
3233
3334
This provides the default implementation for text-only models.
3435
"""
36+
expected_response = (
37+
["Hello world"]
38+
if os.getenv("DYNAMO_ENABLE_TEST_LOGITS_PROCESSOR") == "1"
39+
else ["AI"]
40+
)
3541
return Payload(
3642
payload_chat={
3743
"model": config.model,
@@ -54,5 +60,5 @@ def create_payload_for_config(config: EngineConfig) -> Payload:
5460
},
5561
repeat_count=3,
5662
expected_log=[],
57-
expected_response=["AI"],
63+
expected_response=expected_response,
5864
)

tests/serve/test_trtllm.py

Lines changed: 59 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,33 @@ def __init__(self, config: TRTLLMConfig, request):
5959
)
6060

6161

62+
def run_trtllm_test_case(config: TRTLLMConfig, request) -> None:
    """Start a ``TRTLLMProcess`` for ``config`` and check each endpoint.

    Endpoints and response handlers are paired by position. For every pair,
    the matching request body (the chat payload for ``v1/chat/completions``,
    the completions payload otherwise) is sent ``payload.repeat_count`` times
    and each response is validated with ``check_response``.

    Args:
        config: Test configuration. ``config.endpoints`` and
            ``config.response_handlers`` must have the same length.
        request: Passed through unchanged to ``TRTLLMProcess``; the callers
            in this file hand in their pytest ``request`` argument.

    Raises:
        AssertionError: If the number of endpoints and response handlers
            differ.
    """
    # Validate the pairing up front so a misconfigured case fails before
    # the server process is started rather than after.
    assert len(config.endpoints) == len(config.response_handlers)

    payload = create_payload_for_config(config)

    with TRTLLMProcess(config, request) as server_process:
        for endpoint, response_handler in zip(
            config.endpoints, config.response_handlers
        ):
            url = f"http://localhost:{server_process.port}/{endpoint}"
            request_body = (
                payload.payload_chat
                if endpoint == "v1/chat/completions"
                else payload.payload_completions
            )

            # The timeout budget restarts for every endpoint. A monotonic
            # clock keeps the elapsed value immune to wall-clock adjustments.
            start_time = time.monotonic()

            for _ in range(payload.repeat_count):
                elapsed = time.monotonic() - start_time

                # NOTE(review): the remaining budget can reach zero or go
                # negative once config.timeout is used up -- confirm how
                # send_request treats a non-positive timeout.
                response = server_process.send_request(
                    url, payload=request_body, timeout=config.timeout - elapsed
                )
                server_process.check_response(payload, response, response_handler)
87+
88+
6289
# trtllm test configurations
6390
trtllm_configs = {
6491
"aggregated": TRTLLMConfig(
@@ -147,33 +174,9 @@ def test_deployment(trtllm_config_test, request, runtime_services):
147174
logger.info("Starting test_deployment")
148175

149176
config = trtllm_config_test
150-
payload = create_payload_for_config(config)
151-
152177
logger.info(f"Using model: {config.model}")
153178
logger.info(f"Script: {config.script_name}")
154-
155-
with TRTLLMProcess(config, request) as server_process:
156-
assert len(config.endpoints) == len(config.response_handlers)
157-
for endpoint, response_handler in zip(
158-
config.endpoints, config.response_handlers
159-
):
160-
url = f"http://localhost:{server_process.port}/{endpoint}"
161-
start_time = time.time()
162-
elapsed = 0.0
163-
164-
request_body = (
165-
payload.payload_chat
166-
if endpoint == "v1/chat/completions"
167-
else payload.payload_completions
168-
)
169-
170-
for _ in range(payload.repeat_count):
171-
elapsed = time.time() - start_time
172-
173-
response = server_process.send_request(
174-
url, payload=request_body, timeout=config.timeout - elapsed
175-
)
176-
server_process.check_response(payload, response, response_handler)
179+
run_trtllm_test_case(config, request)
177180

178181

179182
@pytest.mark.e2e
@@ -335,3 +338,34 @@ def log_output():
335338
except subprocess.TimeoutExpired:
336339
process.kill()
337340
process.wait()
341+
342+
343+
@pytest.mark.e2e
@pytest.mark.gpu_1
@pytest.mark.trtllm_marker
@pytest.mark.slow
def test_chat_only_aggregated_with_test_logits_processor(
    request, runtime_services, monkeypatch
):
    """
    Exercise only the chat-completions endpoint of the aggregated setup with
    the test logits processor switched on.

    The config reuses the directory, script, delayed start and timeout of the
    "aggregated" entry in ``trtllm_configs`` but targets the
    ``Qwen/Qwen3-0.6B`` model and a single endpoint.
    """

    # Scoped to this test through monkeypatch, so the flag does not persist
    # after the test finishes.
    monkeypatch.setenv("DYNAMO_ENABLE_TEST_LOGITS_PROCESSOR", "1")

    aggregated = trtllm_configs["aggregated"]

    chat_only_config = TRTLLMConfig(
        name="aggregated_qwen_chatonly",
        model="Qwen/Qwen3-0.6B",
        endpoints=["v1/chat/completions"],
        response_handlers=[chat_completions_response_handler],
        # Marks are left empty: this function is invoked directly instead of
        # going through the parametrized config table.
        marks=[],
        # Everything below is inherited from the aggregated base config.
        directory=aggregated.directory,
        script_name=aggregated.script_name,
        delayed_start=aggregated.delayed_start,
        timeout=aggregated.timeout,
    )

    run_trtllm_test_case(chat_only_config, request)

0 commit comments

Comments
 (0)