Fix TRT-LLM serve tests: switch model from deepseek-ai/DeepSeek-R1-Distill-Llama-8B to Qwen/Qwen3-0.6B

alec-flowers · alec-flowers · commit 09a94c05d1ed · 2025-08-28T11:51:12.000-07:00
Signed-off-by: alec-flowers <aflowers@nvidia.com>
diff --git a/tests/serve/test_trtllm.py b/tests/serve/test_trtllm.py
@@ -69,7 +69,7 @@ def __init__(self, config: TRTLLMConfig, request):
             chat_completions_response_handler,
             completions_response_handler,
         ],
-        model="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        model="Qwen/Qwen3-0.6B",
     ),
     "disaggregated": TRTLLMConfig(
         name="disaggregated",
@@ -81,7 +81,7 @@ def __init__(self, config: TRTLLMConfig, request):
             chat_completions_response_handler,
             completions_response_handler,
         ],
-        model="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        model="Qwen/Qwen3-0.6B",
     ),
     # TODO: These are sanity tests that the kv router examples launch
     # and inference without error, but do not do detailed checks on the
@@ -96,7 +96,7 @@ def __init__(self, config: TRTLLMConfig, request):
             chat_completions_response_handler,
             completions_response_handler,
         ],
-        model="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        model="Qwen/Qwen3-0.6B",
     ),
     "disaggregated_router": TRTLLMConfig(
         name="disaggregated_router",
@@ -108,7 +108,7 @@ def __init__(self, config: TRTLLMConfig, request):
             chat_completions_response_handler,
             completions_response_handler,
         ],
-        model="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        model="Qwen/Qwen3-0.6B",
     ),
 }