@@ -69,7 +69,7 @@ def __init__(self, config: TRTLLMConfig, request):
6969 chat_completions_response_handler ,
7070 completions_response_handler ,
7171 ],
72- model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B " ,
72+ model = "Qwen/Qwen3-0.6B " ,
7373 ),
7474 "disaggregated" : TRTLLMConfig (
7575 name = "disaggregated" ,
@@ -81,7 +81,7 @@ def __init__(self, config: TRTLLMConfig, request):
8181 chat_completions_response_handler ,
8282 completions_response_handler ,
8383 ],
84- model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B " ,
84+ model = "Qwen/Qwen3-0.6B " ,
8585 ),
8686 # TODO: These are sanity tests that the kv router examples launch
8787 # and inference without error, but do not do detailed checks on the
@@ -96,7 +96,7 @@ def __init__(self, config: TRTLLMConfig, request):
9696 chat_completions_response_handler ,
9797 completions_response_handler ,
9898 ],
99- model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B " ,
99+ model = "Qwen/Qwen3-0.6B " ,
100100 ),
101101 "disaggregated_router" : TRTLLMConfig (
102102 name = "disaggregated_router" ,
@@ -108,7 +108,7 @@ def __init__(self, config: TRTLLMConfig, request):
108108 chat_completions_response_handler ,
109109 completions_response_handler ,
110110 ],
111- model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B " ,
111+ model = "Qwen/Qwen3-0.6B " ,
112112 ),
113113}
114114
0 commit comments