@@ -19,6 +19,7 @@ vLLM currently supports the following reasoning models:
1919
2020:::{note}
2121IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`.
22+ The reasoning feature for the Qwen3 series is enabled by default. To disable it, you must pass `enable_thinking=False` in your `chat_template_kwargs`.
2223:::
2324
2425## Quickstart
@@ -49,6 +50,8 @@ model = models.data[0].id
4950# Round 1
5051messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
5152# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
53+ # For Qwen3 series, if you want to disable thinking in reasoning mode, add:
54+ # extra_body={"chat_template_kwargs": {"enable_thinking": False}}
5255response = client.chat.completions.create(model=model, messages=messages)
5356
5457reasoning_content = response.choices[0].message.reasoning_content
@@ -104,6 +107,8 @@ model = models.data[0].id
104107
105108messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
106109# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
110+ # For Qwen3 series, if you want to disable thinking in reasoning mode, add:
111+ # extra_body={"chat_template_kwargs": {"enable_thinking": False}}
107112stream = client.chat.completions.create(model=model,
108113                                        messages=messages,
109114                                        stream=True)
0 commit comments