@@ -132,9 +132,6 @@ def __init__(
132132 "\" auto\" tool choice has been enabled please note that while"
133133 " the parallel_tool_calls client option is preset for "
134134 "compatibility reasons, it will be ignored." )
135- if not self .use_harmony :
136- raise NotImplementedError ("Auto tool choice is not supported "
137- "yet unless using Harmony" )
138135
139136 # HACK(woosuk): This is a hack. We should use a better store.
140137 # FIXME: If enable_store=True, this may cause a memory leak since we
@@ -212,8 +209,8 @@ async def create_responses(
212209 await self ._make_request (request , prev_response ,
213210 tokenizer ))
214211
215- except (ValueError , TypeError , RuntimeError ,
216- jinja2 . TemplateError ) as e :
212+ except (ValueError , TypeError , RuntimeError , jinja2 . TemplateError ,
213+ NotImplementedError ) as e :
217214 logger .exception ("Error in preprocessing prompt inputs" )
218215 return self .create_error_response (f"{ e } { e .__cause__ } " )
219216
@@ -313,6 +310,9 @@ async def _make_request(
313310 prev_response : Optional [ResponsesResponse ],
314311 tokenizer : AnyTokenizer ,
315312 ):
313+ if len (request .tools ) > 0 :
314+ raise NotImplementedError (
315+ "Tool use is not supported in Responses API without Harmony" )
316316 # Construct the input messages.
317317 messages = self ._construct_input_messages (request , prev_response )
318318 _ , request_prompts , engine_prompts = await self ._preprocess_chat (
0 commit comments