Description
Your current environment
```text
image: vllm/vllm-openai:v0.6.4.post1
command: "--model /model --served-model-name mistral-large-123b --tensor-parallel-size 4 --port 8081 --tokenizer_mode mistral --load_format safetensors --config_format mistral --chat-template templates/extended_tool_chat_template_mistral_parallel.jinja --enable-auto-tool-choice --tool-call-parser mistral"
```
Model Input Dumps
```text
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 401, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 187, in __call__
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 165, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/base.py", line 185, in __call__
    with collapse_excgroups():
         ^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/contextlib.py", line 158, in __exit__
    self.gen.throw(value)
  File "/usr/local/lib/python3.12/dist-packages/starlette/_utils.py", line 82, in collapse_excgroups
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/base.py", line 187, in __call__
    response = await self.dispatch_func(request, call_next)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 490, in add_request_id
    response = await call_next(request)
               ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/base.py", line 163, in call_next
    raise app_exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/base.py", line 149, in coro
    await self.app(scope, receive_or_disconnect, send_no_error)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/base.py", line 185, in __call__
    with collapse_excgroups():
         ^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.12/contextlib.py", line 158, in __exit__
    self.gen.throw(value)
  File "/usr/local/lib/python3.12/dist-packages/starlette/_utils.py", line 82, in collapse_excgroups
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/base.py", line 187, in __call__
    response = await self.dispatch_func(request, call_next)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 485, in authentication
    return await call_next(request)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/base.py", line 163, in call_next
    raise app_exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/base.py", line 149, in coro
    await self.app(scope, receive_or_disconnect, send_no_error)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/cors.py", line 85, in __call__
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/exceptions.py", line 62, in __call__
    await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 42, in wrapped_app
    await app(scope, receive, sender)
  File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 715, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 735, in app
    await route.handle(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 288, in handle
    await self.app(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 76, in app
    await wrap_app_handling_exceptions(app, request)(scope, receive, send)
  File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
    raise exc
  File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 42, in wrapped_app
    await app(scope, receive, sender)
  File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 73, in app
    response = await f(request)
               ^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 301, in app
    raw_response = await run_endpoint_function(
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 212, in run_endpoint_function
    return await dependant.call(**values)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 347, in create_chat_completion
    generator = await handler.create_chat_completion(request, raw_request)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/serving_chat.py", line 155, in create_chat_completion
    ) = await self._preprocess_chat(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/serving_engine.py", line 458, in _preprocess_chat
    request_prompt = apply_mistral_chat_template(
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/chat_utils.py", line 767, in apply_mistral_chat_template
    return tokenizer.apply_chat_template(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/vllm/transformers_utils/tokenizers/mistral.py", line 265, in apply_chat_template
    encoded = self.mistral.encode_chat_completion(request)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/mistral_common/tokens/tokenizers/mistral.py", line 194, in encode_chat_completion
    validated_request = self._chat_completion_request_validator.validate_request(request)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/mistral_common/protocol/instruct/validator.py", line 63, in validate_request
    self.validate_messages(request.messages)
  File "/usr/local/lib/python3.12/dist-packages/mistral_common/protocol/instruct/validator.py", line 51, in validate_messages
    self._validate_message_list_content(messages)
  File "/usr/local/lib/python3.12/dist-packages/mistral_common/protocol/instruct/validator.py", line 273, in _validate_message_list_content
    self._validate_assistant_message(message, is_last_message=idx == len(messages) - 1)
  File "/usr/local/lib/python3.12/dist-packages/mistral_common/protocol/instruct/validator.py", line 155, in _validate_assistant_message
    self._validate_tool_call(tool_call, is_last_message=is_last_message)
  File "/usr/local/lib/python3.12/dist-packages/mistral_common/protocol/instruct/validator.py", line 310, in _validate_tool_call
    raise InvalidFunctionCallException(
mistral_common.exceptions.InvalidFunctionCallException: Tool call id was chatcmpl-tool-280cd414a0a24c6baa54a8b87dce97ff but must be a-z, A-Z, 0-9, with a length of 9.
```
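For reference, the validator rejects any tool call id that is not exactly nine alphanumeric characters. A minimal sketch of a client-side workaround, assuming the parallel tool chat template's behavior of keeping the last nine characters (the helper name and the truncation choice are my assumptions, not a vLLM or mistral_common API):

```python
import re

# Rule implied by the error message: exactly nine characters from [a-zA-Z0-9].
VALID_TOOL_CALL_ID = re.compile(r"^[a-zA-Z0-9]{9}$")

def shorten_tool_call_id(tool_call_id: str) -> str:
    """Hypothetical workaround: strip non-alphanumeric characters and keep the
    last nine, mirroring what the parallel tool chat template appears to do on
    the non-streaming path. Apply to every tool_call id replayed in history."""
    cleaned = re.sub(r"[^a-zA-Z0-9]", "", tool_call_id)
    return cleaned[-9:]

fixed = shorten_tool_call_id("chatcmpl-tool-280cd414a0a24c6baa54a8b87dce97ff")
assert VALID_TOOL_CALL_ID.match(fixed)  # fixed == "87dce97ff"
```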
🐛 Describe the bug
- The tool_chat_template_mistral_parallel.jinja correctly truncates the tool_call_id to 9 characters when the API is called synchronously (tested with langgraph.invoke()).
- It does not when the API is called in streaming mode (langgraph.astream(stream_mode="messages")); see the repro sketch below.
- It also works with langgraph.astream(stream_mode="values"), which calls the API with stream set to False.
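A minimal reproduction sketch of the flow described above, using the plain OpenAI client against the deployment from "Your current environment" (base URL, API key, tool schema, and message contents are illustrative assumptions):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8081/v1", api_key="dummy")
MODEL = "mistral-large-123b"
TOOLS = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

messages = [{"role": "user", "content": "What is the weather in Paris?"}]

# Step 1: streaming request with tools. Per the report, the streamed tool call
# arrives with a long OpenAI-style id such as
# "chatcmpl-tool-280cd414a0a24c6baa54a8b87dce97ff" instead of a 9-char id.
tool_call_id, name, args = None, None, ""
stream = client.chat.completions.create(
    model=MODEL, messages=messages, tools=TOOLS, stream=True)
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.tool_calls:
        tc = delta.tool_calls[0]
        tool_call_id = tc.id or tool_call_id
        if tc.function:
            name = tc.function.name or name
            args += tc.function.arguments or ""

# Step 2: replay the assistant turn and the tool result, as langgraph does.
# mistral_common then rejects the id during prompt preprocessing because it is
# not exactly nine alphanumeric characters.
messages += [
    {"role": "assistant", "content": None, "tool_calls": [{
        "id": tool_call_id, "type": "function",
        "function": {"name": name, "arguments": args}}]},
    {"role": "tool", "tool_call_id": tool_call_id, "content": "18C, sunny"},
]
client.chat.completions.create(model=MODEL, messages=messages, tools=TOOLS)
# -> 400 with InvalidFunctionCallException: Tool call id was chatcmpl-tool-...
#    but must be a-z, A-Z, 0-9, with a length of 9.
```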
 
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.