File tree: 3 files changed, +30 −1 lines changed
Original file line number Diff line number Diff line change @@ -6,7 +6,7 @@ numpy < 2.0.0
66requests
77tqdm
88py-cpuinfo
9- transformers >= 4.42.0 # Required for Gemma 2.
9+ transformers >= 4.42.0 # Required for Gemma 2 and for additional chat template parameters.
1010tokenizers >= 0.19.1 # Required for Llama 3.
1111fastapi
1212aiohttp
Original file line number Diff line number Diff line change @@ -190,6 +190,27 @@ class ChatCompletionRequest(OpenAIBaseModel):
190190 "special tokens so this should be set to False (as is the "
191191 "default)." ),
192192 )
# New optional request fields that feed extra inputs into the chat template.
# All default to None, so existing clients are unaffected (backward compatible).
# NOTE(review): description strings are API-facing (surfaced in OpenAPI docs);
# reconstructed here from a whitespace-mangled diff — verify against upstream.
documents: Optional[List[Dict[str, str]]] = Field(
    default=None,
    description=
    ("A list of dicts representing documents that will be accessible to "
     "the model if it is performing RAG (retrieval-augmented generation)."
     " If the template does not support RAG, this argument will have no "
     "effect. We recommend that each document should be a dict containing "
     "\"title\" and \"text\" keys."),
)
# Per-request Jinja template override; when None the tokenizer falls back to
# the model's built-in chat template.
chat_template: Optional[str] = Field(
    default=None,
    description=(
        "A Jinja template to use for this conversion. "
        "If this is not passed, the model's default chat template will be "
        "used instead."),
)
# Free-form kwargs forwarded verbatim to the template renderer; values become
# variables visible inside the Jinja template.
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
    default=None,
    description=("Additional kwargs to pass to the template renderer. "
                 "Will be accessible by the chat template."),
)
193214 include_stop_str_in_output : Optional [bool ] = Field (
194215 default = False ,
195216 description = (
Original file line number Diff line number Diff line change @@ -218,10 +218,18 @@ async def create_chat_completion(
218218 conversation .extend (chat_parsed_result .messages )
219219 image_futures .extend (chat_parsed_result .image_futures )
220220
# Serialize the request's tool definitions (pydantic models, given the
# .model_dump() call) into plain dicts the Jinja template can iterate over;
# keep None (rather than []) when no tools were supplied so templates can
# distinguish "no tools" from "empty tool list".
tool_dicts = None if request.tools is None else [
    tool.model_dump() for tool in request.tools
]

# Render the prompt text (tokenize=False) and forward the new optional
# template inputs: tools, RAG documents, a per-request template override,
# and any extra template kwargs. chat_template_kwargs may be None, hence
# the `or {}` before unpacking.
prompt = self.tokenizer.apply_chat_template(
    conversation=conversation,
    tokenize=False,
    add_generation_prompt=request.add_generation_prompt,
    tools=tool_dicts,
    documents=request.documents,
    chat_template=request.chat_template,
    **(request.chat_template_kwargs or {}),
)
226234 except Exception as e :
227235 logger .error ("Error in applying chat template from request: %s" , e )
You can’t perform that action at this time.
0 commit comments