3 files changed: +30 −1 lines changed

@@ -6,7 +6,7 @@ numpy < 2.0.0
 requests
 tqdm
 py-cpuinfo
-transformers >= 4.42.0  # Required for Gemma 2.
+transformers >= 4.42.0  # Required for Gemma 2 and for additional chat template parameters.
 tokenizers >= 0.19.1  # Required for Llama 3.
 fastapi
 aiohttp
@@ -190,6 +190,27 @@ class ChatCompletionRequest(OpenAIBaseModel):
             "special tokens so this should be set to False (as is the "
             "default)."),
     )
+    documents: Optional[List[Dict[str, str]]] = Field(
+        default=None,
+        description=
+        ("A list of dicts representing documents that will be accessible to "
+         "the model if it is performing RAG (retrieval-augmented generation)."
+         " If the template does not support RAG, this argument will have no "
+         "effect. We recommend that each document should be a dict containing "
+         "\"title\" and \"text\" keys."),
+    )
+    chat_template: Optional[str] = Field(
+        default=None,
+        description=(
+            "A Jinja template to use for this conversion. "
+            "If this is not passed, the model's default chat template will be "
+            "used instead."),
+    )
+    chat_template_kwargs: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description=("Additional kwargs to pass to the template renderer. "
+                     "Will be accessible by the chat template."),
+    )
     include_stop_str_in_output: Optional[bool] = Field(
         default=False,
         description=(
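The three new request fields map one-to-one onto the extra inputs the chat template renderer can consume. A minimal client-side sketch of how they could be exercised against vLLM's OpenAI-compatible /v1/chat/completions endpoint follows; the server URL, model name, document contents, and the "custom_flag" kwarg are illustrative assumptions, not part of this change.

# Minimal client-side sketch (not part of this diff). The server URL, model
# name, document contents, and the "custom_flag" kwarg are illustrative
# assumptions; only the field names come from ChatCompletionRequest above.
import requests

payload = {
    "model": "my-rag-capable-model",  # assumed: a model whose chat template supports documents
    "messages": [
        {"role": "user", "content": "Which planet is closest to the sun?"},
    ],
    # New field: documents forwarded to the chat template for RAG.
    "documents": [
        {"title": "Solar system", "text": "Mercury is the closest planet to the sun."},
    ],
    # New field: extra kwargs made visible to the template renderer.
    "chat_template_kwargs": {"custom_flag": True},
}

resp = requests.post("http://localhost:8000/v1/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"]["content"])

Per the field descriptions above, a template that never references documents or custom_flag simply ignores them.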
@@ -218,10 +218,18 @@ async def create_chat_completion(
                 conversation.extend(chat_parsed_result.messages)
                 image_futures.extend(chat_parsed_result.image_futures)

+            tool_dicts = None if request.tools is None else [
+                tool.model_dump() for tool in request.tools
+            ]
+
             prompt = self.tokenizer.apply_chat_template(
                 conversation=conversation,
                 tokenize=False,
                 add_generation_prompt=request.add_generation_prompt,
+                tools=tool_dicts,
+                documents=request.documents,
+                chat_template=request.chat_template,
+                **(request.chat_template_kwargs or {}),
             )
         except Exception as e:
             logger.error("Error in applying chat template from request: %s", e)
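For reference, the forwarded arguments correspond to parameters of transformers' apply_chat_template, which is why the requirement is bumped to >= 4.42.0 above. Below is a standalone sketch of the equivalent call outside the server, assuming a checkpoint whose chat template actually understands documents; the model name and document contents are placeholders.

# Standalone sketch of the same apply_chat_template call, assuming
# transformers >= 4.42.0 and a tokenizer whose chat template references
# documents; the model name and contents are placeholders.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("my-rag-capable-model")  # placeholder

conversation = [
    {"role": "user", "content": "Which planet is closest to the sun?"},
]
documents = [
    {"title": "Solar system", "text": "Mercury is the closest planet to the sun."},
]

prompt = tokenizer.apply_chat_template(
    conversation=conversation,
    tokenize=False,
    add_generation_prompt=True,
    tools=None,           # no tool definitions in this example
    documents=documents,  # ignored by templates without RAG support
    chat_template=None,   # fall back to the model's default template
)
print(prompt)

One consequence of splatting chat_template_kwargs directly into the call: a key that collides with an explicit argument (for example "tokenize") would raise a TypeError, so callers should restrict it to template-level variables.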