@@ -229,40 +229,35 @@ def _create_engine_cores(
 
         return engine_cores
 
-    def _add_request(self, request: EngineCoreRequest) -> Request:
-        if request.mm_hashes is not None:
-            # Here, if hash exists for a multimodal input, then it will be
-            # fetched from the cache, else it will be added to the cache.
-            # Note that the cache here is mirrored with the client cache, so
-            # anything that has a hash must have a HIT cache entry here
-            # as well.
-            assert request.mm_inputs is not None
-            request.mm_inputs = self._prefill_engines[
-                0].mm_input_cache_server.get_and_update_p1(
-                    request.mm_inputs, request.mm_hashes)
-
-        req = Request.from_engine_core_request(request)
-
-        if req.use_structured_output:
-            # Start grammar compilation asynchronously
-            self._prefill_engines[0].structured_output_manager.grammar_init(
-                req)
-
-        return req
-
-    def add_request(self, request: EngineCoreRequest):
-        vllm_request = self._add_request(request)
+
+    def add_request(self, request: EngineCoreRequest, request_wave: int = 0):
+        # vllm_request = self._add_request(request)
 
         # TODO(fhzhang): support multiple prefill engines.
-        self._prefill_engines[0].scheduler.add_request(vllm_request)
-        self._requests[request.request_id] = vllm_request
+        if not isinstance(request.request_id, str):
+            raise TypeError(
+                f"request_id must be a string, got {type(request.request_id)}")
+
+        if pooling_params := request.pooling_params:
+            supported_pooling_tasks = [
+                task for task in self.get_supported_tasks()
+                if task in POOLING_TASKS
+            ]
+
+            if pooling_params.task not in supported_pooling_tasks:
+                raise ValueError(f"Unsupported task: {pooling_params.task!r} "
+                                 f"Supported tasks: {supported_pooling_tasks}")
+
+        self._prefill_engines[0].scheduler.add_request(request)
+        self._requests[request.request_id] = request
 
     def _handle_client_request(self, request_type: EngineCoreRequestType,
                                request: Any) -> None:
         """Dispatch request from client."""
 
         if request_type == EngineCoreRequestType.ADD:
-            self.add_request(request)
+            req, request_wave = request
+            self.add_request(req)
         elif request_type == EngineCoreRequestType.ABORT:
             # TODO(fhzhang): we need to keep track of which engine is processing
             # the request and finish it there.
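To make the new acceptance checks easier to follow in isolation, here is a minimal, runnable sketch of the validation that the reworked `add_request` applies before a request reaches the prefill engine's scheduler. The `EngineCoreRequest` and `PoolingParams` dataclasses, the `POOLING_TASKS` tuple, and the `validate_add_request` helper below are simplified stand-ins invented for this example, not the real vLLM definitions.

```python
from dataclasses import dataclass
from typing import Optional, Sequence

# Stand-in for the real vLLM constant; values here are placeholders.
POOLING_TASKS = ("encode", "embed", "classify", "score")


@dataclass
class PoolingParams:
    task: str


@dataclass
class EngineCoreRequest:
    request_id: str
    pooling_params: Optional[PoolingParams] = None


def validate_add_request(request: EngineCoreRequest,
                         supported_tasks: Sequence[str]) -> None:
    """Run the same checks the reworked add_request performs before
    handing the request to the scheduler."""
    # The request id must arrive as a string; anything else is a client bug.
    if not isinstance(request.request_id, str):
        raise TypeError(
            f"request_id must be a string, got {type(request.request_id)}")

    # Pooling requests are only accepted when the engine supports the task.
    if pooling_params := request.pooling_params:
        supported_pooling_tasks = [
            task for task in supported_tasks if task in POOLING_TASKS
        ]
        if pooling_params.task not in supported_pooling_tasks:
            raise ValueError(f"Unsupported task: {pooling_params.task!r} "
                             f"Supported tasks: {supported_pooling_tasks}")


# A pooling task the engine supports passes validation...
validate_add_request(EngineCoreRequest("req-1", PoolingParams("embed")),
                     supported_tasks=("generate", "embed"))

# ...while an unsupported one is rejected before it reaches the scheduler.
try:
    validate_add_request(EngineCoreRequest("req-2", PoolingParams("score")),
                         supported_tasks=("generate", "embed"))
except ValueError as exc:
    print(exc)
```

Note that the new path hands the raw `EngineCoreRequest` straight to the scheduler rather than converting it with `Request.from_engine_core_request` as the removed `_add_request` did, so only these lightweight checks run on the hot path.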