66import  time 
77import  weakref 
88from  functools  import  partial 
9- from  typing  import  (Any , AsyncGenerator , Callable , Coroutine ,  Dict , Iterable ,
10-                     List ,  Mapping , Optional , Set , Tuple , Type , Union ,  overload )
9+ from  typing  import  (Any , AsyncGenerator , Callable , Dict , Iterable ,  List ,
10+                     Mapping , Optional , Set , Tuple , Type , Union )
1111from  weakref  import  ReferenceType 
1212
13- from  typing_extensions  import  deprecated 
14- 
1513import  vllm .envs  as  envs 
1614from  vllm .config  import  (DecodingConfig , LoRAConfig , ModelConfig ,
1715                         ParallelConfig , SchedulerConfig , VllmConfig )
3634from  vllm .sequence  import  ExecuteModelRequest 
3735from  vllm .transformers_utils .tokenizer  import  AnyTokenizer 
3836from  vllm .usage .usage_lib  import  UsageContext 
39- from  vllm .utils  import  Device , deprecate_kwargs ,  weak_bind 
37+ from  vllm .utils  import  Device , weak_bind 
4038
4139logger  =  init_logger (__name__ )
4240ENGINE_ITERATION_TIMEOUT_S  =  envs .VLLM_ENGINE_ITERATION_TIMEOUT_S 
@@ -429,24 +427,6 @@ async def get_tokenizer_async(self,
429427        return  await  (
430428            self .get_tokenizer_group ().get_lora_tokenizer_async (lora_request ))
431429
432-     @overload  
433-     @deprecated ("'inputs' will be renamed to 'prompt" ) 
434-     async  def  add_request_async (
435-         self ,
436-         request_id : str ,
437-         * ,
438-         inputs : PromptType ,
439-         params : Union [SamplingParams , PoolingParams ],
440-         arrival_time : Optional [float ] =  None ,
441-         lora_request : Optional [LoRARequest ] =  None ,
442-         trace_headers : Optional [Mapping [str , str ]] =  None ,
443-         prompt_adapter_request : Optional [PromptAdapterRequest ] =  None ,
444-         priority : int  =  0 ,
445-         data_parallel_rank : Optional [int ] =  None ,
446-     ) ->  None :
447-         ...
448- 
449-     @overload  
450430    async  def  add_request_async (
451431        self ,
452432        request_id : str ,
@@ -459,32 +439,10 @@ async def add_request_async(
459439        priority : int  =  0 ,
460440        data_parallel_rank : Optional [int ] =  None ,
461441    ) ->  None :
462-         ...
463- 
464-     @deprecate_kwargs ( 
465-         "inputs" , 
466-         additional_message = "Please use the 'prompt' parameter instead." , 
467-     ) 
468-     async  def  add_request_async (
469-             self ,
470-             request_id : str ,
471-             prompt : Optional [PromptType ] =  None ,
472-             params : Optional [Union [SamplingParams , PoolingParams ]] =  None ,
473-             arrival_time : Optional [float ] =  None ,
474-             lora_request : Optional [LoRARequest ] =  None ,
475-             trace_headers : Optional [Mapping [str , str ]] =  None ,
476-             prompt_adapter_request : Optional [PromptAdapterRequest ] =  None ,
477-             priority : int  =  0 ,
478-             data_parallel_rank : Optional [int ] =  None ,
479-             * ,
480-             inputs : Optional [PromptType ] =  None ,  # DEPRECATED 
481-     ) ->  None :
482-         """Async version of 
483-         [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request].""" 
484-         if  inputs  is  not None :
485-             prompt  =  inputs 
486-         assert  prompt  is  not None  and  params  is  not None 
487- 
442+         """ 
443+         Async version of 
444+         [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]. 
445+         """ 
488446        if  lora_request  is  not None  and  not  self .lora_config :
489447            raise  ValueError (f"Got lora_request { lora_request }  but LoRA is " 
490448                             "not enabled!" )
@@ -521,8 +479,7 @@ async def add_request_async(
521479            params  =  await  build_guided_decoding_logits_processor_async (
522480                sampling_params = params ,
523481                tokenizer = await  self .get_tokenizer_async (lora_request ),
524-                 default_guided_backend = self .decoding_config .
525-                 guided_decoding_backend ,
482+                 default_guided_backend = self .decoding_config .backend ,
526483                reasoning_backend = self .decoding_config .reasoning_backend ,
527484                model_config = self .model_config )
528485
@@ -894,28 +851,7 @@ async def run_engine_loop(engine_ref: ReferenceType):
894851                raise 
895852            await  asyncio .sleep (0 )
896853
897-     # This method does not need to be async, but kept that way 
898-     # for backwards compatibility. 
899-     @overload  
900-     @deprecated ("'inputs' will be renamed to 'prompt" ) 
901-     def  add_request (
902-         self ,
903-         request_id : str ,
904-         * ,
905-         inputs : PromptType ,
906-         params : Union [SamplingParams , PoolingParams ],
907-         arrival_time : Optional [float ] =  None ,
908-         lora_request : Optional [LoRARequest ] =  None ,
909-         trace_headers : Optional [Mapping [str , str ]] =  None ,
910-         prompt_adapter_request : Optional [PromptAdapterRequest ] =  None ,
911-         priority : int  =  0 ,
912-         data_parallel_rank : Optional [int ] =  None ,
913-     ) ->  Coroutine [None , None , AsyncGenerator [Union [
914-             RequestOutput , PoolingRequestOutput ], None ]]:
915-         ...
916- 
917-     @overload  
918-     def  add_request (
854+     async  def  add_request (
919855        self ,
920856        request_id : str ,
921857        prompt : PromptType ,
@@ -926,32 +862,7 @@ def add_request(
926862        prompt_adapter_request : Optional [PromptAdapterRequest ] =  None ,
927863        priority : int  =  0 ,
928864        data_parallel_rank : Optional [int ] =  None ,
929-     ) ->  Coroutine [None , None , AsyncGenerator [Union [
930-             RequestOutput , PoolingRequestOutput ], None ]]:
931-         ...
932- 
933-     @deprecate_kwargs ( 
934-         "inputs" , 
935-         additional_message = "Please use the 'prompt' parameter instead." , 
936-     ) 
937-     async  def  add_request (
938-         self ,
939-         request_id : str ,
940-         prompt : Optional [PromptType ] =  None ,
941-         params : Optional [Union [SamplingParams , PoolingParams ]] =  None ,
942-         arrival_time : Optional [float ] =  None ,
943-         lora_request : Optional [LoRARequest ] =  None ,
944-         trace_headers : Optional [Mapping [str , str ]] =  None ,
945-         prompt_adapter_request : Optional [PromptAdapterRequest ] =  None ,
946-         priority : int  =  0 ,
947-         data_parallel_rank : Optional [int ] =  None ,
948-         * ,
949-         inputs : Optional [PromptType ] =  None ,  # DEPRECATED 
950865    ) ->  AsyncGenerator [Union [RequestOutput , PoolingRequestOutput ], None ]:
951-         if  inputs  is  not None :
952-             prompt  =  inputs 
953-         assert  prompt  is  not None  and  params  is  not None 
954- 
955866        if  not  self .is_running :
956867            if  self .start_engine_loop :
957868                self .start_background_loop ()
0 commit comments