@@ -182,7 +182,6 @@ def _apply_prompt_adapter(
     def _tokenize_prompt(
         self,
         prompt: str,
-        request_id: str,
         lora_request: Optional[LoRARequest],
     ) -> list[int]:
         """
@@ -202,15 +201,13 @@ def _tokenize_prompt(
202201 "do_lower_case" , False )):
203202 prompt = prompt .lower ()
204203
205- return tokenizer .encode (request_id = request_id ,
206- prompt = prompt ,
204+ return tokenizer .encode (prompt = prompt ,
207205 lora_request = lora_request ,
208206 add_special_tokens = add_special_tokens )
209207
210208 async def _tokenize_prompt_async (
211209 self ,
212210 prompt : str ,
213- request_id : str ,
214211 lora_request : Optional [LoRARequest ],
215212 ) -> list [int ]:
216213 """Async version of :meth:`_tokenize_prompt`."""
@@ -222,7 +219,6 @@ async def _tokenize_prompt_async(
             # appending an EOS token to the prompt which disrupts generation.
             add_special_tokens = False
         return await tokenizer.encode_async(
-            request_id=request_id,
             prompt=prompt,
             lora_request=lora_request,
             add_special_tokens=add_special_tokens)
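Note: with `request_id` removed, the tokenizer entry points take only the prompt, the optional LoRA request, and the special-token flag. A minimal sketch of the resulting call shape, assuming a `tokenizer` object like the one these methods obtain internally; the wrapper function and its defaults are illustrative, not part of this diff:

```python
# Hypothetical helper showing the post-change call shape; only the keyword
# arguments (prompt, lora_request, add_special_tokens) are taken from the
# diff above -- the wrapper itself is illustrative scaffolding.
def tokenize(tokenizer, prompt: str, lora_request=None,
             is_encoder_decoder: bool = False) -> list[int]:
    # Per the comment in the diff, encoder-decoder models skip special
    # tokens so the tokenizer does not append an EOS token that disrupts
    # generation.
    add_special_tokens = not is_encoder_decoder
    return tokenizer.encode(prompt=prompt,
                            lora_request=lora_request,
                            add_special_tokens=add_special_tokens)
```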
@@ -309,7 +305,6 @@ async def _process_multimodal_async(
     def _prompt_to_llm_inputs(
         self,
         prompt: SingletonPrompt,
-        request_id: str,
         lora_request: Optional[LoRARequest] = None,
         return_mm_hashes: bool = False,
     ) -> SingletonInputs:
@@ -318,7 +313,6 @@ def _prompt_to_llm_inputs(
 
         Arguments:
 
-        * request_id
         * prompt: single encoder or decoder input prompt
         * lora_request: this is only valid for decoder prompts
         * return_mm_hashes: whether to return multimodal hashes
@@ -333,7 +327,6 @@ def _prompt_to_llm_inputs(
             prompt_text = parsed["content"]
             prompt_token_ids = self._tokenize_prompt(
                 prompt_text,
-                request_id=request_id,
                 lora_request=lora_request,
             )
 
@@ -384,7 +377,6 @@ def _prompt_to_llm_inputs(
 
             prompt_token_ids = self._tokenize_prompt(
                 prompt_text,
-                request_id=request_id,
                 lora_request=lora_request,
             )
 
@@ -400,7 +392,6 @@ def _prompt_to_llm_inputs(
     async def _prompt_to_llm_inputs_async(
         self,
         prompt: SingletonPrompt,
-        request_id: str,
         lora_request: Optional[LoRARequest] = None,
         return_mm_hashes: bool = False,
     ) -> SingletonInputs:
@@ -411,7 +402,6 @@ async def _prompt_to_llm_inputs_async(
             prompt_text = parsed["content"]
             prompt_token_ids = await self._tokenize_prompt_async(
                 prompt_text,
-                request_id=request_id,
                 lora_request=lora_request,
             )
 
@@ -460,7 +450,6 @@ async def _prompt_to_llm_inputs_async(
 
             prompt_token_ids = await self._tokenize_prompt_async(
                 prompt_text,
-                request_id=request_id,
                 lora_request=lora_request,
             )
 
@@ -560,7 +549,6 @@ def _separate_enc_dec_inputs_from_mm_processor_outputs(
     def _process_encoder_decoder_prompt(
         self,
         prompt: PromptType,
-        request_id: str,
     ) -> EncoderDecoderInputs:
         """
         For encoder/decoder models only:
@@ -587,7 +575,6 @@ def _process_encoder_decoder_prompt(
         Arguments:
 
         * prompt: an input prompt
-        * request_id
 
         Returns:
 
@@ -598,16 +585,11 @@ def _process_encoder_decoder_prompt(
 
         if is_explicit_encoder_decoder_prompt(prompt):
             encoder_inputs = self._prompt_to_llm_inputs(
-                prompt["encoder_prompt"],
-                request_id=request_id,
-            )
+                prompt["encoder_prompt"])
             if (decoder_input := prompt["decoder_prompt"]) is None:
                 decoder_inputs = None
             else:
-                decoder_inputs = self._prompt_to_llm_inputs(
-                    decoder_input,
-                    request_id=request_id,
-                )
+                decoder_inputs = self._prompt_to_llm_inputs(decoder_input)
             # For multimodal model, override decoder prompt from processor
             # with explicit decoder prompt.
             if self.model_config.is_multimodal_model and (
@@ -616,10 +598,7 @@ def _process_encoder_decoder_prompt(
                     self._separate_enc_dec_inputs_from_mm_processor_outputs(
                         encoder_inputs, decoder_inputs))
         else:
-            inputs = self._prompt_to_llm_inputs(
-                prompt,
-                request_id=request_id,
-            )
+            inputs = self._prompt_to_llm_inputs(prompt)
             if self.model_config.is_multimodal_model and (
                     self._can_process_multimodal()):
                 # Encoder-Decoder Multimodal model
@@ -636,26 +615,20 @@ def _process_encoder_decoder_prompt(
     async def _process_encoder_decoder_prompt_async(
         self,
         prompt: PromptType,
-        request_id: str,
     ) -> EncoderDecoderInputs:
         """Async version of :meth:`_process_encoder_decoder_prompt`."""
         encoder_inputs: SingletonInputs
         decoder_inputs: Optional[SingletonInputs]
 
         if is_explicit_encoder_decoder_prompt(prompt):
             encoder_task = self._prompt_to_llm_inputs_async(
-                prompt["encoder_prompt"],
-                request_id=request_id,
-            )
+                prompt["encoder_prompt"])
 
             if (decoder_input := prompt["decoder_prompt"]) is None:
                 encoder_inputs = await encoder_task
                 decoder_inputs = None
             else:
-                decoder_task = self._prompt_to_llm_inputs_async(
-                    decoder_input,
-                    request_id=request_id,
-                )
+                decoder_task = self._prompt_to_llm_inputs_async(decoder_input)
 
                 encoder_inputs, decoder_inputs = await asyncio.gather(
                     encoder_task, decoder_task)
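The async branch above keeps its concurrency: the encoder and decoder coroutines are created first and then awaited together, so the two tokenizations overlap. A self-contained toy of that `asyncio.gather` pattern (stub coroutines, not vLLM's real signatures):

```python
import asyncio

# Stub coroutine standing in for _prompt_to_llm_inputs_async; the point is
# the scheduling pattern, not vLLM's actual types.
async def tokenize_async(prompt: str) -> list[int]:
    await asyncio.sleep(0)  # placeholder for real tokenizer work
    return [len(word) for word in prompt.split()]

async def process_enc_dec(encoder_prompt: str, decoder_prompt: str):
    # Create both coroutines up front, then await them together so the
    # encoder and decoder tokenizations run concurrently.
    encoder_task = tokenize_async(encoder_prompt)
    decoder_task = tokenize_async(decoder_prompt)
    return await asyncio.gather(encoder_task, decoder_task)

print(asyncio.run(process_enc_dec("translate this sentence", "<s>")))
```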
@@ -668,10 +641,7 @@ async def _process_encoder_decoder_prompt_async(
                     self._separate_enc_dec_inputs_from_mm_processor_outputs(
                         encoder_inputs, decoder_inputs))
         else:
-            inputs = await self._prompt_to_llm_inputs_async(
-                prompt,
-                request_id=request_id,
-            )
+            inputs = await self._prompt_to_llm_inputs_async(prompt)
             if self.model_config.is_multimodal_model and (
                     self._can_process_multimodal()):
                 # Encoder-Decoder Multimodal model
@@ -704,7 +674,6 @@ def _build_decoder_only_llm_inputs(
     def _process_decoder_only_prompt(
         self,
         prompt: SingletonPrompt,
-        request_id: str,
         lora_request: Optional[LoRARequest] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
         return_mm_hashes: bool = False,
@@ -716,7 +685,6 @@ def _process_decoder_only_prompt(
         Arguments:
 
         * prompt: input prompt
-        * request_id
         * lora_request
         * prompt_adapter_request
         * return_mm_hashes
@@ -728,7 +696,6 @@ def _process_decoder_only_prompt(
 
         prompt_comps = self._prompt_to_llm_inputs(
             prompt,
-            request_id=request_id,
             lora_request=lora_request,
             return_mm_hashes=return_mm_hashes,
         )
@@ -741,15 +708,13 @@ def _process_decoder_only_prompt(
     async def _process_decoder_only_prompt_async(
         self,
         prompt: SingletonPrompt,
-        request_id: str,
         lora_request: Optional[LoRARequest] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
         return_mm_hashes: bool = False,
     ) -> DecoderOnlyInputs:
         """Async version of :meth:`_process_decoder_only_prompt`."""
         prompt_comps = await self._prompt_to_llm_inputs_async(
             prompt,
-            request_id=request_id,
             lora_request=lora_request,
             return_mm_hashes=return_mm_hashes,
         )
@@ -762,7 +727,6 @@ async def _process_decoder_only_prompt_async(
     def preprocess(
         self,
         prompt: PromptType,
-        request_id: str,
         lora_request: Optional[LoRARequest] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
         return_mm_hashes: bool = False,
@@ -774,10 +738,7 @@ def preprocess(
774738 "returned until they are supported on vLLM V1." )
775739 # Encoder-decoder model requires special mapping of
776740 # input prompts to encoder & decoder
777- return self ._process_encoder_decoder_prompt (
778- prompt ,
779- request_id = request_id ,
780- )
741+ return self ._process_encoder_decoder_prompt (prompt )
781742
782743 if is_explicit_encoder_decoder_prompt (prompt ):
783744 raise ValueError ("Cannot pass encoder-decoder prompt "
@@ -786,7 +747,6 @@ def preprocess(
         # Decoder-only operation
         return self._process_decoder_only_prompt(
             prompt,
-            request_id=request_id,
             lora_request=lora_request,
             prompt_adapter_request=prompt_adapter_request,
             return_mm_hashes=return_mm_hashes,
@@ -795,7 +755,6 @@ def preprocess(
     async def preprocess_async(
         self,
         prompt: PromptType,
-        request_id: str,
         lora_request: Optional[LoRARequest] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
         return_mm_hashes: bool = False,
@@ -807,10 +766,7 @@ async def preprocess_async(
807766 "returned until they are supported on vLLM V1." )
808767 # Encoder-decoder model requires special mapping of
809768 # input prompts to encoder & decoder
810- return await self ._process_encoder_decoder_prompt_async (
811- prompt ,
812- request_id = request_id ,
813- )
769+ return await self ._process_encoder_decoder_prompt_async (prompt )
814770
815771 if is_explicit_encoder_decoder_prompt (prompt ):
816772 raise ValueError ("Cannot pass encoder-decoder prompt "
@@ -819,7 +775,6 @@ async def preprocess_async(
             # Decoder-only operation
             return await self._process_decoder_only_prompt_async(
                 prompt,
-                request_id=request_id,
                 lora_request=lora_request,
                 prompt_adapter_request=prompt_adapter_request,
                 return_mm_hashes=return_mm_hashes,
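Taken together, these hunks strip `request_id` from the entire preprocessing chain (`preprocess`, `preprocess_async`, and every helper beneath them), so call sites drop the argument as well. A hedged sketch of the migration at a caller, where `preprocessor` stands in for an instance of this preprocessor class and is not constructed here:

```python
# Hypothetical call-site migration; "preprocessor" and "prompt" are assumed
# to exist and are not constructed in this sketch.
def preprocess_request(preprocessor, prompt):
    # Before this commit the call threaded a request_id through:
    #   preprocessor.preprocess(prompt, request_id=request_id,
    #                           lora_request=None)
    # After it, request_id is gone from the signature entirely:
    return preprocessor.preprocess(prompt,
                                   lora_request=None,
                                   prompt_adapter_request=None,
                                   return_mm_hashes=False)
```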