@@ -254,7 +254,6 @@ def _process_multimodal(
         mm_processor_kwargs: Optional[Mapping[str, object]],
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> MultiModalInputs:
         """
         Apply the model's multi-modal processor to a multi-modal prompt,
@@ -271,8 +270,7 @@ def _process_multimodal(
         return mm_processor.apply(prompt,
                                   mm_data,
                                   hf_processor_mm_kwargs=mm_processor_kwargs,
-                                  tokenization_kwargs=tokenization_kwargs,
-                                  return_mm_hashes=return_mm_hashes)
+                                  tokenization_kwargs=tokenization_kwargs)
 
     async def _process_multimodal_async(
         self,
@@ -281,7 +279,6 @@ async def _process_multimodal_async(
         mm_processor_kwargs: Optional[Mapping[str, object]],
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> MultiModalInputs:
         """
         Async version of
@@ -297,8 +294,7 @@ async def _process_multimodal_async(
         return mm_processor.apply(prompt,
                                   mm_data,
                                   hf_processor_mm_kwargs=mm_processor_kwargs,
-                                  tokenization_kwargs=tokenization_kwargs,
-                                  return_mm_hashes=return_mm_hashes)
+                                  tokenization_kwargs=tokenization_kwargs)
 
     def _process_embeds(
         self,
@@ -335,7 +331,6 @@ def _process_tokens(
         parsed_content: TokensPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_token_ids = parsed_content["prompt_token_ids"]
         token_type_ids = parsed_content.get("token_type_ids")
@@ -348,7 +343,6 @@ def _process_tokens(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         else:
             inputs = token_inputs(
@@ -366,7 +360,6 @@ async def _process_tokens_async(
         parsed_content: TokensPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_token_ids = parsed_content["prompt_token_ids"]
         token_type_ids = parsed_content.get("token_type_ids")
@@ -379,7 +372,6 @@ async def _process_tokens_async(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         else:
             inputs = token_inputs(
@@ -397,7 +389,6 @@ def _process_text(
         parsed_content: TextPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_text = parsed_content["prompt"]
 
@@ -409,7 +400,6 @@ def _process_text(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         else:
             prompt_token_ids = self._tokenize_prompt(
@@ -432,7 +422,6 @@ async def _process_text_async(
         parsed_content: TextPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> Union[TokenInputs, MultiModalInputs]:
         prompt_text = parsed_content["prompt"]
 
@@ -444,7 +433,6 @@ async def _process_text_async(
                 parsed_content.get("mm_processor_kwargs"),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         else:
             prompt_token_ids = await self._tokenize_prompt_async(
@@ -467,7 +455,6 @@ def _prompt_to_llm_inputs(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> SingletonInputs:
         """
         Extract the singleton inputs from a prompt.
@@ -476,7 +463,6 @@ def _prompt_to_llm_inputs(
 
         * prompt: single encoder or decoder input prompt
         * lora_request: this is only valid for decoder prompts
-        * return_mm_hashes: whether to return multimodal hashes
 
         Returns:
 
@@ -490,21 +476,18 @@ def _prompt_to_llm_inputs(
             return self._process_tokens(
                 parsed["content"],
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         if parsed["type"] == "text":
             return self._process_text(
                 parsed["content"],
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         if parsed["type"] == "str":
             return self._process_text(
                 TextPrompt(prompt=parsed["content"]),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
 
         assert_never(parsed)
@@ -514,7 +497,6 @@ async def _prompt_to_llm_inputs_async(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> SingletonInputs:
         """
         Async version of
@@ -528,21 +510,18 @@ async def _prompt_to_llm_inputs_async(
             return await self._process_tokens_async(
                 parsed["content"],
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         if parsed["type"] == "text":
             return await self._process_text_async(
                 parsed["content"],
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
         if parsed["type"] == "str":
             return await self._process_text_async(
                 TextPrompt(prompt=parsed["content"]),
                 tokenization_kwargs=tokenization_kwargs,
                 lora_request=lora_request,
-                return_mm_hashes=return_mm_hashes,
             )
 
         assert_never(parsed)
@@ -785,7 +764,6 @@ def _process_decoder_only_prompt(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> DecoderOnlyInputs:
         """
         For decoder-only models:
@@ -796,7 +774,6 @@ def _process_decoder_only_prompt(
 
         * prompt: input prompt
         * lora_request
-        * return_mm_hashes
 
         Returns:
 
@@ -807,7 +784,6 @@ def _process_decoder_only_prompt(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
 
         return self._build_decoder_only_llm_inputs(prompt_comps)
@@ -817,7 +793,6 @@ async def _process_decoder_only_prompt_async(
         prompt: SingletonPrompt,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> DecoderOnlyInputs:
         """
         Async version of
@@ -827,7 +802,6 @@ async def _process_decoder_only_prompt_async(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
 
         return self._build_decoder_only_llm_inputs(prompt_comps)
@@ -837,17 +811,15 @@ def preprocess(
         prompt: PromptType,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> ProcessorInputs:
         """Preprocess the input prompt."""
         if self.model_config.is_encoder_decoder:
-            assert not return_mm_hashes, (
-                "Multimodal hashes for encoder-decoder models should not be ",
-                "returned until they are supported on vLLM V1.")
             # Encoder-decoder model requires special mapping of
-            # input prompts to encoder & decoder
+            # input prompts to encoder & decoder.
             return self._process_encoder_decoder_prompt(
-                prompt, tokenization_kwargs)
+                prompt,
+                tokenization_kwargs,
+            )
 
         if is_explicit_encoder_decoder_prompt(prompt):
             raise ValueError("Cannot pass encoder-decoder prompt "
@@ -858,27 +830,25 @@ def preprocess(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
 
     async def preprocess_async(
         self,
         prompt: PromptType,
         tokenization_kwargs: Optional[dict[str, Any]] = None,
         lora_request: Optional[LoRARequest] = None,
-        return_mm_hashes: bool = False,
     ) -> ProcessorInputs:
         """
         Async version of
         [`preprocess`][vllm.inputs.preprocess.InputPreprocessor.preprocess].
         """
         if self.model_config.is_encoder_decoder:
-            assert not return_mm_hashes, (
-                "Multimodal hashes for encoder-decoder models should not be ",
-                "returned until they are supported on vLLM V1.")
             # Encoder-decoder model requires special mapping of
-            # input prompts to encoder & decoder
-            return await self._process_encoder_decoder_prompt_async(prompt)
+            # input prompts to encoder & decoder.
+            return await self._process_encoder_decoder_prompt_async(
+                prompt,
+                tokenization_kwargs,
+            )
 
         if is_explicit_encoder_decoder_prompt(prompt):
             raise ValueError("Cannot pass encoder-decoder prompt "
@@ -889,5 +859,4 @@ async def preprocess_async(
             prompt,
             tokenization_kwargs=tokenization_kwargs,
             lora_request=lora_request,
-            return_mm_hashes=return_mm_hashes,
         )
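For call sites updating past this change, here is a minimal sketch of the new `preprocess` / `preprocess_async` signatures in use. It is illustrative only: the already-constructed `preprocessor` object, the prompt string, and the `tokenization_kwargs` values are assumptions, not part of this diff; only the parameter names come from the signatures above.

```python
# Hypothetical call-site sketch; only the parameter names are taken
# from the signatures in this diff.
from vllm.inputs.preprocess import InputPreprocessor


def run_sync(preprocessor: InputPreprocessor):
    # `return_mm_hashes` is no longer a parameter, so passing it now
    # raises TypeError; callers simply drop the keyword.
    return preprocessor.preprocess(
        "a photo of a cat",  # a plain str is one accepted PromptType
        tokenization_kwargs={"truncation": True},  # illustrative, forwarded to the tokenizer
        lora_request=None,  # only valid for decoder prompts
    )


async def run_async(preprocessor: InputPreprocessor):
    # The async variant mirrors the sync signature one-for-one.
    return await preprocessor.preprocess_async(
        "a photo of a cat",
        tokenization_kwargs=None,
        lora_request=None,
    )
```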