@@ -1569,56 +1569,35 @@ def _validate_mm_placeholders(
15691569                    "model (usually arising from an inconsistency between " 
15701570                    "`_call_hf_processor` and `_get_prompt_updates`)." )
15711571
1572-     def  apply (
1572+     def  _hash_mm_items (
15731573        self ,
1574-         prompt : Union [str , list [int ]],
1575-         mm_data : MultiModalDataDict ,
1574+         mm_items : MultiModalDataItems ,
15761575        hf_processor_mm_kwargs : Mapping [str , object ],
1577-         return_mm_hashes : bool  =  False ,
1578-     ) ->  MultiModalInputs :
1579-         """ 
1580-         Process multi-modal inputs to be used in vLLM. 
1576+     ) ->  dict [str , list [str ]]:
1577+         """Create MM hashes to be returned (only used in V1).""" 
15811578
1582-         The main steps are: 
1583- 
1584-         1. Apply HF Processor on prompt text and multi-modal data together, 
1585-            outputting token IDs and processed tensors. 
1586-         2. Find and update sequences in the token IDs with placeholder tokens. 
1587-            The number of placeholder tokens equals the feature size of the 
1588-            multi-modal data outputted by the multi-modal encoder. 
1589-         3. Extract information about the placeholder tokens from the 
1590-            processed token IDs. 
1591-         """ 
1592-         mm_items  =  self ._to_mm_items (mm_data )
1593- 
1594-         # Create MM hashes to be returned (only used in V1) 
15951579        # TODO: Use these hash keys for caching operations in apply_hf_processor 
15961580        # instead of rehashing. 
1581+         model_id  =  self .info .model_id 
15971582
1598-         if  return_mm_hashes :
1599-             model_id  =  self .info .model_id 
1600-             mm_hashes  =  {
1601-                 modality : [
1602-                     MultiModalHasher .hash_kwargs (model_id = model_id ,
1603-                                                  ** {modality : item },
1604-                                                  ** hf_processor_mm_kwargs )
1605-                     for  item  in  items 
1606-                 ]
1607-                 for  modality , items  in  mm_items .items ()
1608-             }
1609-         else :
1610-             mm_hashes  =  None 
1611- 
1612-         (
1613-             prompt_ids ,
1614-             mm_kwargs ,
1615-             is_update_applied ,
1616-         ) =  self ._cached_apply_hf_processor (
1617-             prompt ,
1618-             mm_items ,
1619-             hf_processor_mm_kwargs ,
1620-         )
1583+         return  {
1584+             modality : [
1585+                 MultiModalHasher .hash_kwargs (model_id = model_id ,
1586+                                              ** {modality : item },
1587+                                              ** hf_processor_mm_kwargs )
1588+                 for  item  in  items 
1589+             ]
1590+             for  modality , items  in  mm_items .items ()
1591+         }
16211592
1593+     def  _maybe_apply_prompt_updates (
1594+         self ,
1595+         mm_items : MultiModalDataItems ,
1596+         hf_processor_mm_kwargs : Mapping [str , object ],
1597+         prompt_ids : list [int ],
1598+         mm_kwargs : MultiModalKwargs ,
1599+         is_update_applied : bool ,
1600+     ) ->  tuple [list [int ], str , Mapping [str , list [PlaceholderFeaturesInfo ]]]:
16221601        unbound_prompt_updates  =  self ._get_prompt_updates (
16231602            mm_items ,
16241603            hf_processor_mm_kwargs ,
@@ -1652,6 +1631,51 @@ def apply(
16521631            )
16531632            self ._validate_mm_placeholders (mm_placeholders , mm_item_counts )
16541633
1634+         return  prompt_ids , prompt , mm_placeholders 
1635+ 
1636+     def  apply (
1637+         self ,
1638+         prompt : Union [str , list [int ]],
1639+         mm_data : MultiModalDataDict ,
1640+         hf_processor_mm_kwargs : Mapping [str , object ],
1641+         return_mm_hashes : bool  =  False ,
1642+     ) ->  MultiModalInputs :
1643+         """ 
1644+         Process multi-modal inputs to be used in vLLM. 
1645+ 
1646+         The main steps are: 
1647+ 
1648+         1. Apply HF Processor on prompt text and multi-modal data together, 
1649+            outputting token IDs and processed tensors. 
1650+         2. Find and update sequences in the token IDs with placeholder tokens. 
1651+            The number of placeholder tokens equals the feature size of the 
1652+            multi-modal data outputted by the multi-modal encoder. 
1653+         3. Extract information about the placeholder tokens from the 
1654+            processed token IDs. 
1655+         """ 
1656+         mm_items  =  self ._to_mm_items (mm_data )
1657+ 
1658+         mm_hashes  =  (self ._hash_mm_items (mm_items , hf_processor_mm_kwargs )
1659+                      if  return_mm_hashes  else  None )
1660+ 
1661+         (
1662+             prompt_ids ,
1663+             mm_kwargs ,
1664+             is_update_applied ,
1665+         ) =  self ._cached_apply_hf_processor (
1666+             prompt ,
1667+             mm_items ,
1668+             hf_processor_mm_kwargs ,
1669+         )
1670+ 
1671+         prompt_ids , prompt , mm_placeholders  =  self ._maybe_apply_prompt_updates (
1672+             mm_items = mm_items ,
1673+             hf_processor_mm_kwargs = hf_processor_mm_kwargs ,
1674+             prompt_ids = prompt_ids ,
1675+             mm_kwargs = mm_kwargs ,
1676+             is_update_applied = is_update_applied ,
1677+         )
1678+ 
16551679        mm_placeholder_ranges  =  {
16561680            modality : [item .to_range () for  item  in  placeholders ]
16571681            for  modality , placeholders  in  mm_placeholders .items ()
0 commit comments