@@ -152,6 +152,7 @@ def get_max_tokens_per_item_by_modality(
152152        model_config : "ModelConfig" ,
153153        * ,
154154        cache : BaseMultiModalProcessorCache  |  None  =  None ,
155+         profiler_limits : Mapping [str , int ] |  None  =  None ,
155156    ) ->  Mapping [str , int ]:
156157        """ 
157158        Get the maximum number of tokens per data item from each modality based 
@@ -164,40 +165,15 @@ def get_max_tokens_per_item_by_modality(
164165        profiler : MultiModalProfiler  =  MultiModalProfiler (processor )
165166
166167        seq_len  =  model_config .max_model_len 
167-         mm_limits  =  self .get_mm_limits_per_prompt (model_config , cache = cache )
168+         profiler_limits  =  (
169+             profiler .get_mm_limits () if  profiler_limits  is  None  else  profiler_limits 
170+         )
168171
169172        return  profiler .get_mm_max_contiguous_tokens (
170173            seq_len ,
171-             {modality : 1  for  modality , limit  in  mm_limits .items () if  limit  >  0 },
172-         )
173- 
174-     def  get_max_tokens_per_item_by_nonzero_modality (
175-         self ,
176-         model_config : "ModelConfig" ,
177-         * ,
178-         cache : BaseMultiModalProcessorCache  |  None  =  None ,
179-     ) ->  Mapping [str , int ]:
180-         """ 
181-         Get the maximum number of tokens per data item from each modality based 
182-         on underlying model configuration, excluding modalities that user 
183-         explicitly disabled via `limit_mm_per_prompt`. 
184- 
185-         Note: 
186-             This is currently directly used only in V1 for profiling the memory 
187-             usage of a model. 
188-         """ 
189-         mm_limits  =  self .get_mm_limits_per_prompt (model_config , cache = cache )
190-         max_tokens_per_item  =  self .get_max_tokens_per_item_by_modality (
191-             model_config ,
192-             cache = cache ,
174+             {modality : 1  for  modality , limit  in  profiler_limits .items () if  limit  >  0 },
193175        )
194176
195-         return  {
196-             key : max_tokens_per_mm_item 
197-             for  key , max_tokens_per_mm_item  in  max_tokens_per_item .items ()
198-             if  mm_limits [key ] >  0 
199-         }
200- 
201177    def  get_mm_limits_per_prompt (
202178        self ,
203179        model_config : "ModelConfig" ,
@@ -369,7 +345,7 @@ def get_encdec_max_encoder_len(self, model_config: "ModelConfig") -> int:
369345        """ 
370346        if  not  model_config .is_encoder_decoder :
371347            return  0 
372-         max_tokens  =  self .get_max_tokens_per_item_by_nonzero_modality (model_config )
348+         max_tokens  =  self .get_max_tokens_per_item_by_modality (model_config )
373349        if  not  max_tokens :
374350            # TODO - this function assumes encoder-decoder models are 
375351            # multimodal. This will need to change when adding support for more 
0 commit comments