@@ -152,6 +152,7 @@ def get_max_tokens_per_item_by_modality(
152152 model_config : "ModelConfig" ,
153153 * ,
154154 cache : BaseMultiModalProcessorCache | None = None ,
155+ profiler_limits : Mapping [str , int ] | None = None ,
155156 ) -> Mapping [str , int ]:
156157 """
157158 Get the maximum number of tokens per data item from each modality based
@@ -164,40 +165,15 @@ def get_max_tokens_per_item_by_modality(
164165 profiler : MultiModalProfiler = MultiModalProfiler (processor )
165166
166167 seq_len = model_config .max_model_len
167- mm_limits = self .get_mm_limits_per_prompt (model_config , cache = cache )
168+ profiler_limits = (
169+ profiler .get_mm_limits () if profiler_limits is None else profiler_limits
170+ )
168171
169172 return profiler .get_mm_max_contiguous_tokens (
170173 seq_len ,
171- {modality : 1 for modality , limit in mm_limits .items () if limit > 0 },
172- )
173-
174- def get_max_tokens_per_item_by_nonzero_modality (
175- self ,
176- model_config : "ModelConfig" ,
177- * ,
178- cache : BaseMultiModalProcessorCache | None = None ,
179- ) -> Mapping [str , int ]:
180- """
181- Get the maximum number of tokens per data item from each modality based
182- on underlying model configuration, excluding modalities that user
183- explicitly disabled via `limit_mm_per_prompt`.
184-
185- Note:
186- This is currently directly used only in V1 for profiling the memory
187- usage of a model.
188- """
189- mm_limits = self .get_mm_limits_per_prompt (model_config , cache = cache )
190- max_tokens_per_item = self .get_max_tokens_per_item_by_modality (
191- model_config ,
192- cache = cache ,
174+ {modality : 1 for modality , limit in profiler_limits .items () if limit > 0 },
193175 )
194176
195- return {
196- key : max_tokens_per_mm_item
197- for key , max_tokens_per_mm_item in max_tokens_per_item .items ()
198- if mm_limits [key ] > 0
199- }
200-
201177 def get_mm_limits_per_prompt (
202178 self ,
203179 model_config : "ModelConfig" ,
@@ -369,7 +345,7 @@ def get_encdec_max_encoder_len(self, model_config: "ModelConfig") -> int:
369345 """
370346 if not model_config .is_encoder_decoder :
371347 return 0
372- max_tokens = self .get_max_tokens_per_item_by_nonzero_modality (model_config )
348+ max_tokens = self .get_max_tokens_per_item_by_modality (model_config )
373349 if not max_tokens :
374350 # TODO - this function assumes encoder-decoder models are
375351 # multimodal. This will need to change when adding support for more
0 commit comments