                                                cached_tokenizer_from_config)
 from vllm.utils import ClassRegistry
 
-from .cache import (BaseMultiModalProcessorCache,
-                    processor_only_cache_from_config)
+from .cache import BaseMultiModalProcessorCache
 from .processing import BaseMultiModalProcessor, BaseProcessingInfo
 from .profiling import (BaseDummyInputsBuilder, DummyDecoderData,
                         DummyEncoderData, MultiModalProfiler)
@@ -176,35 +175,6 @@ def get_max_tokens_per_item_by_nonzero_modality(
             if mm_limits[key] > 0
         }
 
-    # TODO: Remove once V0 is gone
-    def get_max_tokens_by_modality(
-        self,
-        model_config: "ModelConfig",
-    ) -> Mapping[str, int]:
-        """
-        Get the maximum number of tokens from each modality
-        for profiling the memory usage of a model.
-        """
-        cache = processor_only_cache_from_config(model_config, self)
-        mm_limits = self.get_mm_limits_per_prompt(model_config, cache=cache)
-        max_tokens_per_item = self.get_max_tokens_per_item_by_modality(
-            model_config,
-            cache=cache,
-        )
-
-        return {
-            key: mm_limits[key] * max_tokens_per_mm_item
-            for key, max_tokens_per_mm_item in max_tokens_per_item.items()
-        }
-
-    # TODO: Remove once V0 is gone
-    def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int:
-        """
-        Get the maximum number of multi-modal tokens
-        for profiling the memory usage of a model.
-        """
-        return sum(self.get_max_tokens_by_modality(model_config).values())
-
     def get_mm_limits_per_prompt(
         self,
         model_config: "ModelConfig",
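
The two deleted helpers were thin wrappers over the registry methods that survive this diff. If downstream code still needs the same totals, here is a minimal sketch of the equivalent arithmetic; it assumes `registry` is a `MultiModalRegistry` instance, `model_config` is a `ModelConfig`, and that the `cache` keyword on both surviving methods is optional (the deleted code built one via `processor_only_cache_from_config`, whose import this diff also drops).

```python
from typing import Mapping

def max_tokens_by_modality(registry, model_config) -> Mapping[str, int]:
    # Same computation as the removed get_max_tokens_by_modality:
    # per-item token count times the per-prompt item limit, per modality.
    mm_limits = registry.get_mm_limits_per_prompt(model_config)
    per_item = registry.get_max_tokens_per_item_by_modality(model_config)
    return {key: mm_limits[key] * n for key, n in per_item.items()}

def max_multimodal_tokens(registry, model_config) -> int:
    # Mirrors the removed get_max_multimodal_tokens: sum across modalities.
    return sum(max_tokens_by_modality(registry, model_config).values())
```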