 # limitations under the License.
 #
 
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union, cast
 
 from transformers.processing_utils import ProcessorMixin
 from typing_extensions import TypeVar
 from vllm.transformers_utils import processor
 from vllm.transformers_utils.processor import (_merge_mm_kwargs,
-                                               cached_get_processor)
+                                               cached_get_image_processor,
+                                               cached_get_processor,
+                                               cached_get_video_processor)
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig
@@ -90,9 +92,99 @@ def cached_processor_from_config( |
     )
 
 
+def get_image_processor(
+    processor_name: str,
+    *args: Any,
+    revision: Optional[str] = None,
+    trust_remote_code: bool = False,
+    **kwargs: Any,
+):
+    """Load an image processor for the given model name via HuggingFace."""
+    # don't put this import at the top level
+    # it will call torch.cuda.device_count()
+    from transformers import AutoImageProcessor
+    from transformers.image_processing_utils import BaseImageProcessor
+
+    try:
+        processor = AutoImageProcessor.from_pretrained(
+            processor_name,
+            *args,
+            revision=revision,
+            trust_remote_code=trust_remote_code,
+            **kwargs)
+    except ValueError as e:
+        # If the error pertains to the processor class not existing or not
+        # currently being imported, suggest using the --trust-remote-code flag.
+        # Unlike AutoTokenizer, AutoImageProcessor does not separate such errors
+        if not trust_remote_code:
+            err_msg = (
+                "Failed to load the image processor. If the image processor is "
+                "a custom processor not yet available in the HuggingFace "
+                "transformers library, consider setting "
+                "`trust_remote_code=True` in LLM or using the "
+                "`--trust-remote-code` flag in the CLI.")
+            raise RuntimeError(err_msg) from e
+        else:
+            raise e
+
+    return cast(BaseImageProcessor, processor)
+
+
+def cached_image_processor_from_config(
+    model_config: "ModelConfig",
+    **kwargs: Any,
+):
+    return cached_get_image_processor(
+        model_config.model,
+        revision=model_config.revision,
+        trust_remote_code=model_config.trust_remote_code,
+        **_merge_mm_kwargs(model_config, **kwargs),
+    )
+
+
+def get_video_processor(
+    processor_name: str,
+    *args: Any,
+    revision: Optional[str] = None,
+    trust_remote_code: bool = False,
+    **kwargs: Any,
+):
+    """Load a video processor for the given model name via HuggingFace."""
+    # don't put this import at the top level
+    # it will call torch.cuda.device_count()
+    from transformers.image_processing_utils import BaseImageProcessor
+
+    processor = get_processor(
+        processor_name,
+        *args,
+        revision=revision,
+        trust_remote_code=trust_remote_code,
+        **kwargs,
+    )
+
+    return cast(BaseImageProcessor, processor.video_processor)
+
+
+def cached_video_processor_from_config(
+    model_config: "ModelConfig",
+    **kwargs: Any,
+):
+    return cached_get_video_processor(
+        model_config.model,
+        revision=model_config.revision,
+        trust_remote_code=model_config.trust_remote_code,
+        **_merge_mm_kwargs(model_config, **kwargs),
+    )
+
+
+# Adapted from vllm: https://github.com/vllm-project/vllm/pull/17948
 # Pass `revision` param to transformer processor to avoid using `main` as
 # default branch when using modelscope.
 # Find more details at:
 # https://github.com/vllm-project/vllm-ascend/issues/829
 processor.get_processor = get_processor
 processor.cached_processor_from_config = cached_processor_from_config
+processor.get_image_processor = get_image_processor
+processor.cached_image_processor_from_config = cached_image_processor_from_config
+processor.get_video_processor = get_video_processor
+processor.cached_video_processor_from_config = cached_video_processor_from_config
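Once this patch module has been imported, code that goes through `vllm.transformers_utils.processor` picks up the new helpers the same way it already picks up the patched `get_processor` and `cached_processor_from_config`. A minimal usage sketch is below; it assumes the patch has already run and that a `ModelConfig` instance comes from the engine's configuration (constructing one from scratch is omitted), and the helper name `load_mm_processors` is illustrative only.

    # Minimal sketch (assumption: this patch module was imported first, so the
    # attributes below have already been replaced on the processor module).
    from vllm.transformers_utils import processor

    def load_mm_processors(model_config):
        # Both calls resolve through the monkey-patched module attributes,
        # so `revision` and `trust_remote_code` come from the ModelConfig.
        image_processor = processor.cached_image_processor_from_config(model_config)
        video_processor = processor.cached_video_processor_from_config(model_config)
        return image_processor, video_processor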