Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/transformers/image_processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ class BaseImageProcessor(ImageProcessingMixin):

def __init__(self, **kwargs):
super().__init__(**kwargs)
if not self.is_fast:
logger.warning_once(
f"Using a slow image processor (`{self.__class__.__name__}`). "
"As we are transitioning to fast (PyTorch-native) processors, consider using `AutoImageProcessor` or the model-specific fast image processor class "
"to instantiate a fast image processor."
)
Comment on lines 42 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM!

Related, since we're touching on the topic of "loading old models from the hub with new utils" this is related to the "from_pretrained conversion" @Cyrilvallez is working on, if we have modifications to apply to some old image processors, they should be in the from_pretrained as well to "convert" the processor in the same sense.


@property
def is_fast(self) -> bool:
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/align/processing_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ class AlignProcessor(ProcessorMixin):
```

Args:
image_processor ([`EfficientNetImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`BertTokenizer`, `BertTokenizerFast`]):
The tokenizer is a required input.

"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "EfficientNetImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
valid_processor_kwargs = AlignProcessorKwargs

Expand Down
14 changes: 2 additions & 12 deletions src/transformers/models/auto/image_processing_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@
logger = logging.get_logger(__name__)


FORCE_FAST_IMAGE_PROCESSOR = ["Qwen2VLImageProcessor"]


if TYPE_CHECKING:
# This significantly improves completion suggestion performance when
# the transformers package is used with Microsoft's Pylance language server.
Expand Down Expand Up @@ -520,19 +517,12 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
# if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor.
if use_fast is None:
use_fast = image_processor_type.endswith("Fast")
if not use_fast and image_processor_type in FORCE_FAST_IMAGE_PROCESSOR and is_torchvision_available():
use_fast = True
if not use_fast and is_torchvision_available():
logger.warning_once(
f"The image processor of type `{image_processor_type}` is now loaded as a fast processor by default, even if the model checkpoint was saved with a slow processor. "
"This is a breaking change and may produce slightly different outputs. To continue using the slow processor, instantiate this class with `use_fast=False`. "
"Note that this behavior will be extended to all models in a future release."
)
if not use_fast:
logger.warning_once(
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
"`use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. "
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
)
use_fast = True
if use_fast and not image_processor_type.endswith("Fast"):
image_processor_type += "Fast"
if use_fast and not is_torchvision_available():
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/tokenization_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@
("FastSpeech2ConformerTokenizer" if is_g2p_en_available() else None, None),
),
("flaubert", ("FlaubertTokenizer", None)),
("flava", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
("flex_olmo", (None, "GPT2TokenizerFast" if is_tokenizers_available() else None)),
("fnet", ("FNetTokenizer", "FNetTokenizerFast" if is_tokenizers_available() else None)),
("fsmt", ("FSMTTokenizer", None)),
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/blip/processing_blip.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class BlipProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

def __init__(self, image_processor, tokenizer, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/blip_2/processing_blip_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class Blip2Processor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs):
Expand Down
8 changes: 4 additions & 4 deletions src/transformers/models/bridgetower/processing_bridgetower.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,19 @@ class BridgeTowerProcessor(ProcessorMixin):
Constructs a BridgeTower processor which wraps a Roberta tokenizer and BridgeTower image processor into a single
processor.

[`BridgeTowerProcessor`] offers all the functionalities of [`BridgeTowerImageProcessor`] and
[`BridgeTowerProcessor`] offers all the functionalities of [`AutoImageProcessor`] and
[`RobertaTokenizerFast`]. See the docstring of [`~BridgeTowerProcessor.__call__`] and
[`~BridgeTowerProcessor.decode`] for more information.

Args:
image_processor (`BridgeTowerImageProcessor`):
An instance of [`BridgeTowerImageProcessor`]. The image processor is a required input.
image_processor (`AutoImageProcessor`):
An instance of [`AutoImageProcessor`]. The image processor is a required input.
tokenizer (`RobertaTokenizerFast`):
An instance of [`RobertaTokenizerFast`]. The tokenizer is a required input.
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "BridgeTowerImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = ("RobertaTokenizer", "RobertaTokenizerFast")
valid_processor_kwargs = BridgeTowerProcessorKwargs

Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/chameleon/processing_chameleon.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ class ChameleonProcessor(ProcessorMixin):
Constructs a Chameleon processor which wraps a Chameleon image processor and a Chameleon tokenizer into a single
processor.

[`ChameleonProcessor`] offers all the functionalities of [`ChameleonImageProcessor`] and [`LlamaTokenizerFast`].
[`ChameleonProcessor`] offers all the functionalities of [`AutoImageProcessor`] and [`LlamaTokenizerFast`].
See the [`~ChameleonProcessor.__call__`] and [`~ChameleonProcessor.decode`] for more information.

Args:
image_processor ([`ChameleonImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`LlamaTokenizerFast`]):
The tokenizer is a required input.
Expand All @@ -71,7 +71,7 @@ class ChameleonProcessor(ProcessorMixin):

attributes = ["image_processor", "tokenizer"]
tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")
image_processor_class = "ChameleonImageProcessor"
image_processor_class = "AutoImageProcessor"

def __init__(self, image_processor, tokenizer, image_seq_length: int = 1024, image_token: str = "<image>"):
self.image_seq_length = image_seq_length
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class ChineseCLIPProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("ChineseCLIPImageProcessor", "ChineseCLIPImageProcessorFast")
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor=None, tokenizer=None, **kwargs):
super().__init__(image_processor, tokenizer)
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/clip/processing_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class CLIPProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor=None, tokenizer=None, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/clipseg/processing_clipseg.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class CLIPSegProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast")
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor=None, tokenizer=None, **kwargs):
super().__init__(image_processor, tokenizer)
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/emu3/processing_emu3.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ class Emu3Processor(ProcessorMixin):
Constructs an Emu3 processor which wraps an Emu3 image processor and a GPT2 tokenizer into a single
processor.

[`Emu3Processor`] offers all the functionalities of [`Emu3ImageProcessor`] and [`GPT2TokenizerFast`].
[`Emu3Processor`] offers all the functionalities of [`AutoImageProcessor`] and [`GPT2TokenizerFast`].
See the [`~Emu3Processor.__call__`] and [`~Emu3Processor.decode`] for more information.

Args:
image_processor ([`Emu3ImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`Emu3TokenizerFast`]):
The tokenizer is a required input.
Expand All @@ -66,7 +66,7 @@ class Emu3Processor(ProcessorMixin):

attributes = ["image_processor", "tokenizer"]
tokenizer_class = ("GPT2Tokenizer", "GPT2TokenizerFast")
image_processor_class = "Emu3ImageProcessor"
image_processor_class = "AutoImageProcessor"

def __init__(
self,
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/flava/processing_flava.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,16 @@ class FlavaProcessor(ProcessorMixin):
r"""
Constructs a FLAVA processor which wraps a FLAVA image processor and a FLAVA tokenizer into a single processor.

[`FlavaProcessor`] offers all the functionalities of [`FlavaImageProcessor`] and [`BertTokenizerFast`]. See the
[`FlavaProcessor`] offers all the functionalities of [`AutoImageProcessor`] and [`BertTokenizerFast`]. See the
[`~FlavaProcessor.__call__`] and [`~FlavaProcessor.decode`] for more information.

Args:
image_processor ([`FlavaImageProcessor`], *optional*): The image processor is a required input.
image_processor ([`AutoImageProcessor`], *optional*): The image processor is a required input.
tokenizer ([`BertTokenizerFast`], *optional*): The tokenizer is a required input.
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "FlavaImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

def __init__(self, image_processor=None, tokenizer=None, **kwargs):
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/fuyu/processing_fuyu.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,18 +337,18 @@ class FuyuProcessor(ProcessorMixin):
r"""
Constructs a Fuyu processor which wraps a Fuyu image processor and a Llama tokenizer into a single processor.

[`FuyuProcessor`] offers all the functionalities of [`FuyuImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`FuyuProcessor`] offers all the functionalities of [`AutoImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`~FuyuProcessor.__call__`] and [`~FuyuProcessor.decode`] for more information.

Args:
image_processor ([`FuyuImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`LlamaTokenizerFast`]):
The tokenizer is a required input.
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "FuyuImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor, tokenizer, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,19 @@ class GroundingDinoProcessor(ProcessorMixin):
Constructs a Grounding DINO processor which wraps a Deformable DETR image processor and a BERT tokenizer into a
single processor.

[`GroundingDinoProcessor`] offers all the functionalities of [`GroundingDinoImageProcessor`] and
[`GroundingDinoProcessor`] offers all the functionalities of [`AutoImageProcessor`] and
[`AutoTokenizer`]. See the docstring of [`~GroundingDinoProcessor.__call__`] and [`~GroundingDinoProcessor.decode`]
for more information.

Args:
image_processor (`GroundingDinoImageProcessor`):
An instance of [`GroundingDinoImageProcessor`]. The image processor is a required input.
image_processor (`AutoImageProcessor`):
An instance of [`AutoImageProcessor`]. The image processor is a required input.
tokenizer (`AutoTokenizer`):
An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "GroundingDinoImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"
valid_processor_kwargs = GroundingDinoProcessorKwargs

Expand All @@ -145,7 +145,7 @@ def __call__(
**kwargs: Unpack[GroundingDinoProcessorKwargs],
) -> BatchEncoding:
"""
This method uses [`GroundingDinoImageProcessor.__call__`] method to prepare image(s) for the model, and
This method uses [`AutoImageProcessor.__call__`] method to prepare image(s) for the model, and
[`BertTokenizerFast.__call__`] to prepare text for the model.

Args:
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/idefics/processing_idefics.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,13 @@ def is_url(string):

class IdeficsProcessor(ProcessorMixin):
r"""
Constructs a IDEFICS processor which wraps a LLama tokenizer and IDEFICS image processor into a single processor.
Constructs an IDEFICS processor which wraps a Llama tokenizer and an AutoImageProcessor into a single processor.

[`IdeficsProcessor`] offers all the functionalities of [`IdeficsImageProcessor`] and [`LlamaTokenizerFast`]. See
the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

Args:
image_processor (`IdeficsImageProcessor`):
image_processor (`AutoImageProcessor`):
An instance of [`IdeficsImageProcessor`]. The image processor is a required input.
tokenizer (`LlamaTokenizerFast`):
An instance of [`LlamaTokenizerFast`]. The tokenizer is a required input.
Expand All @@ -154,7 +154,7 @@ class IdeficsProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "IdeficsImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "LlamaTokenizerFast"

def __init__(self, image_processor, tokenizer=None, image_size=224, add_end_of_utterance_token=None, **kwargs):
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/idefics2/processing_idefics2.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ class Idefics2ProcessorKwargs(ProcessingKwargs, total=False):

class Idefics2Processor(ProcessorMixin):
r"""
Constructs a IDEFICS2 processor which wraps a LLama tokenizer and IDEFICS2 image processor into a single processor.
Constructs an IDEFICS2 processor which wraps a Llama tokenizer and an AutoImageProcessor into a single processor.

[`IdeficsProcessor`] offers all the functionalities of [`Idefics2ImageProcessor`] and [`LlamaTokenizerFast`]. See
the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

Args:
image_processor (`Idefics2ImageProcessor`):
image_processor (`AutoImageProcessor`):
An instance of [`Idefics2ImageProcessor`]. The image processor is a required input.
tokenizer (`PreTrainedTokenizerBase`, *optional*):
An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
Expand All @@ -76,7 +76,7 @@ class Idefics2Processor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "Idefics2ImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/idefics3/processing_idefics3.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ class Idefics3ProcessorKwargs(ProcessingKwargs, total=False):

class Idefics3Processor(ProcessorMixin):
r"""
Constructs a Idefics3 processor which wraps a LLama tokenizer and Idefics3 image processor into a single processor.
Constructs an Idefics3 processor which wraps a Llama tokenizer and an AutoImageProcessor into a single processor.

[`Idefics3Processor`] offers all the functionalities of [`Idefics3ImageProcessor`] and [`Idefics3TokenizerFast`]. See
the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

Args:
image_processor (`Idefics3ImageProcessor`):
image_processor (`AutoImageProcessor`):
An instance of [`Idefics3ImageProcessor`]. The image processor is a required input.
tokenizer (`PreTrainedTokenizerBase`, *optional*):
An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
Expand All @@ -122,7 +122,7 @@ class Idefics3Processor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "Idefics3ImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class InstructBlipProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer", "qformer_tokenizer"]
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"
qformer_tokenizer_class = "AutoTokenizer"

Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/janus/processing_janus.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ class JanusProcessor(ProcessorMixin):
r"""
Constructs a Janus processor which wraps a Janus Image Processor and a Llama tokenizer into a single processor.

[`JanusProcessor`] offers all the functionalities of [`JanusImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`JanusProcessor`] offers all the functionalities of [`AutoImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`~JanusProcessor.__call__`] and [`~JanusProcessor.decode`] for more information.

Args:
image_processor ([`JanusImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`LlamaTokenizerFast`]):
The tokenizer is a required input.
Expand All @@ -65,7 +65,7 @@ class JanusProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "JanusImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "LlamaTokenizerFast"

def __init__(self, image_processor, tokenizer, chat_template=None, use_default_system_prompt=False, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/kosmos2/processing_kosmos2.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class Kosmos2Processor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwargs):
Expand Down
Loading