Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/transformers/image_processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ class BaseImageProcessor(ImageProcessingMixin):

def __init__(self, **kwargs):
super().__init__(**kwargs)
if not self.is_fast:
logger.warning_once(
f"Using a slow image processor (`{self.__class__.__name__}`). "
"As we are transitioning to fast (PyTorch-native) processors, consider using `AutoImageProcessor` or the model-specific fast image processor class "
"to instantiate a fast image processor."
)
Comment on lines 42 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SGTM!

Related, since we're touching on the topic of "loading old models from the hub with new utils" this is related to the "from_pretrained conversion" @Cyrilvallez is working on, if we have modifications to apply to some old image processors, they should be in the from_pretrained as well to "convert" the processor in the same sense.


@property
def is_fast(self) -> bool:
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/align/processing_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ class AlignProcessor(ProcessorMixin):
```

Args:
image_processor ([`EfficientNetImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`BertTokenizer`, `BertTokenizerFast`]):
The tokenizer is a required input.

"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "EfficientNetImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
valid_processor_kwargs = AlignProcessorKwargs

Expand Down
14 changes: 2 additions & 12 deletions src/transformers/models/auto/image_processing_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@
logger = logging.get_logger(__name__)


FORCE_FAST_IMAGE_PROCESSOR = ["Qwen2VLImageProcessor"]


if TYPE_CHECKING:
# This significantly improves completion suggestion performance when
# the transformers package is used with Microsoft's Pylance language server.
Expand Down Expand Up @@ -520,19 +517,12 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
# if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor.
if use_fast is None:
use_fast = image_processor_type.endswith("Fast")
if not use_fast and image_processor_type in FORCE_FAST_IMAGE_PROCESSOR and is_torchvision_available():
use_fast = True
if not use_fast and is_torchvision_available():
logger.warning_once(
f"The image processor of type `{image_processor_type}` is now loaded as a fast processor by default, even if the model checkpoint was saved with a slow processor. "
"This is a breaking change and may produce slightly different outputs. To continue using the slow processor, instantiate this class with `use_fast=False`. "
"Note that this behavior will be extended to all models in a future release."
)
if not use_fast:
logger.warning_once(
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
"`use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. "
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
)
use_fast = True
if use_fast and not image_processor_type.endswith("Fast"):
image_processor_type += "Fast"
if use_fast and not is_torchvision_available():
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/tokenization_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@
("FastSpeech2ConformerTokenizer" if is_g2p_en_available() else None, None),
),
("flaubert", ("FlaubertTokenizer", None)),
("flava", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
("flex_olmo", (None, "GPT2TokenizerFast" if is_tokenizers_available() else None)),
("fnet", ("FNetTokenizer", "FNetTokenizerFast" if is_tokenizers_available() else None)),
("fsmt", ("FSMTTokenizer", None)),
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/blip/processing_blip.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class BlipProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

def __init__(self, image_processor, tokenizer, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/blip_2/processing_blip_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class Blip2Processor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs):
Expand Down
8 changes: 4 additions & 4 deletions src/transformers/models/bridgetower/processing_bridgetower.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,19 @@ class BridgeTowerProcessor(ProcessorMixin):
Constructs a BridgeTower processor which wraps a Roberta tokenizer and BridgeTower image processor into a single
processor.

[`BridgeTowerProcessor`] offers all the functionalities of [`BridgeTowerImageProcessor`] and
[`BridgeTowerProcessor`] offers all the functionalities of [`AutoImageProcessor`] and
[`RobertaTokenizerFast`]. See the docstring of [`~BridgeTowerProcessor.__call__`] and
[`~BridgeTowerProcessor.decode`] for more information.

Args:
image_processor (`BridgeTowerImageProcessor`):
An instance of [`BridgeTowerImageProcessor`]. The image processor is a required input.
image_processor (`AutoImageProcessor`):
An instance of [`AutoImageProcessor`]. The image processor is a required input.
tokenizer (`RobertaTokenizerFast`):
An instance of [`RobertaTokenizerFast`]. The tokenizer is a required input.
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "BridgeTowerImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = ("RobertaTokenizer", "RobertaTokenizerFast")
valid_processor_kwargs = BridgeTowerProcessorKwargs

Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/chameleon/processing_chameleon.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ class ChameleonProcessor(ProcessorMixin):
Constructs a Chameleon processor which wraps a Chameleon image processor and a Chameleon tokenizer into a single
processor.

[`ChameleonProcessor`] offers all the functionalities of [`ChameleonImageProcessor`] and [`LlamaTokenizerFast`].
[`ChameleonProcessor`] offers all the functionalities of [`AutoImageProcessor`] and [`LlamaTokenizerFast`].
See the [`~ChameleonProcessor.__call__`] and [`~ChameleonProcessor.decode`] for more information.

Args:
image_processor ([`ChameleonImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`LlamaTokenizerFast`]):
The tokenizer is a required input.
Expand All @@ -71,7 +71,7 @@ class ChameleonProcessor(ProcessorMixin):

attributes = ["image_processor", "tokenizer"]
tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")
image_processor_class = "ChameleonImageProcessor"
image_processor_class = "AutoImageProcessor"

def __init__(self, image_processor, tokenizer, image_seq_length: int = 1024, image_token: str = "<image>"):
self.image_seq_length = image_seq_length
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class ChineseCLIPProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("ChineseCLIPImageProcessor", "ChineseCLIPImageProcessorFast")
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor=None, tokenizer=None, **kwargs):
super().__init__(image_processor, tokenizer)
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/clip/processing_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class CLIPProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor=None, tokenizer=None, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/clipseg/processing_clipseg.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class CLIPSegProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast")
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor=None, tokenizer=None, **kwargs):
super().__init__(image_processor, tokenizer)
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/emu3/processing_emu3.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ class Emu3Processor(ProcessorMixin):
Constructs an Emu3 processor which wraps an Emu3 image processor and a GPT2 tokenizer into a single
processor.

[`Emu3Processor`] offers all the functionalities of [`Emu3ImageProcessor`] and [`GPT2TokenizerFast`].
[`Emu3Processor`] offers all the functionalities of [`AutoImageProcessor`] and [`GPT2TokenizerFast`].
See the [`~Emu3Processor.__call__`] and [`~Emu3Processor.decode`] for more information.

Args:
image_processor ([`Emu3ImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`Emu3TokenizerFast`]):
The tokenizer is a required input.
Expand All @@ -66,7 +66,7 @@ class Emu3Processor(ProcessorMixin):

attributes = ["image_processor", "tokenizer"]
tokenizer_class = ("GPT2Tokenizer", "GPT2TokenizerFast")
image_processor_class = "Emu3ImageProcessor"
image_processor_class = "AutoImageProcessor"

def __init__(
self,
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/flava/processing_flava.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,16 @@ class FlavaProcessor(ProcessorMixin):
r"""
Constructs a FLAVA processor which wraps a FLAVA image processor and a FLAVA tokenizer into a single processor.

[`FlavaProcessor`] offers all the functionalities of [`FlavaImageProcessor`] and [`BertTokenizerFast`]. See the
[`FlavaProcessor`] offers all the functionalities of [`AutoImageProcessor`] and [`BertTokenizerFast`]. See the
[`~FlavaProcessor.__call__`] and [`~FlavaProcessor.decode`] for more information.

Args:
image_processor ([`FlavaImageProcessor`], *optional*): The image processor is a required input.
image_processor ([`AutoImageProcessor`], *optional*): The image processor is a required input.
tokenizer ([`BertTokenizerFast`], *optional*): The tokenizer is a required input.
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "FlavaImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

def __init__(self, image_processor=None, tokenizer=None, **kwargs):
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/fuyu/processing_fuyu.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,18 +337,18 @@ class FuyuProcessor(ProcessorMixin):
r"""
Constructs a Fuyu processor which wraps a Fuyu image processor and a Llama tokenizer into a single processor.

[`FuyuProcessor`] offers all the functionalities of [`FuyuImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`FuyuProcessor`] offers all the functionalities of [`AutoImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`~FuyuProcessor.__call__`] and [`~FuyuProcessor.decode`] for more information.

Args:
image_processor ([`FuyuImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`LlamaTokenizerFast`]):
The tokenizer is a required input.
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "FuyuImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor, tokenizer, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,19 @@ class GroundingDinoProcessor(ProcessorMixin):
Constructs a Grounding DINO processor which wraps a Deformable DETR image processor and a BERT tokenizer into a
single processor.

[`GroundingDinoProcessor`] offers all the functionalities of [`GroundingDinoImageProcessor`] and
[`GroundingDinoProcessor`] offers all the functionalities of [`AutoImageProcessor`] and
[`AutoTokenizer`]. See the docstring of [`~GroundingDinoProcessor.__call__`] and [`~GroundingDinoProcessor.decode`]
for more information.

Args:
image_processor (`GroundingDinoImageProcessor`):
An instance of [`GroundingDinoImageProcessor`]. The image processor is a required input.
image_processor (`AutoImageProcessor`):
An instance of [`AutoImageProcessor`]. The image processor is a required input.
tokenizer (`AutoTokenizer`):
An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "GroundingDinoImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"
valid_processor_kwargs = GroundingDinoProcessorKwargs

Expand All @@ -145,7 +145,7 @@ def __call__(
**kwargs: Unpack[GroundingDinoProcessorKwargs],
) -> BatchEncoding:
"""
This method uses [`GroundingDinoImageProcessor.__call__`] method to prepare image(s) for the model, and
This method uses [`AutoImageProcessor.__call__`] method to prepare image(s) for the model, and
[`BertTokenizerFast.__call__`] to prepare text for the model.

Args:
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/idefics/processing_idefics.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,13 @@ def is_url(string):

class IdeficsProcessor(ProcessorMixin):
r"""
Constructs a IDEFICS processor which wraps a LLama tokenizer and IDEFICS image processor into a single processor.
Constructs an IDEFICS processor which wraps a Llama tokenizer and an AutoImageProcessor into a single processor.

[`IdeficsProcessor`] offers all the functionalities of [`IdeficsImageProcessor`] and [`LlamaTokenizerFast`]. See
the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

Args:
image_processor (`IdeficsImageProcessor`):
image_processor (`AutoImageProcessor`):
An instance of [`IdeficsImageProcessor`]. The image processor is a required input.
tokenizer (`LlamaTokenizerFast`):
An instance of [`LlamaTokenizerFast`]. The tokenizer is a required input.
Expand All @@ -154,7 +154,7 @@ class IdeficsProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "IdeficsImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "LlamaTokenizerFast"

def __init__(self, image_processor, tokenizer=None, image_size=224, add_end_of_utterance_token=None, **kwargs):
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/idefics2/processing_idefics2.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ class Idefics2ProcessorKwargs(ProcessingKwargs, total=False):

class Idefics2Processor(ProcessorMixin):
r"""
Constructs a IDEFICS2 processor which wraps a LLama tokenizer and IDEFICS2 image processor into a single processor.
Constructs an IDEFICS2 processor which wraps a Llama tokenizer and an AutoImageProcessor into a single processor.

[`IdeficsProcessor`] offers all the functionalities of [`Idefics2ImageProcessor`] and [`LlamaTokenizerFast`]. See
the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

Args:
image_processor (`Idefics2ImageProcessor`):
image_processor (`AutoImageProcessor`):
An instance of [`Idefics2ImageProcessor`]. The image processor is a required input.
tokenizer (`PreTrainedTokenizerBase`, *optional*):
An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
Expand All @@ -76,7 +76,7 @@ class Idefics2Processor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "Idefics2ImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/idefics3/processing_idefics3.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ class Idefics3ProcessorKwargs(ProcessingKwargs, total=False):

class Idefics3Processor(ProcessorMixin):
r"""
Constructs a Idefics3 processor which wraps a LLama tokenizer and Idefics3 image processor into a single processor.
Constructs an Idefics3 processor which wraps a Llama tokenizer and an AutoImageProcessor into a single processor.

[`Idefics3Processor`] offers all the functionalities of [`Idefics3ImageProcessor`] and [`Idefics3TokenizerFast`]. See
the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

Args:
image_processor (`Idefics3ImageProcessor`):
image_processor (`AutoImageProcessor`):
An instance of [`Idefics3ImageProcessor`]. The image processor is a required input.
tokenizer (`PreTrainedTokenizerBase`, *optional*):
An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
Expand All @@ -122,7 +122,7 @@ class Idefics3Processor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "Idefics3ImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class InstructBlipProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer", "qformer_tokenizer"]
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"
qformer_tokenizer_class = "AutoTokenizer"

Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/janus/processing_janus.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ class JanusProcessor(ProcessorMixin):
r"""
Constructs a Janus processor which wraps a Janus Image Processor and a Llama tokenizer into a single processor.

[`JanusProcessor`] offers all the functionalities of [`JanusImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`JanusProcessor`] offers all the functionalities of [`AutoImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`~JanusProcessor.__call__`] and [`~JanusProcessor.decode`] for more information.

Args:
image_processor ([`JanusImageProcessor`]):
image_processor ([`AutoImageProcessor`]):
The image processor is a required input.
tokenizer ([`LlamaTokenizerFast`]):
The tokenizer is a required input.
Expand All @@ -65,7 +65,7 @@ class JanusProcessor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = "JanusImageProcessor"
image_processor_class = "AutoImageProcessor"
tokenizer_class = "LlamaTokenizerFast"

def __init__(self, image_processor, tokenizer, chat_template=None, use_default_system_prompt=False, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/kosmos2/processing_kosmos2.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class Kosmos2Processor(ProcessorMixin):
"""

attributes = ["image_processor", "tokenizer"]
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"

def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwargs):
Expand Down
Loading