diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py index fbc60f7db5ed..4dfa7f08b0db 100644 --- a/src/transformers/image_processing_utils_fast.py +++ b/src/transformers/image_processing_utils_fast.py @@ -46,7 +46,6 @@ auto_docstring, is_torch_available, is_torchvision_available, - is_torchvision_v2_available, is_vision_available, logging, ) @@ -60,14 +59,13 @@ import torch if is_torchvision_available(): + from torchvision.transforms.v2 import functional as F + from .image_utils import pil_torch_interpolation_mapping + else: pil_torch_interpolation_mapping = None -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -elif is_torchvision_available(): - from torchvision.transforms import functional as F logger = logging.get_logger(__name__) diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py index d6d0e19afb2e..36ed821e696a 100644 --- a/src/transformers/image_utils.py +++ b/src/transformers/image_utils.py @@ -28,7 +28,6 @@ is_torch_available, is_torch_tensor, is_torchvision_available, - is_torchvision_v2_available, is_vision_available, logging, requires_backends, @@ -54,9 +53,7 @@ from torchvision.transforms import InterpolationMode pil_torch_interpolation_mapping = { - PILImageResampling.NEAREST: InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else InterpolationMode.NEAREST, + PILImageResampling.NEAREST: InterpolationMode.NEAREST_EXACT, PILImageResampling.BOX: InterpolationMode.BOX, PILImageResampling.BILINEAR: InterpolationMode.BILINEAR, PILImageResampling.HAMMING: InterpolationMode.HAMMING, diff --git a/src/transformers/models/beit/image_processing_beit_fast.py b/src/transformers/models/beit/image_processing_beit_fast.py index 7a55543dee62..7ff894127ecd 100644 --- a/src/transformers/models/beit/image_processing_beit_fast.py +++ b/src/transformers/models/beit/image_processing_beit_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -38,16 +39,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class BeitFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): r""" do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`): diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py b/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py index 44da5d4486e7..5be6f9f6c54b 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower_fast.py @@ -18,6 +18,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -31,13 +32,7 @@ reorder_images, ) from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling -from ...utils import auto_docstring, is_torchvision_v2_available - - -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F +from ...utils import auto_docstring def make_pixel_mask( diff --git 
a/src/transformers/models/chameleon/image_processing_chameleon_fast.py b/src/transformers/models/chameleon/image_processing_chameleon_fast.py index 39aa4ec87b00..1d102614f7df 100644 --- a/src/transformers/models/chameleon/image_processing_chameleon_fast.py +++ b/src/transformers/models/chameleon/image_processing_chameleon_fast.py @@ -19,17 +19,13 @@ import numpy as np import PIL import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import BaseImageProcessorFast from ...image_utils import ImageInput, PILImageResampling, SizeDict -from ...utils import auto_docstring, is_torchvision_v2_available, logging +from ...utils import auto_docstring, logging -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py b/src/transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py index afe76134bc8d..322e98dbd0f5 100644 --- a/src/transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +++ b/src/transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py @@ -24,6 +24,7 @@ import numpy as np import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -34,13 +35,7 @@ ) from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, ImageInput, PILImageResampling, SizeDict from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_v2_available - - -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F +from ...utils import TensorType, auto_docstring class Cohere2VisionFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py index 5b9fe6325517..351d4fa1470f 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr_fast.py @@ -10,6 +10,7 @@ import torch from torch import nn from torchvision.io import read_image +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -33,7 +34,7 @@ validate_annotations, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging +from ...utils import TensorType, auto_docstring, logging from ...utils.import_utils import requires from .image_processing_conditional_detr import ( compute_segments, @@ -43,12 +44,6 @@ ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - logger = logging.get_logger(__name__) @@ -433,13 +428,7 @@ def resize_annotation( resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`): The resampling filter to use when resizing the masks. 
""" - interpolation = ( - interpolation - if interpolation is not None - else F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST - ) + interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)] new_annotation = {} diff --git a/src/transformers/models/convnext/image_processing_convnext_fast.py b/src/transformers/models/convnext/image_processing_convnext_fast.py index a1002d950399..3ab00c0fd091 100644 --- a/src/transformers/models/convnext/image_processing_convnext_fast.py +++ b/src/transformers/models/convnext/image_processing_convnext_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -37,16 +38,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class ConvNextFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ crop_pct (`float`, *optional*): diff --git a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl.py b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl.py index b8c629b74576..45f4fd2bdb93 100644 --- a/src/transformers/models/deepseek_vl/image_processing_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/image_processing_deepseek_vl.py @@ -38,12 +38,7 @@ valid_images, validate_preprocess_arguments, ) -from ...utils import ( - TensorType, - filter_out_non_signature_kwargs, - is_vision_available, - logging, -) +from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging if is_vision_available(): diff --git a/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py b/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py index 22d8e0928a6e..ce884da8d08b 100644 --- a/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py +++ b/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py @@ -29,11 +29,7 @@ from ...modeling_outputs import ModelOutput from ...modeling_utils import PreTrainedModel from ...processing_utils import Unpack -from ...utils import ( - TransformersKwargs, - auto_docstring, - can_return_tuple, -) +from ...utils import TransformersKwargs, auto_docstring, can_return_tuple from ..auto import AutoModel from .configuration_deepseek_vl import DeepseekVLConfig diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py index cd07f8db350b..8458d02d58a5 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py @@ -9,6 +9,7 @@ import torch from torchvision.io import read_image +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -32,17 +33,11 @@ validate_annotations, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging +from ...utils import TensorType, auto_docstring, logging from ...utils.import_utils import requires 
from .image_processing_deformable_detr import get_size_with_aspect_ratio -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - logger = logging.get_logger(__name__) @@ -427,13 +422,7 @@ def resize_annotation( resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`): The resampling filter to use when resizing the masks. """ - interpolation = ( - interpolation - if interpolation is not None - else F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST - ) + interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)] new_annotation = {} diff --git a/src/transformers/models/depth_pro/image_processing_depth_pro_fast.py b/src/transformers/models/depth_pro/image_processing_depth_pro_fast.py index 76c1a53e0073..bc621e0ffc26 100644 --- a/src/transformers/models/depth_pro/image_processing_depth_pro_fast.py +++ b/src/transformers/models/depth_pro/image_processing_depth_pro_fast.py @@ -30,7 +30,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, requires_backends, ) @@ -41,10 +40,7 @@ from .modeling_depth_pro import DepthProDepthEstimatorOutput -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F +from torchvision.transforms.v2 import functional as F logger = logging.get_logger(__name__) diff --git a/src/transformers/models/detr/image_processing_detr_fast.py b/src/transformers/models/detr/image_processing_detr_fast.py index f30ebfa41859..a2ac8d03eed3 100644 --- a/src/transformers/models/detr/image_processing_detr_fast.py +++ b/src/transformers/models/detr/image_processing_detr_fast.py @@ -23,6 +23,7 @@ import torch from torch import nn from torchvision.io import read_image +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -49,7 +50,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from ...utils.import_utils import requires @@ -61,12 +61,6 @@ ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - logger = logging.get_logger(__name__) SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION, AnnotationFormat.COCO_PANOPTIC) @@ -450,13 +444,7 @@ def resize_annotation( resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`): The resampling filter to use when resizing the masks. 
""" - interpolation = ( - interpolation - if interpolation is not None - else F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST - ) + interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)] new_annotation = {} diff --git a/src/transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py b/src/transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py index cdb68044bfc4..7c080485ed00 100644 --- a/src/transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +++ b/src/transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from transformers.image_processing_base import BatchFeature from transformers.image_processing_utils_fast import BaseImageProcessorFast, group_images_by_shape, reorder_images @@ -24,17 +25,11 @@ from transformers.utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from transformers.utils.import_utils import requires -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/donut/image_processing_donut_fast.py b/src/transformers/models/donut/image_processing_donut_fast.py index 7c808ab60cd4..29e06831b1b4 100644 --- a/src/transformers/models/donut/image_processing_donut_fast.py +++ b/src/transformers/models/donut/image_processing_donut_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import group_images_by_shape, reorder_images @@ -25,16 +26,10 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/efficientloftr/image_processing_efficientloftr_fast.py b/src/transformers/models/efficientloftr/image_processing_efficientloftr_fast.py index 5f7437c45b2e..1463ef405f37 100644 --- a/src/transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +++ b/src/transformers/models/efficientloftr/image_processing_efficientloftr_fast.py @@ -39,17 +39,13 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) if TYPE_CHECKING: from .modeling_efficientloftr import KeypointMatchingOutput -if is_torchvision_v2_available(): - import torchvision.transforms.v2.functional as F -else: - import torchvision.transforms.functional as F +import torchvision.transforms.v2.functional as F def _is_valid_image(image): diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet_fast.py b/src/transformers/models/efficientnet/image_processing_efficientnet_fast.py index 3544d927c146..77e787614a10 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet_fast.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet_fast.py @@ -18,6 +18,7 @@ from typing import Optional, Union import torch +from 
torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import group_images_by_shape, reorder_images @@ -26,16 +27,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class EfficientNetFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ Args: diff --git a/src/transformers/models/eomt/image_processing_eomt_fast.py b/src/transformers/models/eomt/image_processing_eomt_fast.py index 97a13a0745eb..ca80231d3a76 100644 --- a/src/transformers/models/eomt/image_processing_eomt_fast.py +++ b/src/transformers/models/eomt/image_processing_eomt_fast.py @@ -19,6 +19,7 @@ import numpy as np import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -40,7 +41,6 @@ TensorType, auto_docstring, filter_out_non_signature_kwargs, - is_torchvision_v2_available, ) from .image_processing_eomt import ( compute_segments, @@ -50,12 +50,6 @@ ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class EomtImageProcessorFastKwargs(DefaultFastImageProcessorKwargs): """ do_split_image (`bool`, *optional*, defaults to `False`): @@ -204,9 +198,7 @@ def _preprocess_image_like_inputs( "do_normalize": False, "do_rescale": False, # Nearest interpolation is used for segmentation maps instead of BILINEAR. - "interpolation": F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST, + "interpolation": F.InterpolationMode.NEAREST_EXACT, } ) diff --git a/src/transformers/models/flava/image_processing_flava_fast.py b/src/transformers/models/flava/image_processing_flava_fast.py index 97409ddd57ed..732d25e71f69 100644 --- a/src/transformers/models/flava/image_processing_flava_fast.py +++ b/src/transformers/models/flava/image_processing_flava_fast.py @@ -21,6 +21,7 @@ from typing import Any, Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -34,7 +35,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) from .image_processing_flava import ( FLAVA_CODEBOOK_MEAN, @@ -45,12 +45,6 @@ ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class FlavaMaskingGenerator: def __init__( self, diff --git a/src/transformers/models/gemma3/image_processing_gemma3_fast.py b/src/transformers/models/gemma3/image_processing_gemma3_fast.py index eb828a89643d..c61152bc6b22 100644 --- a/src/transformers/models/gemma3/image_processing_gemma3_fast.py +++ b/src/transformers/models/gemma3/image_processing_gemma3_fast.py @@ -19,6 +19,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -32,16 +33,10 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms 
import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/glm4v/image_processing_glm4v_fast.py b/src/transformers/models/glm4v/image_processing_glm4v_fast.py index fbf4aebaac6a..8cdf31a437ae 100644 --- a/src/transformers/models/glm4v/image_processing_glm4v_fast.py +++ b/src/transformers/models/glm4v/image_processing_glm4v_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import ( BatchFeature, @@ -38,17 +39,11 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from .image_processing_glm4v import smart_resize -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py index 5277f1c4e13b..a47a1422a5dc 100644 --- a/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +++ b/src/transformers/models/got_ocr2/image_processing_got_ocr2_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -30,17 +31,10 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) from .image_processing_got_ocr2 import get_optimal_tiled_canvas -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class GotOcr2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ crop_to_patches (`bool`, *optional*, defaults to `False`): diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py index 66528519eef8..744cb5f92923 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py +++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino_fast.py @@ -9,6 +9,7 @@ import torch from torchvision.io import read_image +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -32,7 +33,7 @@ validate_annotations, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging +from ...utils import TensorType, auto_docstring, logging from ...utils.import_utils import requires from .image_processing_grounding_dino import get_size_with_aspect_ratio @@ -41,12 +42,6 @@ from .modeling_grounding_dino import GroundingDinoObjectDetectionOutput -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - logger = logging.get_logger(__name__) @@ -459,13 +454,7 @@ def resize_annotation( resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`): The resampling filter to use when resizing the masks. 
""" - interpolation = ( - interpolation - if interpolation is not None - else F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST - ) + interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)] new_annotation = {} diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt_fast.py b/src/transformers/models/imagegpt/image_processing_imagegpt_fast.py index ddfee7c757fe..7a6bcc53ae1a 100644 --- a/src/transformers/models/imagegpt/image_processing_imagegpt_fast.py +++ b/src/transformers/models/imagegpt/image_processing_imagegpt_fast.py @@ -18,6 +18,7 @@ import numpy as np import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -30,16 +31,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - def squared_euclidean_distance_torch(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: """ Compute squared Euclidean distances between all pixels and clusters. diff --git a/src/transformers/models/instructblipvideo/video_processing_instructblipvideo.py b/src/transformers/models/instructblipvideo/video_processing_instructblipvideo.py index a2cd3cf351d2..d2fe3cc7f343 100644 --- a/src/transformers/models/instructblipvideo/video_processing_instructblipvideo.py +++ b/src/transformers/models/instructblipvideo/video_processing_instructblipvideo.py @@ -20,21 +20,16 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling, SizeDict from ...processing_utils import Unpack, VideosKwargs -from ...utils import TensorType, is_torchvision_v2_available +from ...utils import TensorType from ...video_processing_utils import BaseVideoProcessor from ...video_utils import group_videos_by_shape, reorder_videos -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class InstructBlipVideoVideoProcessorInitKwargs(VideosKwargs): ... 
diff --git a/src/transformers/models/internvl/video_processing_internvl.py b/src/transformers/models/internvl/video_processing_internvl.py index a2e06d3b7ec4..96d7d3067f73 100644 --- a/src/transformers/models/internvl/video_processing_internvl.py +++ b/src/transformers/models/internvl/video_processing_internvl.py @@ -17,21 +17,16 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling, SizeDict from ...processing_utils import Unpack, VideosKwargs -from ...utils import TensorType, is_torchvision_v2_available +from ...utils import TensorType from ...video_processing_utils import BaseVideoProcessor from ...video_utils import VideoMetadata, group_videos_by_shape, reorder_videos -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class InternVLVideoProcessorInitKwargs(VideosKwargs): initial_shift: Union[bool, float, int] diff --git a/src/transformers/models/janus/image_processing_janus_fast.py b/src/transformers/models/janus/image_processing_janus_fast.py index 9ed2732fb3d0..6cbca591626e 100644 --- a/src/transformers/models/janus/image_processing_janus_fast.py +++ b/src/transformers/models/janus/image_processing_janus_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -36,16 +37,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class JanusFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): r""" min_size (`int`, *optional*, defaults to 14): diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py index 723687d58219..354bbe21c4db 100644 --- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import ChannelDimension, group_images_by_shape, reorder_images @@ -25,18 +26,12 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, requires_backends, ) from .image_processing_layoutlmv2 import apply_tesseract -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py index 2ab8f8dd48cc..caefa9b89660 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from 
...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import ChannelDimension, group_images_by_shape, reorder_images @@ -25,18 +26,12 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, requires_backends, ) from .image_processing_layoutlmv3 import apply_tesseract -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/levit/image_processing_levit_fast.py b/src/transformers/models/levit/image_processing_levit_fast.py index e452894d6e2e..ae30194288fa 100644 --- a/src/transformers/models/levit/image_processing_levit_fast.py +++ b/src/transformers/models/levit/image_processing_levit_fast.py @@ -17,6 +17,7 @@ from typing import Optional import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import BaseImageProcessorFast, SizeDict from ...image_transforms import ( @@ -24,13 +25,7 @@ get_resize_output_image_size, ) from ...image_utils import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, PILImageResampling -from ...utils import auto_docstring, is_torchvision_v2_available - - -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F +from ...utils import auto_docstring @auto_docstring diff --git a/src/transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py b/src/transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py index c709a01dca41..4081c86e108a 100755 --- a/src/transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py +++ b/src/transformers/models/lfm2_vl/image_processing_lfm2_vl_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -38,16 +39,10 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/llama4/image_processing_llama4_fast.py b/src/transformers/models/llama4/image_processing_llama4_fast.py index 946fdde0a643..6506d5749d94 100644 --- a/src/transformers/models/llama4/image_processing_llama4_fast.py +++ b/src/transformers/models/llama4/image_processing_llama4_fast.py @@ -20,6 +20,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -33,16 +34,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - def get_factors(dividend: int) -> set[int]: """ Calculate all factors of a given number, i.e. 
a divisor that leaves diff --git a/src/transformers/models/llava/image_processing_llava_fast.py b/src/transformers/models/llava/image_processing_llava_fast.py index 41bb94f5b7e0..596070040549 100644 --- a/src/transformers/models/llava/image_processing_llava_fast.py +++ b/src/transformers/models/llava/image_processing_llava_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -38,16 +39,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class LlavaFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): ... diff --git a/src/transformers/models/llava_next/image_processing_llava_next_fast.py b/src/transformers/models/llava_next/image_processing_llava_next_fast.py index b502d98d6ac3..df20e2b90e83 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next_fast.py +++ b/src/transformers/models/llava_next/image_processing_llava_next_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_patch_output_size, select_best_resolution from ...image_processing_utils_fast import ( @@ -39,16 +40,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class LlavaNextFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ image_grid_pinpoints (`list[list[int]]`, *optional*): diff --git a/src/transformers/models/mask2former/image_processing_mask2former_fast.py b/src/transformers/models/mask2former/image_processing_mask2former_fast.py index a5d662288119..58dbb09d6319 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former_fast.py +++ b/src/transformers/models/mask2former/image_processing_mask2former_fast.py @@ -23,6 +23,7 @@ import torch from torch import nn +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -42,7 +43,7 @@ PILImageResampling, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging +from ...utils import TensorType, auto_docstring, logging from .image_processing_mask2former import ( compute_segments, convert_segmentation_to_rle, @@ -51,11 +52,6 @@ ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) @@ -348,9 +344,7 @@ def _preprocess( image=grouped_segmentation_maps[shape], size=size, size_divisor=size_divisor, - interpolation=F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST, + interpolation=F.InterpolationMode.NEAREST_EXACT, ) resized_images_grouped[shape] = stacked_images if segmentation_maps is not None: diff --git a/src/transformers/models/maskformer/image_processing_maskformer_fast.py b/src/transformers/models/maskformer/image_processing_maskformer_fast.py index ab6411f1bb3f..9e15486cfa35 
100644 --- a/src/transformers/models/maskformer/image_processing_maskformer_fast.py +++ b/src/transformers/models/maskformer/image_processing_maskformer_fast.py @@ -20,6 +20,7 @@ import torch from torch import nn +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -42,7 +43,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from .image_processing_maskformer import ( @@ -53,11 +53,6 @@ ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) @@ -354,9 +349,7 @@ def _preprocess( image=grouped_segmentation_maps[shape], size=size, size_divisor=size_divisor, - interpolation=F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST, + interpolation=F.InterpolationMode.NEAREST_EXACT, ) resized_images_grouped[shape] = stacked_images if segmentation_maps is not None: diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py index 948f9e96d7d9..6c40fbf3f9b8 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -38,16 +39,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class MobileNetV2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`): @@ -134,9 +128,7 @@ def _preprocess_image_like_inputs( "do_normalize": False, "do_rescale": False, # Nearest interpolation is used for segmentation maps instead of BILINEAR. 
- "interpolation": F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST, + "interpolation": F.InterpolationMode.NEAREST_EXACT, } ) diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit_fast.py b/src/transformers/models/mobilevit/image_processing_mobilevit_fast.py index 71c8ababba36..fab16ecfdc87 100644 --- a/src/transformers/models/mobilevit/image_processing_mobilevit_fast.py +++ b/src/transformers/models/mobilevit/image_processing_mobilevit_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -36,16 +37,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class MobileVitFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ do_flip_channel_order (`bool`, *optional*, defaults to `self.do_flip_channel_order`): @@ -135,9 +129,7 @@ def _preprocess_image_like_inputs( "do_rescale": False, "do_flip_channel_order": False, # Nearest interpolation is used for segmentation maps instead of BILINEAR. - "interpolation": F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST, + "interpolation": F.InterpolationMode.NEAREST_EXACT, } ) diff --git a/src/transformers/models/nougat/image_processing_nougat_fast.py b/src/transformers/models/nougat/image_processing_nougat_fast.py index d6579029e4f5..15cee9051082 100644 --- a/src/transformers/models/nougat/image_processing_nougat_fast.py +++ b/src/transformers/models/nougat/image_processing_nougat_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -40,16 +41,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class NougatFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ Args: diff --git a/src/transformers/models/oneformer/image_processing_oneformer_fast.py b/src/transformers/models/oneformer/image_processing_oneformer_fast.py index 20b34bb7fd39..4a20a04e70f2 100644 --- a/src/transformers/models/oneformer/image_processing_oneformer_fast.py +++ b/src/transformers/models/oneformer/image_processing_oneformer_fast.py @@ -18,6 +18,7 @@ import torch from torch import nn +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -39,17 +40,11 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from .image_processing_oneformer import load_metadata, prepare_metadata -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) @@ -453,11 +448,7 @@ def _preprocess( for shape, stacked_segmentation_maps in grouped_segmentation_maps.items(): if do_resize: stacked_segmentation_maps = self.resize( - stacked_segmentation_maps, - size=size, - 
interpolation=F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST, + stacked_segmentation_maps, size=size, interpolation=F.InterpolationMode.NEAREST_EXACT ) processed_segmentation_maps_grouped[shape] = stacked_segmentation_maps processed_segmentation_maps = reorder_images( diff --git a/src/transformers/models/ovis2/image_processing_ovis2_fast.py b/src/transformers/models/ovis2/image_processing_ovis2_fast.py index 07fbf82f9fbe..04b79299e9e1 100644 --- a/src/transformers/models/ovis2/image_processing_ovis2_fast.py +++ b/src/transformers/models/ovis2/image_processing_ovis2_fast.py @@ -16,6 +16,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -35,17 +36,10 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) from .image_processing_ovis2 import get_min_tile_covering_grid, get_optimal_tiled_canvas -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class Ovis2ImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ Args: diff --git a/src/transformers/models/perceiver/image_processing_perceiver_fast.py b/src/transformers/models/perceiver/image_processing_perceiver_fast.py index 82c1bcd9d319..72cb17cd40cd 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver_fast.py +++ b/src/transformers/models/perceiver/image_processing_perceiver_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature from ...image_transforms import group_images_by_shape, reorder_images @@ -24,16 +25,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - @auto_docstring class PerceiverImageProcessorFast(BaseImageProcessorFast): resample = PILImageResampling.BICUBIC diff --git a/src/transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py b/src/transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py index 532136f8108e..4bd9928daa94 100644 --- a/src/transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py +++ b/src/transformers/models/phi4_multimodal/image_processing_phi4_multimodal_fast.py @@ -16,6 +16,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -27,16 +28,10 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/pixtral/image_processing_pixtral_fast.py b/src/transformers/models/pixtral/image_processing_pixtral_fast.py index db3e75760318..0893af3830f9 100644 --- a/src/transformers/models/pixtral/image_processing_pixtral_fast.py +++ b/src/transformers/models/pixtral/image_processing_pixtral_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from 
torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -30,17 +31,11 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from .image_processing_pixtral import get_resize_output_image_size -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/poolformer/image_processing_poolformer_fast.py b/src/transformers/models/poolformer/image_processing_poolformer_fast.py index 70c6ed55bc8a..62d5f276859f 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer_fast.py +++ b/src/transformers/models/poolformer/image_processing_poolformer_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs from ...image_transforms import ( @@ -38,16 +39,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class PoolFormerFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ Args: diff --git a/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py b/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py index 763fd613c218..06d6ed156443 100644 --- a/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py +++ b/src/transformers/models/prompt_depth_anything/image_processing_prompt_depth_anything_fast.py @@ -24,6 +24,7 @@ if TYPE_CHECKING: from ...modeling_outputs import DepthEstimatorOutput import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils_fast import ( BaseImageProcessorFast, @@ -42,17 +43,10 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, requires_backends, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - def _constrain_to_multiple_of(val, multiple, min_val=0, max_val=None): """Constrain a value to be a multiple of another value.""" x = round(val / multiple) * multiple diff --git a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py index 80242a331ace..ec9878da3222 100644 --- a/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py +++ b/src/transformers/models/qwen2_vl/image_processing_qwen2_vl_fast.py @@ -22,6 +22,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -42,18 +43,12 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from ...video_utils import VideoInput, make_batched_videos from .image_processing_qwen2_vl import smart_resize -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = 
logging.get_logger(__name__) diff --git a/src/transformers/models/qwen2_vl/video_processing_qwen2_vl.py b/src/transformers/models/qwen2_vl/video_processing_qwen2_vl.py index 3fb020443f35..84bcd827f02e 100644 --- a/src/transformers/models/qwen2_vl/video_processing_qwen2_vl.py +++ b/src/transformers/models/qwen2_vl/video_processing_qwen2_vl.py @@ -23,6 +23,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_utils import ( @@ -34,18 +35,12 @@ get_image_size, ) from ...processing_utils import Unpack, VideosKwargs -from ...utils import TensorType, add_start_docstrings, is_torchvision_v2_available +from ...utils import TensorType, add_start_docstrings from ...video_processing_utils import BASE_VIDEO_PROCESSOR_DOCSTRING, BaseVideoProcessor from ...video_utils import VideoMetadata, group_videos_by_shape, reorder_videos from .image_processing_qwen2_vl import smart_resize -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class Qwen2VLVideoProcessorInitKwargs(VideosKwargs): min_pixels: Optional[int] max_pixels: Optional[int] diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py index 1dfffd31082c..9aae271deacc 100644 --- a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py +++ b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py @@ -32,7 +32,7 @@ validate_annotations, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, requires_backends +from ...utils import TensorType, auto_docstring, requires_backends from ...utils.import_utils import requires from .image_processing_rt_detr import get_size_with_aspect_ratio @@ -242,13 +242,7 @@ def resize_annotation( resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`): The resampling filter to use when resizing the masks. 
""" - interpolation = ( - interpolation - if interpolation is not None - else F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST - ) + interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)] new_annotation = {} diff --git a/src/transformers/models/sam/image_processing_sam_fast.py b/src/transformers/models/sam/image_processing_sam_fast.py index 65ee02e97dac..7cb5f7b2bfc2 100644 --- a/src/transformers/models/sam/image_processing_sam_fast.py +++ b/src/transformers/models/sam/image_processing_sam_fast.py @@ -23,6 +23,7 @@ import torch from torch.nn import functional as F from torchvision.ops.boxes import batched_nms +from torchvision.transforms.v2 import functional as F_t from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -39,13 +40,7 @@ pil_torch_interpolation_mapping, ) from ...processing_utils import Unpack -from ...utils import auto_docstring, is_torchvision_v2_available - - -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F_t -else: - from torchvision.transforms import functional as F_t +from ...utils import auto_docstring class SamFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): @@ -223,9 +218,7 @@ def _preprocess_image_like_inputs( { "do_normalize": False, "do_rescale": False, - "interpolation": F_t.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F_t.InterpolationMode.NEAREST, + "interpolation": F_t.InterpolationMode.NEAREST_EXACT, "size": segmentation_maps_kwargs.pop("mask_size"), "pad_size": segmentation_maps_kwargs.pop("mask_pad_size"), } diff --git a/src/transformers/models/sam2/image_processing_sam2_fast.py b/src/transformers/models/sam2/image_processing_sam2_fast.py index f78e8b65bea1..30e99980f4d1 100644 --- a/src/transformers/models/sam2/image_processing_sam2_fast.py +++ b/src/transformers/models/sam2/image_processing_sam2_fast.py @@ -40,10 +40,7 @@ pil_torch_interpolation_mapping, ) from ...processing_utils import Unpack -from ...utils import ( - TensorType, - auto_docstring, -) +from ...utils import TensorType, auto_docstring class Sam2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/sam2/modeling_sam2.py b/src/transformers/models/sam2/modeling_sam2.py index ef16466d344c..fe42cc39cacf 100644 --- a/src/transformers/models/sam2/modeling_sam2.py +++ b/src/transformers/models/sam2/modeling_sam2.py @@ -37,10 +37,7 @@ from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import Unpack from ...pytorch_utils import compile_compatible_method_lru_cache -from ...utils import ( - ModelOutput, - auto_docstring, -) +from ...utils import ModelOutput, auto_docstring from ...utils.generic import TransformersKwargs, check_model_inputs from ..auto import AutoModel from .configuration_sam2 import ( diff --git a/src/transformers/models/siglip2/image_processing_siglip2_fast.py b/src/transformers/models/siglip2/image_processing_siglip2_fast.py index 64dcfa1ad566..45261fab2cd0 100644 --- a/src/transformers/models/siglip2/image_processing_siglip2_fast.py +++ b/src/transformers/models/siglip2/image_processing_siglip2_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature 
from ...image_processing_utils_fast import ( @@ -32,17 +33,11 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from .image_processing_siglip2 import get_image_size_for_max_num_patches -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/smolvlm/video_processing_smolvlm.py b/src/transformers/models/smolvlm/video_processing_smolvlm.py index 7e8e544b8fc7..522a344b09b5 100644 --- a/src/transformers/models/smolvlm/video_processing_smolvlm.py +++ b/src/transformers/models/smolvlm/video_processing_smolvlm.py @@ -17,21 +17,16 @@ import numpy as np import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_utils import IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD, PILImageResampling, SizeDict from ...processing_utils import Unpack, VideosKwargs -from ...utils import TensorType, is_torchvision_v2_available, logging +from ...utils import TensorType, logging from ...video_processing_utils import BaseVideoProcessor from ...video_utils import VideoMetadata, group_videos_by_shape, reorder_videos -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - logger = logging.get_logger(__name__) DEFAULT_SYSTEM_MESSAGE = "You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language." diff --git a/src/transformers/models/superpoint/image_processing_superpoint_fast.py b/src/transformers/models/superpoint/image_processing_superpoint_fast.py index a752e08ac5f0..54f95fa75af6 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint_fast.py +++ b/src/transformers/models/superpoint/image_processing_superpoint_fast.py @@ -33,17 +33,13 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) if TYPE_CHECKING: from .modeling_superpoint import SuperPointKeypointDescriptionOutput -if is_torchvision_v2_available(): - import torchvision.transforms.v2.functional as F -else: - import torchvision.transforms.functional as F +import torchvision.transforms.v2.functional as F def is_grayscale( diff --git a/src/transformers/models/swin2sr/image_processing_swin2sr_fast.py b/src/transformers/models/swin2sr/image_processing_swin2sr_fast.py index c10bd5081754..82c9d733d367 100644 --- a/src/transformers/models/swin2sr/image_processing_swin2sr_fast.py +++ b/src/transformers/models/swin2sr/image_processing_swin2sr_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, ChannelDimension, get_image_size from ...image_processing_utils_fast import ( @@ -30,17 +31,11 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, ) from ...utils.deprecation import deprecate_kwarg -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git a/src/transformers/models/textnet/image_processing_textnet_fast.py b/src/transformers/models/textnet/image_processing_textnet_fast.py index 
2f5ef22ef5e3..baa6276736f7 100644 --- a/src/transformers/models/textnet/image_processing_textnet_fast.py +++ b/src/transformers/models/textnet/image_processing_textnet_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import BaseImageProcessorFast, DefaultFastImageProcessorKwargs @@ -37,16 +38,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - class TextNetFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): """ size_divisor (`int`, *optional*, defaults to 32): diff --git a/src/transformers/models/tvp/image_processing_tvp_fast.py b/src/transformers/models/tvp/image_processing_tvp_fast.py index e7fe7e621d8c..5d74e6efb71f 100644 --- a/src/transformers/models/tvp/image_processing_tvp_fast.py +++ b/src/transformers/models/tvp/image_processing_tvp_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -34,13 +35,7 @@ make_nested_list_of_images, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_v2_available - - -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F +from ...utils import TensorType, auto_docstring class TvpFastImageProcessorKwargs(DefaultFastImageProcessorKwargs): diff --git a/src/transformers/models/vilt/image_processing_vilt_fast.py b/src/transformers/models/vilt/image_processing_vilt_fast.py index 79e601648c55..6926b655ce45 100644 --- a/src/transformers/models/vilt/image_processing_vilt_fast.py +++ b/src/transformers/models/vilt/image_processing_vilt_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -30,15 +31,9 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - # Set maximum size based on the typical aspect ratio of the COCO dataset MAX_LONGER_EDGE = 1333 MAX_SHORTER_EDGE = 800 diff --git a/src/transformers/models/vitmatte/image_processing_vitmatte_fast.py b/src/transformers/models/vitmatte/image_processing_vitmatte_fast.py index 1974d53119b1..c5a7256a612b 100644 --- a/src/transformers/models/vitmatte/image_processing_vitmatte_fast.py +++ b/src/transformers/models/vitmatte/image_processing_vitmatte_fast.py @@ -17,6 +17,7 @@ from typing import Optional, Union import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature from ...image_processing_utils_fast import ( @@ -37,16 +38,10 @@ TensorType, auto_docstring, filter_out_non_signature_kwargs, - is_torchvision_v2_available, logging, ) -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - logger = logging.get_logger(__name__) diff --git 
a/src/transformers/models/yolos/image_processing_yolos_fast.py b/src/transformers/models/yolos/image_processing_yolos_fast.py index fda06dfc522a..59bb3868e75e 100644 --- a/src/transformers/models/yolos/image_processing_yolos_fast.py +++ b/src/transformers/models/yolos/image_processing_yolos_fast.py @@ -9,6 +9,7 @@ import torch from torchvision.io import read_image +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import BatchFeature, get_size_dict from ...image_processing_utils_fast import ( @@ -32,16 +33,10 @@ validate_annotations, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, logging +from ...utils import TensorType, auto_docstring, logging from ...utils.import_utils import requires -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - logger = logging.get_logger(__name__) @@ -475,13 +470,7 @@ def resize_annotation( resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`): The resampling filter to use when resizing the masks. """ - interpolation = ( - interpolation - if interpolation is not None - else F.InterpolationMode.NEAREST_EXACT - if is_torchvision_v2_available() - else F.InterpolationMode.NEAREST - ) + interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)] new_annotation = {} diff --git a/src/transformers/models/zoedepth/image_processing_zoedepth_fast.py b/src/transformers/models/zoedepth/image_processing_zoedepth_fast.py index 7967932729e5..045dbfdacd4d 100644 --- a/src/transformers/models/zoedepth/image_processing_zoedepth_fast.py +++ b/src/transformers/models/zoedepth/image_processing_zoedepth_fast.py @@ -21,6 +21,7 @@ import numpy as np import torch +from torchvision.transforms.v2 import functional as F from ...image_processing_utils import ( BatchFeature, @@ -44,7 +45,6 @@ from ...utils import ( TensorType, auto_docstring, - is_torchvision_v2_available, logging, requires_backends, ) @@ -52,12 +52,6 @@ from .modeling_zoedepth import ZoeDepthDepthEstimatorOutput -if is_torchvision_v2_available(): - from torchvision.transforms.v2 import functional as F -else: - from torchvision.transforms import functional as F - - logger = logging.get_logger(__name__) diff --git a/src/transformers/video_processing_utils.py b/src/transformers/video_processing_utils.py index 4d0e9c58f314..0bc81bf8eb28 100644 --- a/src/transformers/video_processing_utils.py +++ b/src/transformers/video_processing_utils.py @@ -46,7 +46,6 @@ is_remote_url, is_torch_available, is_torchcodec_available, - is_torchvision_available, is_torchvision_v2_available, logging, ) @@ -70,8 +69,6 @@ if is_torchvision_v2_available(): from torchvision.transforms.v2 import functional as F -elif is_torchvision_available(): - from torchvision.transforms import functional as F logger = logging.get_logger(__name__)
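For reference, a minimal illustrative sketch (not part of the diff) of the pattern the hunks above converge on: the torchvision v2 functional module is imported unconditionally, and `InterpolationMode.NEAREST_EXACT` becomes the unconditional default instead of being gated on `is_torchvision_v2_available()`. The helper name `resolve_interpolation` is hypothetical and used only for illustration.

    # Assumption: torchvision with the v2 transforms API is installed, which the
    # changes above now treat as a hard requirement for the fast processors.
    from torchvision.transforms.v2 import functional as F

    def resolve_interpolation(interpolation=None):
        # Mirrors the simplified default in resize_annotation: no availability check,
        # NEAREST_EXACT is always the fallback when no interpolation is given.
        return interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT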