diff --git a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py
index 93dd31e3aafc..241c12923bdb 100644
--- a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py
+++ b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid.py
@@ -39,12 +39,7 @@
     valid_images,
     validate_preprocess_arguments,
 )
-from ...utils import (
-    TensorType,
-    filter_out_non_signature_kwargs,
-    is_vision_available,
-    logging,
-)
+from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging


 if is_vision_available():
diff --git a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py
index db9c9ad987c1..c04e006e358d 100644
--- a/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py
+++ b/src/transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py
@@ -21,6 +21,7 @@
 from typing import Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -39,13 +40,7 @@
     pil_torch_interpolation_mapping,
 )
 from ...processing_utils import Unpack
-from ...utils import TensorType, auto_docstring, is_torchvision_v2_available
-
-
-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
+from ...utils import TensorType, auto_docstring


 class DeepseekVLHybridFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py
index cae509e14d64..d9a85654e901 100644
--- a/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py
+++ b/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py
@@ -29,11 +29,7 @@
 from ...modeling_outputs import ModelOutput
 from ...modeling_utils import PreTrainedModel
 from ...processing_utils import Unpack
-from ...utils import (
-    TransformersKwargs,
-    auto_docstring,
-    can_return_tuple,
-)
+from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
 from ..auto import AutoModel
 from .configuration_deepseek_vl_hybrid import DeepseekVLHybridConfig
diff --git a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
index e9808b02ce34..18b416a57df2 100644
--- a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
+++ b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
@@ -16,6 +16,7 @@
 import torch
 import torch.nn as nn
+from torchvision.transforms.v2 import functional as F

 from ...cache_utils import Cache
 from ...image_processing_utils_fast import (
@@ -53,7 +54,6 @@
     auto_docstring,
     can_return_tuple,
     filter_out_non_signature_kwargs,
-    is_torchvision_v2_available,
     logging,
 )
 from ..auto import CONFIG_MAPPING, AutoConfig, AutoModel
@@ -70,12 +70,6 @@
 from ..sam.modeling_sam import SamLayerNorm, SamVisionNeck


-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-
-
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/dpt/image_processing_dpt_fast.py b/src/transformers/models/dpt/image_processing_dpt_fast.py
index 3e80ad7943db..892ddd7c3d6f 100644
--- a/src/transformers/models/dpt/image_processing_dpt_fast.py
+++ b/src/transformers/models/dpt/image_processing_dpt_fast.py
@@ -25,6 +25,7 @@
 from typing import TYPE_CHECKING, Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from ...image_processing_base import BatchFeature
 from ...image_processing_utils_fast import BaseImageProcessorFast, DefaultFastImageProcessorKwargs
@@ -39,17 +40,12 @@
     is_torch_tensor,
 )
 from ...processing_utils import Unpack
-from ...utils import TensorType, auto_docstring, is_torchvision_v2_available, requires_backends
+from ...utils import TensorType, auto_docstring, requires_backends


 if TYPE_CHECKING:
     from ...modeling_outputs import DepthEstimatorOutput

-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-

 class DPTFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
     """
diff --git a/src/transformers/models/dpt/modular_dpt.py b/src/transformers/models/dpt/modular_dpt.py
index 32ca94a2d43f..34eb08f39b68 100644
--- a/src/transformers/models/dpt/modular_dpt.py
+++ b/src/transformers/models/dpt/modular_dpt.py
@@ -32,7 +32,6 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torchvision_v2_available,
     requires_backends,
 )
 from ..beit.image_processing_beit_fast import BeitImageProcessorFast
@@ -41,10 +40,7 @@
 if TYPE_CHECKING:
     from ...modeling_outputs import DepthEstimatorOutput

-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
+from torchvision.transforms.v2 import functional as F


 def get_resize_output_image_size(
diff --git a/src/transformers/models/llava_onevision/image_processing_llava_onevision_fast.py b/src/transformers/models/llava_onevision/image_processing_llava_onevision_fast.py
index 4392d64e9ebf..11872cb67bf3 100644
--- a/src/transformers/models/llava_onevision/image_processing_llava_onevision_fast.py
+++ b/src/transformers/models/llava_onevision/image_processing_llava_onevision_fast.py
@@ -22,6 +22,7 @@
 from typing import Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from ...image_processing_utils import BatchFeature, get_patch_output_size, select_best_resolution
 from ...image_processing_utils_fast import (
@@ -41,13 +42,7 @@
     get_image_size,
 )
 from ...processing_utils import Unpack
-from ...utils import TensorType, auto_docstring, is_torchvision_v2_available
-
-
-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
+from ...utils import TensorType, auto_docstring


 class LlavaOnevisionFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
diff --git a/src/transformers/models/llava_onevision/modular_llava_onevision.py b/src/transformers/models/llava_onevision/modular_llava_onevision.py
index ec2304e09dd1..b4f64dee8e04 100644
--- a/src/transformers/models/llava_onevision/modular_llava_onevision.py
+++ b/src/transformers/models/llava_onevision/modular_llava_onevision.py
@@ -18,6 +18,7 @@
 import torch
 from torch import nn
+from torchvision.transforms.v2 import functional as F

 from transformers.models.llava_next.image_processing_llava_next_fast import LlavaNextImageProcessorFast
 from transformers.models.llava_next_video.modeling_llava_next_video import (
@@ -50,16 +51,10 @@
     TensorType,
     auto_docstring,
     can_return_tuple,
-    is_torchvision_v2_available,
     logging,
 )


-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-

 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/owlv2/image_processing_owlv2_fast.py b/src/transformers/models/owlv2/image_processing_owlv2_fast.py
index c17a45b6e427..417fc800ea88 100644
--- a/src/transformers/models/owlv2/image_processing_owlv2_fast.py
+++ b/src/transformers/models/owlv2/image_processing_owlv2_fast.py
@@ -23,6 +23,7 @@
 from typing import TYPE_CHECKING, Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from ...image_processing_utils_fast import BaseImageProcessorFast, BatchFeature, DefaultFastImageProcessorKwargs
 from ...image_transforms import center_to_corners_format, group_images_by_shape, reorder_images
@@ -35,16 +36,10 @@
     SizeDict,
 )
 from ...processing_utils import Unpack
-from ...utils import TensorType, auto_docstring, is_torchvision_v2_available
+from ...utils import TensorType, auto_docstring
 from .image_processing_owlv2 import _scale_boxes, box_iou


-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-
-
 if TYPE_CHECKING:
     from .modeling_owlv2 import Owlv2ObjectDetectionOutput
diff --git a/src/transformers/models/owlv2/modular_owlv2.py b/src/transformers/models/owlv2/modular_owlv2.py
index 2e6d917a791a..66acd2088399 100644
--- a/src/transformers/models/owlv2/modular_owlv2.py
+++ b/src/transformers/models/owlv2/modular_owlv2.py
@@ -18,6 +18,7 @@
 from typing import Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from ...image_processing_utils_fast import (
     BaseImageProcessorFast,
@@ -37,17 +38,10 @@
 from ...utils import (
     TensorType,
     auto_docstring,
-    is_torchvision_v2_available,
 )
 from ..owlvit.image_processing_owlvit_fast import OwlViTImageProcessorFast


-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-
-
 class Owlv2FastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
     ...
diff --git a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py
index 68c5497b0205..1dfffd31082c 100644
--- a/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py
+++ b/src/transformers/models/rt_detr/image_processing_rt_detr_fast.py
@@ -8,6 +8,7 @@
 from typing import Any, Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
@@ -36,12 +37,6 @@
 from .image_processing_rt_detr import get_size_with_aspect_ratio


-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-
-
 class RTDetrFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
     r"""
     format (`str`, *optional*, defaults to `AnnotationFormat.COCO_DETECTION`):
diff --git a/src/transformers/models/rt_detr/modular_rt_detr.py b/src/transformers/models/rt_detr/modular_rt_detr.py
index 760e4a6675cf..61bd055144f0 100644
--- a/src/transformers/models/rt_detr/modular_rt_detr.py
+++ b/src/transformers/models/rt_detr/modular_rt_detr.py
@@ -2,6 +2,7 @@
 from typing import Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from transformers.models.detr.image_processing_detr_fast import DetrFastImageProcessorKwargs, DetrImageProcessorFast

@@ -22,18 +23,11 @@
 from ...processing_utils import Unpack
 from ...utils import (
     TensorType,
-    is_torchvision_v2_available,
     logging,
     requires_backends,
 )


-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-
-
 logger = logging.get_logger(__name__)

 SUPPORTED_ANNOTATION_FORMATS = (AnnotationFormat.COCO_DETECTION,)
diff --git a/src/transformers/models/sam2_video/modeling_sam2_video.py b/src/transformers/models/sam2_video/modeling_sam2_video.py
index caa07d1f63b5..79d5b015f889 100644
--- a/src/transformers/models/sam2_video/modeling_sam2_video.py
+++ b/src/transformers/models/sam2_video/modeling_sam2_video.py
@@ -39,10 +39,7 @@
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from ...processing_utils import Unpack
 from ...pytorch_utils import compile_compatible_method_lru_cache
-from ...utils import (
-    ModelOutput,
-    auto_docstring,
-)
+from ...utils import ModelOutput, auto_docstring
 from ...utils.generic import OutputRecorder, TransformersKwargs
 from ..auto import AutoModel
 from .configuration_sam2_video import Sam2VideoConfig, Sam2VideoMaskDecoderConfig, Sam2VideoPromptEncoderConfig
diff --git a/src/transformers/models/sam2_video/modular_sam2_video.py b/src/transformers/models/sam2_video/modular_sam2_video.py
index fa0d6c21d5e6..b95a9f778251 100644
--- a/src/transformers/models/sam2_video/modular_sam2_video.py
+++ b/src/transformers/models/sam2_video/modular_sam2_video.py
@@ -36,8 +36,6 @@
 from ...utils import (
     ModelOutput,
     auto_docstring,
-    is_torchvision_available,
-    is_torchvision_v2_available,
     logging,
 )
 from ...utils.generic import OutputRecorder, TransformersKwargs
@@ -59,12 +57,6 @@
 from ..sam2.processing_sam2 import Sam2Processor


-if is_torchvision_available() and is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-
-
 logger = logging.get_logger(__name__)
diff --git a/src/transformers/models/segformer/image_processing_segformer_fast.py b/src/transformers/models/segformer/image_processing_segformer_fast.py
index dc18283136e1..11dfa3c42ab1 100644
--- a/src/transformers/models/segformer/image_processing_segformer_fast.py
+++ b/src/transformers/models/segformer/image_processing_segformer_fast.py
@@ -22,6 +22,7 @@
 from typing import Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
@@ -40,13 +41,7 @@
     is_torch_tensor,
 )
 from ...processing_utils import Unpack
-from ...utils import TensorType, auto_docstring, is_torchvision_v2_available
-
-
-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
+from ...utils import TensorType, auto_docstring


 class SegformerFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
@@ -135,9 +130,7 @@ def _preprocess_image_like_inputs(
                     "do_normalize": False,
                     "do_rescale": False,
                     # Nearest interpolation is used for segmentation maps instead of BILINEAR.
-                    "interpolation": F.InterpolationMode.NEAREST_EXACT
-                    if is_torchvision_v2_available()
-                    else F.InterpolationMode.NEAREST,
+                    "interpolation": F.InterpolationMode.NEAREST_EXACT,
                 }
             )
             processed_segmentation_maps = self._preprocess(
diff --git a/src/transformers/models/segformer/modular_segformer.py b/src/transformers/models/segformer/modular_segformer.py
index 341e6949d8b7..831d046fd9a7 100644
--- a/src/transformers/models/segformer/modular_segformer.py
+++ b/src/transformers/models/segformer/modular_segformer.py
@@ -17,6 +17,7 @@
 from typing import Optional, Union

 import torch
+from torchvision.transforms.v2 import functional as F

 from transformers.models.beit.image_processing_beit_fast import BeitFastImageProcessorKwargs, BeitImageProcessorFast

@@ -36,16 +37,9 @@
 from ...processing_utils import Unpack
 from ...utils import (
     TensorType,
-    is_torchvision_v2_available,
 )


-if is_torchvision_v2_available():
-    from torchvision.transforms.v2 import functional as F
-else:
-    from torchvision.transforms import functional as F
-
-
 class SegformerFastImageProcessorKwargs(BeitFastImageProcessorKwargs):
     pass
@@ -96,9 +90,7 @@ def _preprocess_image_like_inputs(
                     "do_normalize": False,
                     "do_rescale": False,
                     # Nearest interpolation is used for segmentation maps instead of BILINEAR.
-                    "interpolation": F.InterpolationMode.NEAREST_EXACT
-                    if is_torchvision_v2_available()
-                    else F.InterpolationMode.NEAREST,
+                    "interpolation": F.InterpolationMode.NEAREST_EXACT,
                 }
             )
             processed_segmentation_maps = self._preprocess(
diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py
index 430153690c79..07519ee865ac 100644
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -400,11 +400,7 @@ def is_torchvision_available() -> bool:


 def is_torchvision_v2_available() -> bool:
-    if not is_torchvision_available():
-        return False
-
-    # NOTE: We require torchvision>=0.15 as v2 transforms are available from this version: https://pytorch.org/vision/stable/transforms.html#v1-or-v2-which-one-should-i-use
-    return version.parse(_torchvision_version) >= version.parse("0.15")
+    return is_torchvision_available()


 def is_galore_torch_available() -> Union[tuple[bool, str], bool]: