Skip to content

Commit c0bfac7

Browse files
committed
just squash commits into one
1 parent f22cb1e commit c0bfac7

File tree

208 files changed

+1577
-1981
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

208 files changed

+1577
-1981
lines changed

docs/source/en/auto_docstring.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ The `@auto_docstring` decorator automatically generates docstrings by:
292292

293293
8. Unrolling kwargs typed with the unpack operator. For specific methods (defined in `UNROLL_KWARGS_METHODS`) or classes (defined in `UNROLL_KWARGS_CLASSES`), the decorator processes `**kwargs` parameters that are typed with `Unpack[KwargsTypedDict]`. It extracts the documentations from the `TypedDict` and adds each parameter to the function's docstring.
294294

295-
Currently only supported for [`FastImageProcessorKwargs`].
295+
Currently only supported for [`ImagesKwargs`].
296296

297297
## Best practices
298298

src/transformers/image_processing_utils.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020

2121
from .image_processing_base import BatchFeature, ImageProcessingMixin
2222
from .image_transforms import center_crop, normalize, rescale
23-
from .image_utils import ChannelDimension, get_image_size
23+
from .image_utils import ChannelDimension, ImageInput, get_image_size
24+
from .processing_utils import ImagesKwargs, Unpack
2425
from .utils import logging
2526
from .utils.import_utils import requires
2627

@@ -36,6 +37,8 @@
3637

3738
@requires(backends=("vision",))
3839
class BaseImageProcessor(ImageProcessingMixin):
40+
valid_kwargs = ImagesKwargs
41+
3942
def __init__(self, **kwargs):
4043
super().__init__(**kwargs)
4144

@@ -46,9 +49,9 @@ def is_fast(self) -> bool:
4649
"""
4750
return False
4851

49-
def __call__(self, images, **kwargs) -> BatchFeature:
52+
def __call__(self, images: ImageInput, *args, **kwargs: Unpack[ImagesKwargs]) -> BatchFeature:
5053
"""Preprocess an image or a batch of images."""
51-
return self.preprocess(images, **kwargs)
54+
return self.preprocess(images, *args, **kwargs)
5255

5356
def preprocess(self, images, **kwargs) -> BatchFeature:
5457
raise NotImplementedError("Each image processor must implement its own preprocess method")

src/transformers/image_processing_utils_fast.py

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from collections.abc import Iterable
1616
from copy import deepcopy
1717
from functools import lru_cache, partial
18-
from typing import Any, Optional, TypedDict, Union
18+
from typing import Any, Optional, Union
1919

2020
import numpy as np
2121

@@ -40,7 +40,7 @@
4040
validate_kwargs,
4141
validate_preprocess_arguments,
4242
)
43-
from .processing_utils import Unpack
43+
from .processing_utils import ImagesKwargs, Unpack
4444
from .utils import (
4545
TensorType,
4646
auto_docstring,
@@ -165,28 +165,6 @@ def divide_to_patches(
165165
return patches
166166

167167

168-
class DefaultFastImageProcessorKwargs(TypedDict, total=False):
169-
do_resize: Optional[bool]
170-
size: Optional[dict[str, int]]
171-
default_to_square: Optional[bool]
172-
resample: Optional[Union["PILImageResampling", "F.InterpolationMode"]]
173-
do_center_crop: Optional[bool]
174-
crop_size: Optional[dict[str, int]]
175-
do_rescale: Optional[bool]
176-
rescale_factor: Optional[Union[int, float]]
177-
do_normalize: Optional[bool]
178-
image_mean: Optional[Union[float, list[float]]]
179-
image_std: Optional[Union[float, list[float]]]
180-
do_pad: Optional[bool]
181-
pad_size: Optional[dict[str, int]]
182-
do_convert_rgb: Optional[bool]
183-
return_tensors: Optional[Union[str, TensorType]]
184-
data_format: Optional[ChannelDimension]
185-
input_data_format: Optional[Union[str, ChannelDimension]]
186-
device: Optional["torch.device"]
187-
disable_grouping: Optional[bool]
188-
189-
190168
@auto_docstring
191169
class BaseImageProcessorFast(BaseImageProcessor):
192170
resample = None
@@ -208,10 +186,10 @@ class BaseImageProcessorFast(BaseImageProcessor):
208186
input_data_format = None
209187
device = None
210188
model_input_names = ["pixel_values"]
211-
valid_kwargs = DefaultFastImageProcessorKwargs
189+
valid_kwargs = ImagesKwargs
212190
unused_kwargs = None
213191

214-
def __init__(self, **kwargs: Unpack[DefaultFastImageProcessorKwargs]):
192+
def __init__(self, **kwargs: Unpack[ImagesKwargs]):
215193
super().__init__(**kwargs)
216194
kwargs = self.filter_out_unused_kwargs(kwargs)
217195
size = kwargs.pop("size", self.size)
@@ -730,11 +708,8 @@ def _validate_preprocess_kwargs(
730708
data_format=data_format,
731709
)
732710

733-
def __call__(self, images: ImageInput, *args, **kwargs: Unpack[DefaultFastImageProcessorKwargs]) -> BatchFeature:
734-
return self.preprocess(images, *args, **kwargs)
735-
736711
@auto_docstring
737-
def preprocess(self, images: ImageInput, *args, **kwargs: Unpack[DefaultFastImageProcessorKwargs]) -> BatchFeature:
712+
def preprocess(self, images: ImageInput, *args, **kwargs: Unpack[ImagesKwargs]) -> BatchFeature:
738713
# args are not validated, but their order in the `preprocess` and `_preprocess` signatures must be the same
739714
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_kwargs_names)
740715
# Set default kwargs from self. This ensures that if a kwarg is not provided
@@ -767,7 +742,7 @@ def _preprocess_image_like_inputs(
767742
do_convert_rgb: bool,
768743
input_data_format: ChannelDimension,
769744
device: Optional[Union[str, "torch.device"]] = None,
770-
**kwargs: Unpack[DefaultFastImageProcessorKwargs],
745+
**kwargs: Unpack[ImagesKwargs],
771746
) -> BatchFeature:
772747
"""
773748
Preprocess image-like inputs.

src/transformers/models/aria/modular_aria.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -959,8 +959,6 @@ def __call__(
959959
self,
960960
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]],
961961
images: Optional[ImageInput] = None,
962-
audio=None,
963-
videos=None,
964962
**kwargs: Unpack[AriaProcessorKwargs],
965963
) -> BatchFeature:
966964
"""

src/transformers/models/aria/processing_aria.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,6 @@ def __call__(
8585
self,
8686
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]],
8787
images: Optional[ImageInput] = None,
88-
audio=None,
89-
videos=None,
9088
**kwargs: Unpack[AriaProcessorKwargs],
9189
) -> BatchFeature:
9290
"""

src/transformers/models/aya_vision/processing_aya_vision.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,11 @@
1919

2020
from ...image_processing_utils import BatchFeature
2121
from ...image_utils import ImageInput, make_flat_list_of_images
22-
from ...processing_utils import ImagesKwargs, MultiModalData, ProcessingKwargs, ProcessorMixin, Unpack
22+
from ...processing_utils import MultiModalData, ProcessingKwargs, ProcessorMixin, Unpack
2323
from ...tokenization_utils_base import PreTokenizedInput, TextInput
2424

2525

26-
class AyaVisionImagesKwargs(ImagesKwargs, total=False):
27-
crop_to_patches: Optional[bool]
28-
min_patches: Optional[int]
29-
max_patches: Optional[int]
30-
31-
3226
class AyaVisionProcessorKwargs(ProcessingKwargs, total=False):
33-
images_kwargs: AyaVisionImagesKwargs
3427
_defaults = {
3528
"text_kwargs": {
3629
"padding_side": "left",
@@ -140,8 +133,6 @@ def __call__(
140133
self,
141134
images: Optional[ImageInput] = None,
142135
text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
143-
audio=None,
144-
videos=None,
145136
**kwargs: Unpack[AyaVisionProcessorKwargs],
146137
) -> BatchFeature:
147138
"""

src/transformers/models/beit/image_processing_beit.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
valid_images,
3434
validate_preprocess_arguments,
3535
)
36+
from ...processing_utils import ImagesKwargs
3637
from ...utils import (
3738
TensorType,
3839
filter_out_non_signature_kwargs,
@@ -54,6 +55,17 @@
5455
logger = logging.get_logger(__name__)
5556

5657

58+
class BeitImageProcessorKwargs(ImagesKwargs):
59+
r"""
60+
do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`):
61+
Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0
62+
is used for background, and background itself is not included in all classes of a dataset (e.g.
63+
ADE20k). The background label will be replaced by 255.
64+
"""
65+
66+
do_reduce_labels: Optional[bool]
67+
68+
5769
@requires(backends=("vision",))
5870
class BeitImageProcessor(BaseImageProcessor):
5971
r"""
@@ -99,6 +111,7 @@ class BeitImageProcessor(BaseImageProcessor):
99111
"""
100112

101113
model_input_names = ["pixel_values"]
114+
valid_kwargs = BeitImageProcessorKwargs
102115

103116
@filter_out_non_signature_kwargs(extra=INIT_SERVICE_KWARGS)
104117
def __init__(

src/transformers/models/beit/image_processing_beit_fast.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from ...image_processing_utils import BatchFeature
2222
from ...image_processing_utils_fast import (
2323
BaseImageProcessorFast,
24-
DefaultFastImageProcessorKwargs,
2524
group_images_by_shape,
2625
reorder_images,
2726
)
@@ -40,6 +39,7 @@
4039
auto_docstring,
4140
is_torchvision_v2_available,
4241
)
42+
from .image_processing_beit import BeitImageProcessorKwargs
4343

4444

4545
if is_torchvision_v2_available():
@@ -48,17 +48,6 @@
4848
from torchvision.transforms import functional as F
4949

5050

51-
class BeitFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
52-
r"""
53-
do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`):
54-
Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0
55-
is used for background, and background itself is not included in all classes of a dataset (e.g.
56-
ADE20k). The background label will be replaced by 255.
57-
"""
58-
59-
do_reduce_labels: Optional[bool]
60-
61-
6251
@auto_docstring
6352
class BeitImageProcessorFast(BaseImageProcessorFast):
6453
resample = PILImageResampling.BICUBIC
@@ -72,9 +61,9 @@ class BeitImageProcessorFast(BaseImageProcessorFast):
7261
do_rescale = True
7362
do_normalize = True
7463
do_reduce_labels = False
75-
valid_kwargs = BeitFastImageProcessorKwargs
64+
valid_kwargs = BeitImageProcessorKwargs
7665

77-
def __init__(self, **kwargs: Unpack[BeitFastImageProcessorKwargs]):
66+
def __init__(self, **kwargs: Unpack[BeitImageProcessorKwargs]):
7867
super().__init__(**kwargs)
7968

8069
def reduce_label(self, labels: list["torch.Tensor"]):
@@ -92,7 +81,7 @@ def preprocess(
9281
self,
9382
images: ImageInput,
9483
segmentation_maps: Optional[ImageInput] = None,
95-
**kwargs: Unpack[BeitFastImageProcessorKwargs],
84+
**kwargs: Unpack[BeitImageProcessorKwargs],
9685
) -> BatchFeature:
9786
r"""
9887
segmentation_maps (`ImageInput`, *optional*):
@@ -107,7 +96,7 @@ def _preprocess_image_like_inputs(
10796
do_convert_rgb: bool,
10897
input_data_format: ChannelDimension,
10998
device: Optional[Union[str, "torch.device"]] = None,
110-
**kwargs: Unpack[BeitFastImageProcessorKwargs],
99+
**kwargs: Unpack[BeitImageProcessorKwargs],
111100
) -> BatchFeature:
112101
"""
113102
Preprocess image-like inputs.

src/transformers/models/blip/processing_blip.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ class BlipProcessorKwargs(ProcessingKwargs, total=False):
3636
"return_length": False,
3737
"verbose": True,
3838
},
39-
"images_kwargs": {},
4039
}
4140

4241

@@ -67,8 +66,6 @@ def __call__(
6766
self,
6867
images: Optional[ImageInput] = None,
6968
text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None,
70-
audio=None,
71-
videos=None,
7269
**kwargs: Unpack[BlipProcessorKwargs],
7370
) -> BatchEncoding:
7471
"""

src/transformers/models/blip_2/processing_blip_2.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ class Blip2ProcessorKwargs(ProcessingKwargs, total=False):
4141
"return_length": False,
4242
"verbose": True,
4343
},
44-
"images_kwargs": {},
4544
}
4645

4746

@@ -81,8 +80,6 @@ def __call__(
8180
self,
8281
images: Optional[ImageInput] = None,
8382
text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None,
84-
audio=None,
85-
videos=None,
8683
**kwargs: Unpack[Blip2ProcessorKwargs],
8784
) -> BatchEncoding:
8885
"""

0 commit comments

Comments (0)