Skip to content

Commit 552ea88

Browse files
zucchini-nlp authored and AhnJoonSung committed
🚨 [unbloating] unify TypedDict usage in processing (huggingface#40931)
* just squash commits into one
* fix style
1 parent d801de3 commit 552ea88

File tree

208 files changed

+1574
-1984
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

208 files changed

+1574
-1984
lines changed

docs/source/en/auto_docstring.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -292,7 +292,7 @@ The `@auto_docstring` decorator automatically generates docstrings by:
292292

293293
8. Unrolling kwargs typed with the unpack operator. For specific methods (defined in `UNROLL_KWARGS_METHODS`) or classes (defined in `UNROLL_KWARGS_CLASSES`), the decorator processes `**kwargs` parameters that are typed with `Unpack[KwargsTypedDict]`. It extracts the documentations from the `TypedDict` and adds each parameter to the function's docstring.
294294

295-
Currently only supported for [`FastImageProcessorKwargs`].
295+
Currently only supported for [`ImagesKwargs`].
296296

297297
## Best practices
298298

src/transformers/image_processing_utils.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@
2020

2121
from .image_processing_base import BatchFeature, ImageProcessingMixin
2222
from .image_transforms import center_crop, normalize, rescale
23-
from .image_utils import ChannelDimension, get_image_size
23+
from .image_utils import ChannelDimension, ImageInput, get_image_size
24+
from .processing_utils import ImagesKwargs, Unpack
2425
from .utils import logging
2526
from .utils.import_utils import requires
2627

@@ -36,6 +37,8 @@
3637

3738
@requires(backends=("vision",))
3839
class BaseImageProcessor(ImageProcessingMixin):
40+
valid_kwargs = ImagesKwargs
41+
3942
def __init__(self, **kwargs):
4043
super().__init__(**kwargs)
4144

@@ -46,9 +49,9 @@ def is_fast(self) -> bool:
4649
"""
4750
return False
4851

49-
def __call__(self, images, **kwargs) -> BatchFeature:
52+
def __call__(self, images: ImageInput, *args, **kwargs: Unpack[ImagesKwargs]) -> BatchFeature:
5053
"""Preprocess an image or a batch of images."""
51-
return self.preprocess(images, **kwargs)
54+
return self.preprocess(images, *args, **kwargs)
5255

5356
def preprocess(self, images, **kwargs) -> BatchFeature:
5457
raise NotImplementedError("Each image processor must implement its own preprocess method")

src/transformers/image_processing_utils_fast.py

Lines changed: 6 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
from collections.abc import Iterable
1616
from copy import deepcopy
1717
from functools import lru_cache, partial
18-
from typing import Any, Optional, TypedDict, Union
18+
from typing import Any, Optional, Union
1919

2020
import numpy as np
2121

@@ -40,7 +40,7 @@
4040
validate_kwargs,
4141
validate_preprocess_arguments,
4242
)
43-
from .processing_utils import Unpack
43+
from .processing_utils import ImagesKwargs, Unpack
4444
from .utils import (
4545
TensorType,
4646
auto_docstring,
@@ -163,28 +163,6 @@ def divide_to_patches(
163163
return patches
164164

165165

166-
class DefaultFastImageProcessorKwargs(TypedDict, total=False):
167-
do_resize: Optional[bool]
168-
size: Optional[dict[str, int]]
169-
default_to_square: Optional[bool]
170-
resample: Optional[Union["PILImageResampling", "F.InterpolationMode"]]
171-
do_center_crop: Optional[bool]
172-
crop_size: Optional[dict[str, int]]
173-
do_rescale: Optional[bool]
174-
rescale_factor: Optional[Union[int, float]]
175-
do_normalize: Optional[bool]
176-
image_mean: Optional[Union[float, list[float]]]
177-
image_std: Optional[Union[float, list[float]]]
178-
do_pad: Optional[bool]
179-
pad_size: Optional[dict[str, int]]
180-
do_convert_rgb: Optional[bool]
181-
return_tensors: Optional[Union[str, TensorType]]
182-
data_format: Optional[ChannelDimension]
183-
input_data_format: Optional[Union[str, ChannelDimension]]
184-
device: Optional["torch.device"]
185-
disable_grouping: Optional[bool]
186-
187-
188166
@auto_docstring
189167
class BaseImageProcessorFast(BaseImageProcessor):
190168
resample = None
@@ -206,10 +184,10 @@ class BaseImageProcessorFast(BaseImageProcessor):
206184
input_data_format = None
207185
device = None
208186
model_input_names = ["pixel_values"]
209-
valid_kwargs = DefaultFastImageProcessorKwargs
187+
valid_kwargs = ImagesKwargs
210188
unused_kwargs = None
211189

212-
def __init__(self, **kwargs: Unpack[DefaultFastImageProcessorKwargs]):
190+
def __init__(self, **kwargs: Unpack[ImagesKwargs]):
213191
super().__init__(**kwargs)
214192
kwargs = self.filter_out_unused_kwargs(kwargs)
215193
size = kwargs.pop("size", self.size)
@@ -728,11 +706,8 @@ def _validate_preprocess_kwargs(
728706
data_format=data_format,
729707
)
730708

731-
def __call__(self, images: ImageInput, *args, **kwargs: Unpack[DefaultFastImageProcessorKwargs]) -> BatchFeature:
732-
return self.preprocess(images, *args, **kwargs)
733-
734709
@auto_docstring
735-
def preprocess(self, images: ImageInput, *args, **kwargs: Unpack[DefaultFastImageProcessorKwargs]) -> BatchFeature:
710+
def preprocess(self, images: ImageInput, *args, **kwargs: Unpack[ImagesKwargs]) -> BatchFeature:
736711
# args are not validated, but their order in the `preprocess` and `_preprocess` signatures must be the same
737712
validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_kwargs_names)
738713
# Set default kwargs from self. This ensures that if a kwarg is not provided
@@ -765,7 +740,7 @@ def _preprocess_image_like_inputs(
765740
do_convert_rgb: bool,
766741
input_data_format: ChannelDimension,
767742
device: Optional[Union[str, "torch.device"]] = None,
768-
**kwargs: Unpack[DefaultFastImageProcessorKwargs],
743+
**kwargs: Unpack[ImagesKwargs],
769744
) -> BatchFeature:
770745
"""
771746
Preprocess image-like inputs.

src/transformers/models/aria/modular_aria.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -959,8 +959,6 @@ def __call__(
959959
self,
960960
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]],
961961
images: Optional[ImageInput] = None,
962-
audio=None,
963-
videos=None,
964962
**kwargs: Unpack[AriaProcessorKwargs],
965963
) -> BatchFeature:
966964
"""

src/transformers/models/aria/processing_aria.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,8 +85,6 @@ def __call__(
8585
self,
8686
text: Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]],
8787
images: Optional[ImageInput] = None,
88-
audio=None,
89-
videos=None,
9088
**kwargs: Unpack[AriaProcessorKwargs],
9189
) -> BatchFeature:
9290
"""

src/transformers/models/aya_vision/processing_aya_vision.py

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -19,18 +19,11 @@
1919

2020
from ...image_processing_utils import BatchFeature
2121
from ...image_utils import ImageInput, make_flat_list_of_images
22-
from ...processing_utils import ImagesKwargs, MultiModalData, ProcessingKwargs, ProcessorMixin, Unpack
22+
from ...processing_utils import MultiModalData, ProcessingKwargs, ProcessorMixin, Unpack
2323
from ...tokenization_utils_base import PreTokenizedInput, TextInput
2424

2525

26-
class AyaVisionImagesKwargs(ImagesKwargs, total=False):
27-
crop_to_patches: Optional[bool]
28-
min_patches: Optional[int]
29-
max_patches: Optional[int]
30-
31-
3226
class AyaVisionProcessorKwargs(ProcessingKwargs, total=False):
33-
images_kwargs: AyaVisionImagesKwargs
3427
_defaults = {
3528
"text_kwargs": {
3629
"padding_side": "left",
@@ -140,8 +133,6 @@ def __call__(
140133
self,
141134
images: Optional[ImageInput] = None,
142135
text: Optional[Union[TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]]] = None,
143-
audio=None,
144-
videos=None,
145136
**kwargs: Unpack[AyaVisionProcessorKwargs],
146137
) -> BatchFeature:
147138
"""

src/transformers/models/beit/image_processing_beit.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
valid_images,
3434
validate_preprocess_arguments,
3535
)
36+
from ...processing_utils import ImagesKwargs
3637
from ...utils import (
3738
TensorType,
3839
filter_out_non_signature_kwargs,
@@ -54,6 +55,17 @@
5455
logger = logging.get_logger(__name__)
5556

5657

58+
class BeitImageProcessorKwargs(ImagesKwargs):
59+
r"""
60+
do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`):
61+
Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0
62+
is used for background, and background itself is not included in all classes of a dataset (e.g.
63+
ADE20k). The background label will be replaced by 255.
64+
"""
65+
66+
do_reduce_labels: Optional[bool]
67+
68+
5769
@requires(backends=("vision",))
5870
class BeitImageProcessor(BaseImageProcessor):
5971
r"""
@@ -99,6 +111,7 @@ class BeitImageProcessor(BaseImageProcessor):
99111
"""
100112

101113
model_input_names = ["pixel_values"]
114+
valid_kwargs = BeitImageProcessorKwargs
102115

103116
@filter_out_non_signature_kwargs(extra=INIT_SERVICE_KWARGS)
104117
def __init__(

src/transformers/models/beit/image_processing_beit_fast.py

Lines changed: 5 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
from ...image_processing_utils import BatchFeature
2323
from ...image_processing_utils_fast import (
2424
BaseImageProcessorFast,
25-
DefaultFastImageProcessorKwargs,
2625
group_images_by_shape,
2726
reorder_images,
2827
)
@@ -40,17 +39,7 @@
4039
TensorType,
4140
auto_docstring,
4241
)
43-
44-
45-
class BeitFastImageProcessorKwargs(DefaultFastImageProcessorKwargs):
46-
r"""
47-
do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`):
48-
Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0
49-
is used for background, and background itself is not included in all classes of a dataset (e.g.
50-
ADE20k). The background label will be replaced by 255.
51-
"""
52-
53-
do_reduce_labels: Optional[bool]
42+
from .image_processing_beit import BeitImageProcessorKwargs
5443

5544

5645
@auto_docstring
@@ -66,9 +55,9 @@ class BeitImageProcessorFast(BaseImageProcessorFast):
6655
do_rescale = True
6756
do_normalize = True
6857
do_reduce_labels = False
69-
valid_kwargs = BeitFastImageProcessorKwargs
58+
valid_kwargs = BeitImageProcessorKwargs
7059

71-
def __init__(self, **kwargs: Unpack[BeitFastImageProcessorKwargs]):
60+
def __init__(self, **kwargs: Unpack[BeitImageProcessorKwargs]):
7261
super().__init__(**kwargs)
7362

7463
def reduce_label(self, labels: list["torch.Tensor"]):
@@ -86,7 +75,7 @@ def preprocess(
8675
self,
8776
images: ImageInput,
8877
segmentation_maps: Optional[ImageInput] = None,
89-
**kwargs: Unpack[BeitFastImageProcessorKwargs],
78+
**kwargs: Unpack[BeitImageProcessorKwargs],
9079
) -> BatchFeature:
9180
r"""
9281
segmentation_maps (`ImageInput`, *optional*):
@@ -101,7 +90,7 @@ def _preprocess_image_like_inputs(
10190
do_convert_rgb: bool,
10291
input_data_format: ChannelDimension,
10392
device: Optional[Union[str, "torch.device"]] = None,
104-
**kwargs: Unpack[BeitFastImageProcessorKwargs],
93+
**kwargs: Unpack[BeitImageProcessorKwargs],
10594
) -> BatchFeature:
10695
"""
10796
Preprocess image-like inputs.

src/transformers/models/blip/processing_blip.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@ class BlipProcessorKwargs(ProcessingKwargs, total=False):
3636
"return_length": False,
3737
"verbose": True,
3838
},
39-
"images_kwargs": {},
4039
}
4140

4241

@@ -67,8 +66,6 @@ def __call__(
6766
self,
6867
images: Optional[ImageInput] = None,
6968
text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None,
70-
audio=None,
71-
videos=None,
7269
**kwargs: Unpack[BlipProcessorKwargs],
7370
) -> BatchEncoding:
7471
"""

src/transformers/models/blip_2/processing_blip_2.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,6 @@ class Blip2ProcessorKwargs(ProcessingKwargs, total=False):
4141
"return_length": False,
4242
"verbose": True,
4343
},
44-
"images_kwargs": {},
4544
}
4645

4746

@@ -81,8 +80,6 @@ def __call__(
8180
self,
8281
images: Optional[ImageInput] = None,
8382
text: Optional[Union[str, list[str], TextInput, PreTokenizedInput]] = None,
84-
audio=None,
85-
videos=None,
8683
**kwargs: Unpack[Blip2ProcessorKwargs],
8784
) -> BatchEncoding:
8885
"""

0 commit comments

Comments
 (0)