Skip to content

Commit 4dda5f7

Browse files
authored
Merge branch 'main' into chat-template-url
2 parents 75cbb11 + 688f470 commit 4dda5f7

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+805
-112
lines changed

docs/source/en/model_doc/bridgetower.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,11 @@ Tips:
147147
[[autodoc]] BridgeTowerImageProcessor
148148
- preprocess
149149

150+
## BridgeTowerImageProcessorFast
151+
152+
[[autodoc]] BridgeTowerImageProcessorFast
153+
- preprocess
154+
150155
## BridgeTowerProcessor
151156

152157
[[autodoc]] BridgeTowerProcessor

docs/source/en/model_doc/efficientnet.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ The original code can be found [here](https://github.com/tensorflow/tpu/tree/mas
4343
[[autodoc]] EfficientNetImageProcessor
4444
- preprocess
4545

46+
## EfficientNetImageProcessorFast
47+
48+
[[autodoc]] EfficientNetImageProcessorFast
49+
- preprocess
50+
4651
## EfficientNetModel
4752

4853
[[autodoc]] EfficientNetModel

docs/source/ja/model_doc/bridgetower.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@ BridgeTower は、ビジュアル エンコーダー、テキスト エンコー
144144
[[autodoc]] BridgeTowerImageProcessor
145145
- preprocess
146146

147+
## BridgeTowerImageProcessorFast
148+
149+
[[autodoc]] BridgeTowerImageProcessorFast
150+
- preprocess
151+
147152
## BridgeTowerProcessor
148153

149154
[[autodoc]] BridgeTowerProcessor

src/transformers/image_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
from torchvision.transforms import InterpolationMode
6767

6868
pil_torch_interpolation_mapping = {
69-
PILImageResampling.NEAREST: InterpolationMode.NEAREST,
69+
PILImageResampling.NEAREST: InterpolationMode.NEAREST_EXACT,
7070
PILImageResampling.BOX: InterpolationMode.BOX,
7171
PILImageResampling.BILINEAR: InterpolationMode.BILINEAR,
7272
PILImageResampling.HAMMING: InterpolationMode.HAMMING,

src/transformers/models/auto/image_processing_auto.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,13 @@
5656
else:
5757
IMAGE_PROCESSOR_MAPPING_NAMES = OrderedDict(
5858
[
59-
("align", ("EfficientNetImageProcessor",)),
59+
("align", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
6060
("aria", ("AriaImageProcessor",)),
6161
("beit", ("BeitImageProcessor",)),
6262
("bit", ("BitImageProcessor", "BitImageProcessorFast")),
6363
("blip", ("BlipImageProcessor", "BlipImageProcessorFast")),
6464
("blip-2", ("BlipImageProcessor", "BlipImageProcessorFast")),
65-
("bridgetower", ("BridgeTowerImageProcessor",)),
65+
("bridgetower", ("BridgeTowerImageProcessor", "BridgeTowerImageProcessorFast")),
6666
("chameleon", ("ChameleonImageProcessor",)),
6767
("chinese_clip", ("ChineseCLIPImageProcessor", "ChineseCLIPImageProcessorFast")),
6868
("clip", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
@@ -83,7 +83,7 @@
8383
("donut-swin", ("DonutImageProcessor", "DonutImageProcessorFast")),
8484
("dpt", ("DPTImageProcessor",)),
8585
("efficientformer", ("EfficientFormerImageProcessor",)),
86-
("efficientnet", ("EfficientNetImageProcessor",)),
86+
("efficientnet", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
8787
("flava", ("FlavaImageProcessor", "FlavaImageProcessorFast")),
8888
("focalnet", ("BitImageProcessor", "BitImageProcessorFast")),
8989
("fuyu", ("FuyuImageProcessor",)),

src/transformers/models/bamba/modeling_bamba.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -783,8 +783,8 @@ def torch_forward(
783783
hidden_states = hidden_states.reshape(batch_size, seq_len, -1, self.head_dim).float()
784784
B = B.reshape(batch_size, seq_len, -1, self.ssm_state_size).float()
785785
C = C.reshape(batch_size, seq_len, -1, self.ssm_state_size).float()
786-
B = B.repeat(1, 1, self.num_heads // self.n_groups, 1)
787-
C = C.repeat(1, 1, self.num_heads // self.n_groups, 1)
786+
B = B.repeat_interleave(self.num_heads // self.n_groups, dim=2, output_size=self.num_heads)
787+
C = C.repeat_interleave(self.num_heads // self.n_groups, dim=2, output_size=self.num_heads)
788788
pad_size = (self.chunk_size - seq_len % self.chunk_size) % self.chunk_size
789789

790790
D_residual = self.D[..., None] * pad_tensor_by_size(hidden_states, pad_size)

src/transformers/models/bamba/modular_bamba.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,8 +580,8 @@ def torch_forward(
580580
hidden_states = hidden_states.reshape(batch_size, seq_len, -1, self.head_dim).float()
581581
B = B.reshape(batch_size, seq_len, -1, self.ssm_state_size).float()
582582
C = C.reshape(batch_size, seq_len, -1, self.ssm_state_size).float()
583-
B = B.repeat(1, 1, self.num_heads // self.n_groups, 1)
584-
C = C.repeat(1, 1, self.num_heads // self.n_groups, 1)
583+
B = B.repeat_interleave(self.num_heads // self.n_groups, dim=2, output_size=self.num_heads)
584+
C = C.repeat_interleave(self.num_heads // self.n_groups, dim=2, output_size=self.num_heads)
585585
pad_size = (self.chunk_size - seq_len % self.chunk_size) % self.chunk_size
586586

587587
D_residual = self.D[..., None] * pad_tensor_by_size(hidden_states, pad_size)

src/transformers/models/beit/modeling_beit.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ def __init__(self, config: BeitConfig, window_size: Optional[tuple] = None) -> N
663663
self.relative_position_bias = BeitRelativePositionBias(config, window_size=window_size)
664664

665665
# stochastic depth decay rule
666-
dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, config.num_hidden_layers)]
666+
dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, config.num_hidden_layers, device="cpu")]
667667
self.layer = nn.ModuleList(
668668
[
669669
BeitLayer(

src/transformers/models/bridgetower/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
if TYPE_CHECKING:
2121
from .configuration_bridgetower import *
2222
from .image_processing_bridgetower import *
23+
from .image_processing_bridgetower_fast import *
2324
from .modeling_bridgetower import *
2425
from .processing_bridgetower import *
2526
else:

src/transformers/models/bridgetower/image_processing_bridgetower.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
PILImageResampling,
2929
get_image_size,
3030
infer_channel_dimension_format,
31-
is_batched,
3231
is_scaled_image,
32+
make_flat_list_of_images,
3333
to_numpy_array,
3434
valid_images,
3535
validate_preprocess_arguments,
@@ -455,7 +455,7 @@ def preprocess(
455455
image_mean = image_mean if image_mean is not None else self.image_mean
456456
image_std = image_std if image_std is not None else self.image_std
457457
do_pad = do_pad if do_pad is not None else self.do_pad
458-
do_center_crop if do_center_crop is not None else self.do_center_crop
458+
do_center_crop = do_center_crop if do_center_crop is not None else self.do_center_crop
459459
# For backwards compatibility. Initial version of this processor was cropping to the "size" argument, which
460460
# it should default to if crop_size is undefined.
461461
crop_size = (
@@ -464,9 +464,7 @@ def preprocess(
464464

465465
size = size if size is not None else self.size
466466
size = get_size_dict(size, default_to_square=False)
467-
468-
if not is_batched(images):
469-
images = [images]
467+
images = make_flat_list_of_images(images)
470468

471469
if not valid_images(images):
472470
raise ValueError(

0 commit comments

Comments (0)