
Commit d146ba7

Merge branch 'main' into sam-vision-encoder
2 parents a1652c6 + cbfb8d7 commit d146ba7

16 files changed: +260 additions, -222 deletions

README.md

Lines changed: 169 additions & 189 deletions
Large diffs are not rendered by default.

docs/source/en/installation.md

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ rendered properly in your Markdown viewer.
 
 # Installation
 
-Transformers works with [PyTorch](https://pytorch.org/get-started/locally/), [TensorFlow 2.0](https://www.tensorflow.org/install/pip), and [Flax](https://flax.readthedocs.io/en/latest/). It has been tested on Python 3.6+, PyTorch 1.1.0+, TensorFlow 2.0+, and Flax.
+Transformers works with [PyTorch](https://pytorch.org/get-started/locally/), [TensorFlow 2.0](https://www.tensorflow.org/install/pip), and [Flax](https://flax.readthedocs.io/en/latest/). It has been tested on Python 3.9+, PyTorch 2.0+, TensorFlow 2.6+, and Flax 0.4.1+.
 
 ## Virtual environment
 
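For a quick sanity check against the updated minimums, something like the following can be run in the target environment (a minimal sketch; it only prints versions and assumes the PyTorch backend is installed):

```python
# Minimal environment check against the documented minimums
# (Python 3.9+, PyTorch 2.0+); assumes PyTorch is the installed backend.
import sys

import torch
import transformers

print("Python:", sys.version.split()[0])
print("PyTorch:", torch.__version__)
print("Transformers:", transformers.__version__)
```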

docs/source/en/model_doc/hubert.md

Lines changed: 3 additions & 2 deletions
@@ -71,9 +71,10 @@ pip install -U flash-attn --no-build-isolation
 Below is an expected speedup diagram comparing the pure inference time between the native implementation in transformers of `facebook/hubert-large-ls960-ft`, the flash-attention-2 and the sdpa (scale-dot-product-attention) version. We show the average speedup obtained on the `librispeech_asr` `clean` validation split:
 
 ```python
->>> from transformers import Wav2Vec2Model
+>>> from transformers import HubertModel
+>>> import torch
 
-model = Wav2Vec2Model.from_pretrained("facebook/hubert-large-ls960-ft", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to(device)
+>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda")
 ...
 ```
 
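For comparison with the SDPA variant mentioned in that paragraph, loading the same checkpoint would look roughly like this (a sketch, not part of the diff; it assumes a CUDA device is available):

```python
# Sketch: loading the same checkpoint with the SDPA attention implementation
# mentioned in the benchmark description; assumes a CUDA device is available.
import torch
from transformers import HubertModel

model = HubertModel.from_pretrained(
    "facebook/hubert-large-ls960-ft",
    torch_dtype=torch.float16,
    attn_implementation="sdpa",
).to("cuda")
```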

src/transformers/configuration_utils.py

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ class PretrainedConfig(PushToHubMixin):
         is_encoder_decoder (`bool`, *optional*, defaults to `False`):
             Whether the model is used as an encoder/decoder or not.
         is_decoder (`bool`, *optional*, defaults to `False`):
-            Whether the model is used as decoder or not (in which case it's used as an encoder).
+            Whether to only use the decoder in an encoder-decoder architecture, otherwise it has no effect on decoder-only or encoder-only architectures.
         cross_attention_hidden_size** (`bool`, *optional*):
             The hidden size of the cross-attention layer in case the model is used as a decoder in an encoder-decoder
             setting and the cross-attention hidden dimension differs from `self.config.hidden_size`.
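To illustrate the documented flag, here is a hedged example of configuring a BERT-style model as the decoder half of an encoder-decoder setup (checkpoint and values are illustrative only):

```python
# Illustrative only: `is_decoder=True` matters when a bidirectional architecture
# is reused as the decoder of an encoder-decoder model; `add_cross_attention`
# adds the cross-attention layers that attend to the encoder outputs.
from transformers import BertConfig, BertLMHeadModel

decoder_config = BertConfig.from_pretrained(
    "google-bert/bert-base-uncased",
    is_decoder=True,
    add_cross_attention=True,
)
decoder = BertLMHeadModel(decoder_config)
```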

src/transformers/models/blip/modeling_blip.py

Lines changed: 1 addition & 1 deletion
@@ -1233,7 +1233,7 @@ def generate(
     """,
     BLIP_START_DOCSTRING,
 )
-class BlipForQuestionAnswering(BlipPreTrainedModel):
+class BlipForQuestionAnswering(BlipPreTrainedModel, GenerationMixin):
     config_class = BlipConfig
     _tied_weights_keys = ["text_decoder.cls.predictions.decoder.bias"]
 
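With `GenerationMixin` now explicitly in the class hierarchy, the usual generate-based VQA flow applies; a rough usage sketch (the image URL and checkpoint are examples, not part of the commit):

```python
# Rough usage sketch for BlipForQuestionAnswering.generate(); the checkpoint
# and image URL are examples, not part of the commit.
import requests
from PIL import Image
from transformers import BlipForQuestionAnswering, BlipProcessor

processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

inputs = processor(images=image, text="how many cats are there?", return_tensors="pt")
output_ids = model.generate(**inputs)
print(processor.decode(output_ids[0], skip_special_tokens=True))
```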

src/transformers/models/speecht5/modeling_speecht5.py

Lines changed: 7 additions & 0 deletions
@@ -2631,6 +2631,13 @@ def __init__(self, config: SpeechT5Config):
         # Initialize weights and apply final processing
         self.post_init()
 
+    @classmethod
+    def can_generate(cls) -> bool:
+        # Speecht5 has a unique model structure, where the external class (`SpeechT5ForTextToSpeech`) doesn't need to inherit from
+        # `GenerationMixin` (it has a non-standard generation method). This means that the base `can_generate()` will return `False`,
+        # but we need to override it so as to do `GenerationConfig` handling in multiple parts of the codebase.
+        return True
+
     def get_encoder(self):
         return self.speecht5.get_encoder()
 
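A hedged spot-check of the new behaviour (the checkpoint name is only an example and is downloaded on first use):

```python
# Spot-check sketch: SpeechT5ForTextToSpeech does not inherit GenerationMixin,
# but after this change can_generate() reports True, so GenerationConfig
# handling still applies. The checkpoint name is an example only.
from transformers import SpeechT5ForTextToSpeech

model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
assert model.can_generate()
print(model.generation_config)
```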

src/transformers/trainer_utils.py

Lines changed: 3 additions & 0 deletions
@@ -762,6 +762,9 @@ def has_length(dataset):
     except TypeError:
         # TypeError: len() of unsized object
         return False
+    except AttributeError:
+        # Ray DataSets raises an AttributeError: https://github.com/ray-project/ray/blob/master/python/ray/data/dataset.py#L5616
+        return False
 
 
 def denumpify_detensorize(metrics):
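For context, the helper reads roughly like this after the change (a sketch reconstructed from the hunk; the try/len part is paraphrased rather than copied verbatim from the file):

```python
# Sketch of how `has_length` reads after this change, reconstructed from the
# hunk above; the try/len part is paraphrased, not a verbatim copy.
def has_length(dataset) -> bool:
    """Return True if `dataset` implements a usable __len__()."""
    try:
        return len(dataset) is not None
    except TypeError:
        # TypeError: len() of unsized object
        return False
    except AttributeError:
        # Ray Datasets raises an AttributeError when len() is taken
        return False
```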

src/transformers/training_args.py

Lines changed: 1 addition & 1 deletion
@@ -1641,7 +1641,7 @@ def __post_init__(self):
             self.do_eval = True
 
         if self.torch_empty_cache_steps is not None:
-            if not (isinstance(self.torch_empty_cache_steps, int) or self.torch_empty_cache_steps > 0):
+            if not (isinstance(self.torch_empty_cache_steps, int) and self.torch_empty_cache_steps > 0):
                 raise ValueError(
                     f"`torch_empty_cache_steps` must be an integer bigger than 0, got {self.torch_empty_cache_steps}."
                 )
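The one-character fix flips the check from "either condition" to "both conditions"; a standalone sketch of the corrected logic (the function name is made up for illustration):

```python
# Standalone sketch of the corrected validation: the value must be an int AND
# positive. With the old `or`, 0 or -3 would slip through (they are ints) and
# 2.5 would slip through (it is > 0). The function name is hypothetical.
def validate_torch_empty_cache_steps(value):
    if value is not None:
        if not (isinstance(value, int) and value > 0):
            raise ValueError(
                f"`torch_empty_cache_steps` must be an integer bigger than 0, got {value}."
            )
    return value
```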

src/transformers/utils/quantization_config.py

Lines changed: 3 additions & 0 deletions
@@ -902,6 +902,9 @@ def post_init(self):
             )
 
         if self.backend == AwqBackendPackingMethod.LLMAWQ:
+            # Only cuda device can run this function
+            if not torch.cuda.is_available():
+                raise ValueError("LLM-AWQ backend is only supported on CUDA")
             compute_capability = torch.cuda.get_device_capability()
             major, minor = compute_capability
             if major < 8:
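Outside of the config class, the same guard pattern boils down to the following sketch (the helper name and the second error message are hypothetical):

```python
# Hypothetical standalone helper mirroring the guard added above: LLM-AWQ needs
# a CUDA device, and the surrounding code additionally requires compute
# capability >= 8.0. Helper name and second message are illustrative only.
import torch


def check_llm_awq_hardware() -> None:
    if not torch.cuda.is_available():
        raise ValueError("LLM-AWQ backend is only supported on CUDA")
    major, minor = torch.cuda.get_device_capability()
    if major < 8:
        raise ValueError("LLM-AWQ quantization requires a GPU with compute capability >= 8.0")
```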

tests/models/bark/test_modeling_bark.py

Lines changed: 4 additions & 0 deletions
@@ -1076,6 +1076,10 @@ def fine_generation_config(self):
         fine_generation_config = BarkFineGenerationConfig(**self.model.generation_config.fine_acoustics_config)
         return fine_generation_config
 
+    def test_model_can_generate(self):
+        # Bark has custom generate without inheriting GenerationMixin. This test could prevent regression.
+        self.assertTrue(self.model.can_generate())
+
     @slow
     def test_generate_semantic(self):
         input_ids = self.inputs
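The same spot-check can be reproduced ad hoc outside the test suite (a sketch; the checkpoint name is an example and its weights are downloaded on first use):

```python
# Ad-hoc sketch mirroring the regression test: Bark defines its own generate()
# without inheriting GenerationMixin, and can_generate() should still be True.
# The checkpoint name is an example only.
from transformers import BarkModel

model = BarkModel.from_pretrained("suno/bark-small")
assert model.can_generate()
```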
