From e4e821c92b0600e9a0c3e8b99222927dc7f61908 Mon Sep 17 00:00:00 2001
From: nakranivaibhav <67785830+nakranivaibhav@users.noreply.github.com>
Date: Wed, 24 Jan 2024 22:17:34 +0530
Subject: [PATCH] Improved type hinting for all attention parameters (#28479)

* Changed type hinting for all attention inputs to 'Optional[Tuple[torch.FloatTensor,...]] = None'

* Fixed the ruff formatting issue

* fixed type hinting for all hidden_states to 'Optional[Tuple[torch.FloatTensor, ...]] = None'

* Changed type hinting in these 12 scripts modeling_dpr.py,modeling_nat.py,idefics/vision.py,modeling_tf_dpr.py,modeling_luke.py,modeling_swin.py,modeling_tf_swin.py,modeling_blip.py,modeling_tf_blip.py,modeling_donut_swin.py,modeling_dinat.py,modeling_swinv2.py

* test fail update

* fixed type hinting for these 15 scripts modeling_xlnet.py,modeling_tf_xlnet.py,modeling_led.py,modeling_tf_led.py,modeling_rwkv.py,modeling_dpt.py,modeling_tf_cvt.py,modeling_clip.py,modeling_flax_clip.py,modeling_tf_clip.py,modeling_longformer.py,modeling_tf_longformer.py,modeling_siglip.py,modeling_clap.py,modeling_git.py

* Changed type hinting in these 12 scripts modeling_dpr.py,modeling_nat.py,idefics/vision.py,modeling_tf_dpr.py,modeling_luke.py,modeling_swin.py,modeling_tf_swin.py,modeling_blip.py,modeling_tf_blip.py,modeling_donut_swin.py,modeling_dinat.py,modeling_swinv2.py

* test fail update

* Removed the myvenv file

* Fixed type hinting for these 8 scripts modeling_tvlt.py,modeling_sam.py,modeling_tf_sam.py,modeling_tvp.py,modeling_rag.py,modeling_tf_rag.py,modeling_tf_xlm.py,modeling_xlm.py
---
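For context: `Tuple[torch.FloatTensor]` is the type of a tuple holding exactly one tensor, while `Tuple[torch.FloatTensor, ...]` is the type of a tuple of any length, which is what these output fields actually hold (one entry per layer). A minimal sketch of the difference, using a hypothetical `ExampleOutput` dataclass (illustration only, not part of the patch):

    # Illustration only: what the trailing ellipsis changes.
    from dataclasses import dataclass
    from typing import Optional, Tuple

    import torch


    @dataclass
    class ExampleOutput:  # hypothetical stand-in for the ModelOutput classes below
        # Tuple[torch.FloatTensor] describes a tuple of exactly one tensor;
        # Tuple[torch.FloatTensor, ...] describes a tuple of any length,
        # matching the one-entry-per-layer tuples these models return.
        hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


    per_layer = tuple(torch.zeros(2, 4) for _ in range(3))  # e.g. 3 layers
    out = ExampleOutput(hidden_states=per_layer)
    # Under the old annotation, a static checker such as mypy flags this
    # 3-tuple as a length mismatch; with the ", ..." form it is accepted.
    # (At runtime, dataclasses do not enforce annotations either way.)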
 src/transformers/modeling_outputs.py          | 226 +++++++++---------
 src/transformers/models/blip/modeling_blip.py |  12 +-
 .../models/blip/modeling_tf_blip.py           |  12 +-
 src/transformers/models/clap/modeling_clap.py |   8 +-
 src/transformers/models/clip/modeling_clip.py |   8 +-
 .../models/clip/modeling_flax_clip.py         |   4 +-
 src/transformers/models/cvt/modeling_cvt.py   |   2 +-
 .../models/cvt/modeling_tf_cvt.py             |   2 +-
 .../models/dinat/modeling_dinat.py            |  18 +-
 .../models/donut/modeling_donut_swin.py       |  12 +-
 src/transformers/models/dpr/modeling_dpr.py   |  12 +-
 .../models/dpr/modeling_tf_dpr.py             |  12 +-
 src/transformers/models/dpt/modeling_dpt.py   |   8 +-
 src/transformers/models/git/modeling_git.py   |   4 +-
 src/transformers/models/idefics/vision.py     |   4 +-
 src/transformers/models/led/modeling_led.py   |  54 ++---
 .../models/led/modeling_tf_led.py             |  30 +--
 .../models/longformer/modeling_longformer.py  |  42 ++--
 .../longformer/modeling_tf_longformer.py      |  42 ++--
 src/transformers/models/luke/modeling_luke.py |  50 ++--
 src/transformers/models/nat/modeling_nat.py   |  18 +-
 src/transformers/models/rag/modeling_rag.py   |  28 +--
 .../models/rag/modeling_tf_rag.py             |  24 +-
 src/transformers/models/rwkv/modeling_rwkv.py |   8 +-
 src/transformers/models/sam/modeling_sam.py   |  10 +-
 .../models/sam/modeling_tf_sam.py             |  10 +-
 .../models/siglip/modeling_siglip.py          |   8 +-
 src/transformers/models/swin/modeling_swin.py |  24 +-
 .../models/swin/modeling_tf_swin.py           |  24 +-
 .../models/swinv2/modeling_swinv2.py          |  24 +-
 src/transformers/models/tvlt/modeling_tvlt.py |  12 +-
 src/transformers/models/tvp/modeling_tvp.py   |   4 +-
 .../models/xlm/modeling_tf_xlm.py             |   4 +-
 src/transformers/models/xlm/modeling_xlm.py   |   4 +-
 .../models/xlnet/modeling_tf_xlnet.py         |  24 +-
 .../models/xlnet/modeling_xlnet.py            |  28 +--
 36 files changed, 408 insertions(+), 408 deletions(-)

diff --git a/src/transformers/modeling_outputs.py b/src/transformers/modeling_outputs.py
index cbee6a292b531b..7328e05186f2de 100755
--- a/src/transformers/modeling_outputs.py
+++ b/src/transformers/modeling_outputs.py
@@ -43,8 +43,8 @@ class BaseModelOutput(ModelOutput):
     """

     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -63,7 +63,7 @@ class BaseModelOutputWithNoAttention(ModelOutput):
     """

     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -94,8 +94,8 @@ class BaseModelOutputWithPooling(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     pooler_output: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -117,7 +117,7 @@ class BaseModelOutputWithPoolingAndNoAttention(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     pooler_output: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -155,8 +155,8 @@ class BaseModelOutputWithPast(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -187,9 +187,9 @@ class BaseModelOutputWithCrossAttentions(ModelOutput):
     """

     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -235,10 +235,10 @@ class BaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     pooler_output: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -282,9 +282,9 @@ class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -329,8 +329,8 @@ class MoECausalLMOutputWithPast(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     z_loss: torch.FloatTensor = None
     aux_loss: torch.FloatTensor = None
     router_logits: Optional[Tuple[torch.FloatTensor]] = None
@@ -363,8 +363,8 @@ class MoEModelOutput(ModelOutput):
     """

     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     router_probs: Optional[Tuple[torch.FloatTensor]] = None
@@ -405,8 +405,8 @@ class MoeModelOutputWithPast(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     router_logits: Optional[Tuple[torch.FloatTensor]] = None
@@ -454,8 +454,8 @@ class MoeCausalLMOutputWithPast(ModelOutput):

     aux_loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     router_logits: Optional[Tuple[torch.FloatTensor]] = None
@@ -506,9 +506,9 @@ class MoEModelOutputWithPastAndCrossAttentions(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     router_probs: Optional[Tuple[torch.FloatTensor]] = None
@@ -565,12 +565,12 @@ class Seq2SeqModelOutput(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -635,13 +635,13 @@ class Seq2SeqMoEModelOutput(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     decoder_router_logits: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_router_logits: Optional[Tuple[torch.FloatTensor]] = None
@@ -670,8 +670,8 @@ class CausalLMOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -706,8 +706,8 @@ class CausalLMOutputWithPast(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -749,9 +749,9 @@ class CausalLMOutputWithCrossAttentions(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -786,8 +786,8 @@ class SequenceClassifierOutputWithPast(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -815,8 +815,8 @@ class MaskedLMOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -871,12 +871,12 @@ class Seq2SeqLMOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -944,13 +944,13 @@ class Seq2SeqMoEOutput(ModelOutput):

     encoder_aux_loss: torch.FloatTensor = None
     decoder_aux_loss: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     decoder_router_logits: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_router_logits: Optional[Tuple[torch.FloatTensor]] = None
@@ -980,8 +980,8 @@ class NextSentencePredictorOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1009,8 +1009,8 @@ class SequenceClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1065,12 +1065,12 @@ class Seq2SeqSequenceClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1100,8 +1100,8 @@ class MultipleChoiceModelOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1129,8 +1129,8 @@ class TokenClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1161,8 +1161,8 @@ class QuestionAnsweringModelOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     start_logits: torch.FloatTensor = None
     end_logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1220,12 +1220,12 @@ class Seq2SeqQuestionAnsweringModelOutput(ModelOutput):

     start_logits: torch.FloatTensor = None
     end_logits: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1262,8 +1262,8 @@ class SemanticSegmenterOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1290,8 +1290,8 @@ class ImageClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1312,7 +1312,7 @@ class ImageClassifierOutputWithNoAttention(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1341,8 +1341,8 @@ class DepthEstimatorOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     predicted_depth: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1369,8 +1369,8 @@ class ImageSuperResolutionOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     reconstruction: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1398,8 +1398,8 @@ class Wav2Vec2BaseModelOutput(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     extract_features: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1430,8 +1430,8 @@ class XVectorOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     embeddings: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1457,8 +1457,8 @@ class BackboneOutput(ModelOutput):
     """

     feature_maps: Tuple[torch.FloatTensor] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1493,8 +1493,8 @@ class BaseModelOutputWithPoolingAndProjection(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     pooler_output: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     projection_state: Optional[Tuple[torch.FloatTensor]] = None
@@ -1550,12 +1550,12 @@ class Seq2SeqSpectrogramOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     spectrogram: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1619,12 +1619,12 @@ class Seq2SeqTSModelOutput(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     loc: Optional[torch.FloatTensor] = None
     scale: Optional[torch.FloatTensor] = None
     static_features: Optional[torch.FloatTensor] = None
@@ -1691,12 +1691,12 @@ class Seq2SeqTSPredictionOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     params: Optional[Tuple[torch.FloatTensor]] = None
     past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     loc: Optional[torch.FloatTensor] = None
     scale: Optional[torch.FloatTensor] = None
     static_features: Optional[torch.FloatTensor] = None
@@ -1740,8 +1740,8 @@ class MaskedImageModelingOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     reconstruction: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

     @property
     def logits(self):
diff --git a/src/transformers/models/blip/modeling_blip.py b/src/transformers/models/blip/modeling_blip.py
index b6173bcdad152c..1dc79efb6546af 100644
--- a/src/transformers/models/blip/modeling_blip.py
+++ b/src/transformers/models/blip/modeling_blip.py
@@ -98,8 +98,8 @@ class BlipForConditionalGenerationModelOutput(ModelOutput):

     logits: Optional[Tuple[torch.FloatTensor]] = None
     image_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None

     @property
     def decoder_logits(self):
@@ -140,8 +140,8 @@ class BlipTextVisionModelOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     image_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -181,9 +181,9 @@ class BlipImageTextMatchingModelOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     image_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
     vision_pooler_output: Optional[torch.FloatTensor] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     question_embeds: Optional[Tuple[torch.FloatTensor]] = None
diff --git a/src/transformers/models/blip/modeling_tf_blip.py b/src/transformers/models/blip/modeling_tf_blip.py
index ec2e0043d9e5ae..2747f72ece1259 100644
--- a/src/transformers/models/blip/modeling_tf_blip.py
+++ b/src/transformers/models/blip/modeling_tf_blip.py
@@ -108,8 +108,8 @@ class TFBlipForConditionalGenerationModelOutput(ModelOutput):

     logits: Tuple[tf.Tensor] | None = None
     image_embeds: tf.Tensor | None = None
     last_hidden_state: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None

     @property
     def decoder_logits(self):
@@ -150,8 +150,8 @@ class TFBlipTextVisionModelOutput(ModelOutput):

     loss: tf.Tensor | None = None
     image_embeds: tf.Tensor | None = None
     last_hidden_state: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -191,9 +191,9 @@ class TFBlipImageTextMatchingModelOutput(ModelOutput):

     loss: tf.Tensor | None = None
     image_embeds: tf.Tensor | None = None
     last_hidden_state: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
     vision_pooler_output: tf.Tensor | None = None
-    attentions: Tuple[tf.Tensor] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
     question_embeds: Tuple[tf.Tensor] | None = None
diff --git a/src/transformers/models/clap/modeling_clap.py b/src/transformers/models/clap/modeling_clap.py
index b2997e1d49353f..6310b9675fb654 100644
--- a/src/transformers/models/clap/modeling_clap.py
+++ b/src/transformers/models/clap/modeling_clap.py
@@ -159,8 +159,8 @@ class ClapTextModelOutput(ModelOutput):

     text_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -188,8 +188,8 @@ class ClapAudioModelOutput(ModelOutput):

     audio_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
diff --git a/src/transformers/models/clip/modeling_clip.py b/src/transformers/models/clip/modeling_clip.py
index 77d24a5da32518..2d93a6f11db38b 100644
--- a/src/transformers/models/clip/modeling_clip.py
+++ b/src/transformers/models/clip/modeling_clip.py
@@ -83,8 +83,8 @@ class CLIPVisionModelOutput(ModelOutput):

     image_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -112,8 +112,8 @@ class CLIPTextModelOutput(ModelOutput):

     text_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
diff --git a/src/transformers/models/clip/modeling_flax_clip.py b/src/transformers/models/clip/modeling_flax_clip.py
index bae7097a8c9d65..265e7005b74e0e 100644
--- a/src/transformers/models/clip/modeling_flax_clip.py
+++ b/src/transformers/models/clip/modeling_flax_clip.py
@@ -182,8 +182,8 @@ class FlaxCLIPTextModelOutput(ModelOutput):

     text_embeds: jnp.ndarray = None
     last_hidden_state: jnp.ndarray = None
-    hidden_states: Optional[Tuple[jnp.ndarray]] = None
-    attentions: Optional[Tuple[jnp.ndarray]] = None
+    hidden_states: Optional[Tuple[jnp.ndarray, ...]] = None
+    attentions: Optional[Tuple[jnp.ndarray, ...]] = None


 @flax.struct.dataclass
diff --git a/src/transformers/models/cvt/modeling_cvt.py b/src/transformers/models/cvt/modeling_cvt.py
index d21b5c9a8749a6..ef7e3671e69d35 100644
--- a/src/transformers/models/cvt/modeling_cvt.py
+++ b/src/transformers/models/cvt/modeling_cvt.py
@@ -74,7 +74,7 @@ class BaseModelOutputWithCLSToken(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     cls_token_value: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 # Copied from transformers.models.beit.modeling_beit.drop_path
diff --git a/src/transformers/models/cvt/modeling_tf_cvt.py b/src/transformers/models/cvt/modeling_tf_cvt.py
index e21c33ad3f0cc2..061a80eb45c1e9 100644
--- a/src/transformers/models/cvt/modeling_tf_cvt.py
+++ b/src/transformers/models/cvt/modeling_tf_cvt.py
@@ -77,7 +77,7 @@ class TFBaseModelOutputWithCLSToken(ModelOutput):

     last_hidden_state: tf.Tensor = None
     cls_token_value: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None


 class TFCvtDropPath(tf.keras.layers.Layer):
diff --git a/src/transformers/models/dinat/modeling_dinat.py b/src/transformers/models/dinat/modeling_dinat.py
index aae79e0452a2d7..71470efece28c1 100644
--- a/src/transformers/models/dinat/modeling_dinat.py
+++ b/src/transformers/models/dinat/modeling_dinat.py
@@ -105,9 +105,9 @@ class DinatEncoderOutput(ModelOutput):
     """

     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -142,9 +142,9 @@ class DinatModelOutput(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     pooler_output: Optional[torch.FloatTensor] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -179,9 +179,9 @@ class DinatImageClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 # Copied from transformers.models.nat.modeling_nat.NatEmbeddings with Nat->Dinat
diff --git a/src/transformers/models/donut/modeling_donut_swin.py b/src/transformers/models/donut/modeling_donut_swin.py
index 4e02c320a72971..65af7f5b1c2ac6 100644
--- a/src/transformers/models/donut/modeling_donut_swin.py
+++ b/src/transformers/models/donut/modeling_donut_swin.py
@@ -83,9 +83,9 @@ class DonutSwinEncoderOutput(ModelOutput):
     """

     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -120,9 +120,9 @@ class DonutSwinModelOutput(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     pooler_output: Optional[torch.FloatTensor] = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 # Copied from transformers.models.swin.modeling_swin.window_partition
diff --git a/src/transformers/models/dpr/modeling_dpr.py b/src/transformers/models/dpr/modeling_dpr.py
index cc0d0a1fcb6d46..1071a42d810076 100644
--- a/src/transformers/models/dpr/modeling_dpr.py
+++ b/src/transformers/models/dpr/modeling_dpr.py
@@ -82,8 +82,8 @@ class DPRContextEncoderOutput(ModelOutput):
     """

     pooler_output: torch.FloatTensor
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -110,8 +110,8 @@ class DPRQuestionEncoderOutput(ModelOutput):
     """

     pooler_output: torch.FloatTensor
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -143,8 +143,8 @@ class DPRReaderOutput(ModelOutput):

     start_logits: torch.FloatTensor
     end_logits: torch.FloatTensor = None
     relevance_logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 class DPRPreTrainedModel(PreTrainedModel):
diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py
index 9dec1453acc0d1..db9aa6d2272449 100644
--- a/src/transformers/models/dpr/modeling_tf_dpr.py
+++ b/src/transformers/models/dpr/modeling_tf_dpr.py
@@ -82,8 +82,8 @@ class TFDPRContextEncoderOutput(ModelOutput):
     """

     pooler_output: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -110,8 +110,8 @@ class TFDPRQuestionEncoderOutput(ModelOutput):
     """

     pooler_output: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -143,8 +143,8 @@ class TFDPRReaderOutput(ModelOutput):

     start_logits: tf.Tensor = None
     end_logits: tf.Tensor = None
     relevance_logits: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None


 class TFDPREncoderLayer(tf.keras.layers.Layer):
diff --git a/src/transformers/models/dpt/modeling_dpt.py b/src/transformers/models/dpt/modeling_dpt.py
index ca44b6a42aee3f..09fc6406fd854e 100755
--- a/src/transformers/models/dpt/modeling_dpt.py
+++ b/src/transformers/models/dpt/modeling_dpt.py
@@ -76,7 +76,7 @@ class BaseModelOutputWithIntermediateActivations(ModelOutput):
     """

     last_hidden_states: torch.FloatTensor = None
-    intermediate_activations: Optional[Tuple[torch.FloatTensor]] = None
+    intermediate_activations: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -110,9 +110,9 @@ class BaseModelOutputWithPoolingAndIntermediateActivations(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     pooler_output: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    intermediate_activations: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    intermediate_activations: Optional[Tuple[torch.FloatTensor, ...]] = None


 class DPTViTHybridEmbeddings(nn.Module):
diff --git a/src/transformers/models/git/modeling_git.py b/src/transformers/models/git/modeling_git.py
index 8909efa3862fc6..c4baed9e0bc98c 100644
--- a/src/transformers/models/git/modeling_git.py
+++ b/src/transformers/models/git/modeling_git.py
@@ -77,8 +77,8 @@ class GitVisionModelOutput(ModelOutput):

     image_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 class GitEmbeddings(nn.Module):
diff --git a/src/transformers/models/idefics/vision.py b/src/transformers/models/idefics/vision.py
index 04b2894c4af472..d90f837b3c77ba 100644
--- a/src/transformers/models/idefics/vision.py
+++ b/src/transformers/models/idefics/vision.py
@@ -57,8 +57,8 @@ class IdeficsVisionModelOutput(ModelOutput):

     image_embeds: Optional[torch.FloatTensor] = None
     last_hidden_state: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 # Adapted from transformers.models.clip.modeling_clip.CLIPVisionEmbeddings
diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py
index 21061210ab23d2..c10a8de11584d2 100755
--- a/src/transformers/models/led/modeling_led.py
+++ b/src/transformers/models/led/modeling_led.py
@@ -1191,9 +1191,9 @@ class LEDEncoderBaseModelOutput(ModelOutput):
     """

     last_hidden_state: torch.FloatTensor
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1255,13 +1255,13 @@ class LEDSeq2SeqModelOutput(ModelOutput):

     last_hidden_state: torch.FloatTensor = None
     past_key_values: Optional[List[torch.FloatTensor]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1322,13 +1322,13 @@ class LEDSeq2SeqLMOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[List[torch.FloatTensor]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1389,13 +1389,13 @@ class LEDSeq2SeqSequenceClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
     past_key_values: Optional[List[torch.FloatTensor]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -1459,13 +1459,13 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):

     start_logits: torch.FloatTensor = None
     end_logits: torch.FloatTensor = None
     past_key_values: Optional[List[torch.FloatTensor]] = None
-    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
     encoder_last_hidden_state: Optional[torch.FloatTensor] = None
-    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
-    encoder_global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    encoder_global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 LED_START_DOCSTRING = r"""
diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py
index fcc90eca2582ea..95397cd18e97e7 100644
--- a/src/transformers/models/led/modeling_tf_led.py
+++ b/src/transformers/models/led/modeling_tf_led.py
@@ -1471,9 +1471,9 @@ class TFLEDEncoderBaseModelOutput(ModelOutput):
     """

     last_hidden_state: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
-    global_attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
+    global_attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -1535,13 +1535,13 @@ class TFLEDSeq2SeqModelOutput(ModelOutput):

     last_hidden_state: tf.Tensor = None
     past_key_values: List[tf.Tensor] | None = None
-    decoder_hidden_states: Tuple[tf.Tensor] | None = None
-    decoder_attentions: Tuple[tf.Tensor] | None = None
-    cross_attentions: Tuple[tf.Tensor] | None = None
+    decoder_hidden_states: Tuple[tf.Tensor, ...] | None = None
+    decoder_attentions: Tuple[tf.Tensor, ...] | None = None
+    cross_attentions: Tuple[tf.Tensor, ...] | None = None
     encoder_last_hidden_state: tf.Tensor | None = None
-    encoder_hidden_states: Tuple[tf.Tensor] | None = None
-    encoder_attentions: Tuple[tf.Tensor] | None = None
-    encoder_global_attentions: Tuple[tf.Tensor] | None = None
+    encoder_hidden_states: Tuple[tf.Tensor, ...] | None = None
+    encoder_attentions: Tuple[tf.Tensor, ...] | None = None
+    encoder_global_attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -1602,13 +1602,13 @@ class TFLEDSeq2SeqLMOutput(ModelOutput):

     loss: tf.Tensor | None = None
     logits: tf.Tensor = None
     past_key_values: List[tf.Tensor] | None = None
-    decoder_hidden_states: Tuple[tf.Tensor] | None = None
-    decoder_attentions: Tuple[tf.Tensor] | None = None
-    cross_attentions: Tuple[tf.Tensor] | None = None
+    decoder_hidden_states: Tuple[tf.Tensor, ...] | None = None
+    decoder_attentions: Tuple[tf.Tensor, ...] | None = None
+    cross_attentions: Tuple[tf.Tensor, ...] | None = None
     encoder_last_hidden_state: tf.Tensor | None = None
-    encoder_hidden_states: Tuple[tf.Tensor] | None = None
-    encoder_attentions: Tuple[tf.Tensor] | None = None
-    encoder_global_attentions: Tuple[tf.Tensor] | None = None
+    encoder_hidden_states: Tuple[tf.Tensor, ...] | None = None
+    encoder_attentions: Tuple[tf.Tensor, ...] | None = None
+    encoder_global_attentions: Tuple[tf.Tensor, ...] | None = None


 LED_START_DOCSTRING = r"""
diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py
index 40587cebc17697..aefd225869ca8e 100755
--- a/src/transformers/models/longformer/modeling_longformer.py
+++ b/src/transformers/models/longformer/modeling_longformer.py
@@ -90,9 +90,9 @@ class LongformerBaseModelOutput(ModelOutput):
     """

     last_hidden_state: torch.FloatTensor
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -138,9 +138,9 @@ class LongformerBaseModelOutputWithPooling(ModelOutput):

     last_hidden_state: torch.FloatTensor
     pooler_output: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -184,9 +184,9 @@ class LongformerMaskedLMOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -233,9 +233,9 @@ class LongformerQuestionAnsweringModelOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     start_logits: torch.FloatTensor = None
     end_logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -279,9 +279,9 @@ class LongformerSequenceClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -327,9 +327,9 @@ class LongformerMultipleChoiceModelOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -373,9 +373,9 @@ class LongformerTokenClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
-    global_attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
+    global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 def _get_question_end_index(input_ids, sep_token_id):
diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py
index c8ecb9521b4a1d..c586157f9da30d 100644
--- a/src/transformers/models/longformer/modeling_tf_longformer.py
+++ b/src/transformers/models/longformer/modeling_tf_longformer.py
@@ -103,9 +103,9 @@ class TFLongformerBaseModelOutput(ModelOutput):
     """

     last_hidden_state: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
-    global_attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
+    global_attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -151,9 +151,9 @@ class TFLongformerBaseModelOutputWithPooling(ModelOutput):

     last_hidden_state: tf.Tensor = None
     pooler_output: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
-    global_attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
+    global_attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -197,9 +197,9 @@ class TFLongformerMaskedLMOutput(ModelOutput):

     loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
-    global_attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
+    global_attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -246,9 +246,9 @@ class TFLongformerQuestionAnsweringModelOutput(ModelOutput):

     loss: tf.Tensor | None = None
     start_logits: tf.Tensor = None
     end_logits: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
-    global_attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
+    global_attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -292,9 +292,9 @@ class TFLongformerSequenceClassifierOutput(ModelOutput):

     loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
-    global_attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
+    global_attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -340,9 +340,9 @@ class TFLongformerMultipleChoiceModelOutput(ModelOutput):

     loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
-    global_attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
+    global_attentions: Tuple[tf.Tensor, ...] | None = None


 @dataclass
@@ -386,9 +386,9 @@ class TFLongformerTokenClassifierOutput(ModelOutput):

     loss: tf.Tensor | None = None
     logits: tf.Tensor = None
-    hidden_states: Tuple[tf.Tensor] | None = None
-    attentions: Tuple[tf.Tensor] | None = None
-    global_attentions: Tuple[tf.Tensor] | None = None
+    hidden_states: Tuple[tf.Tensor, ...] | None = None
+    attentions: Tuple[tf.Tensor, ...] | None = None
+    global_attentions: Tuple[tf.Tensor, ...] | None = None


 def _compute_global_attention_mask(input_ids_shape, sep_token_indices, before_sep_token=True):
diff --git a/src/transformers/models/luke/modeling_luke.py b/src/transformers/models/luke/modeling_luke.py
index 6343867353f61e..1742283ef685d4 100644
--- a/src/transformers/models/luke/modeling_luke.py
+++ b/src/transformers/models/luke/modeling_luke.py
@@ -78,7 +78,7 @@ class BaseLukeModelOutputWithPooling(BaseModelOutputWithPooling):
     """

     entity_last_hidden_state: torch.FloatTensor = None
-    entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -109,7 +109,7 @@ class BaseLukeModelOutput(BaseModelOutput):
     """

     entity_last_hidden_state: torch.FloatTensor = None
-    entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -151,8 +151,8 @@ class LukeMaskedLMOutput(ModelOutput):

     logits: torch.FloatTensor = None
     entity_logits: torch.FloatTensor = None
     hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -181,9 +181,9 @@ class EntityClassificationOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -212,9 +212,9 @@ class EntityPairClassificationOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -243,9 +243,9 @@ class EntitySpanClassificationOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
-    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
-    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
+    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


 @dataclass
@@ -277,9 +277,9 @@ class LukeSequenceClassifierOutput(ModelOutput):

     loss: Optional[torch.FloatTensor] = None
     logits: torch.FloatTensor = None
- hidden_states: Optional[Tuple[torch.FloatTensor]] = None - entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -311,9 +311,9 @@ class LukeTokenClassifierOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -348,9 +348,9 @@ class LukeQuestionAnsweringModelOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None start_logits: torch.FloatTensor = None end_logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -384,9 +384,9 @@ class LukeMultipleChoiceModelOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None class LukeEmbeddings(nn.Module): diff --git a/src/transformers/models/nat/modeling_nat.py b/src/transformers/models/nat/modeling_nat.py index 278ed3d4b6bea2..7384e2ac4c1257 100644 --- a/src/transformers/models/nat/modeling_nat.py +++ b/src/transformers/models/nat/modeling_nat.py @@ -104,9 +104,9 @@ class NatEncoderOutput(ModelOutput): """ last_hidden_state: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -140,9 +140,9 @@ class NatModelOutput(ModelOutput): last_hidden_state: torch.FloatTensor = None pooler_output: Optional[torch.FloatTensor] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -176,9 +176,9 @@ class NatImageClassifierOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: 
Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None class NatEmbeddings(nn.Module): diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py index df7c68cef076ab..09fc9dabe84e58 100644 --- a/src/transformers/models/rag/modeling_rag.py +++ b/src/transformers/models/rag/modeling_rag.py @@ -120,14 +120,14 @@ class RetrievAugLMMarginOutput(ModelOutput): context_input_ids: Optional[torch.LongTensor] = None context_attention_mask: Optional[torch.LongTensor] = None question_encoder_last_hidden_state: Optional[torch.FloatTensor] = None - question_enc_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - question_enc_attentions: Optional[Tuple[torch.FloatTensor]] = None + question_enc_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + question_enc_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None generator_enc_last_hidden_state: Optional[torch.FloatTensor] = None - generator_enc_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - generator_enc_attentions: Optional[Tuple[torch.FloatTensor]] = None - generator_dec_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - generator_dec_attentions: Optional[Tuple[torch.FloatTensor]] = None - generator_cross_attentions: Optional[Tuple[torch.FloatTensor]] = None + generator_enc_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + generator_enc_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + generator_dec_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + generator_dec_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + generator_cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -210,14 +210,14 @@ class RetrievAugLMOutput(ModelOutput): context_input_ids: Optional[torch.LongTensor] = None context_attention_mask: Optional[torch.LongTensor] = None question_encoder_last_hidden_state: Optional[torch.FloatTensor] = None - question_enc_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - question_enc_attentions: Optional[Tuple[torch.FloatTensor]] = None + question_enc_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + question_enc_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None generator_enc_last_hidden_state: Optional[torch.FloatTensor] = None - generator_enc_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - generator_enc_attentions: Optional[Tuple[torch.FloatTensor]] = None - generator_dec_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - generator_dec_attentions: Optional[Tuple[torch.FloatTensor]] = None - generator_cross_attentions: Optional[Tuple[torch.FloatTensor]] = None + generator_enc_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + generator_enc_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + generator_dec_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + generator_dec_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + generator_cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None class RagPreTrainedModel(PreTrainedModel): diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 5a4bd20173568f..7ebf1bb1461e3a 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ 
b/src/transformers/models/rag/modeling_tf_rag.py @@ -123,13 +123,13 @@ class TFRetrievAugLMMarginOutput(ModelOutput): context_input_ids: tf.Tensor | None = None context_attention_mask: tf.Tensor | None = None question_encoder_last_hidden_state: tf.Tensor | None = None - question_enc_hidden_states: Tuple[tf.Tensor] | None = None - question_enc_attentions: Tuple[tf.Tensor] | None = None + question_enc_hidden_states: Tuple[tf.Tensor, ...] | None = None + question_enc_attentions: Tuple[tf.Tensor, ...] | None = None generator_enc_last_hidden_state: tf.Tensor | None = None - generator_enc_hidden_states: Tuple[tf.Tensor] | None = None - generator_enc_attentions: Tuple[tf.Tensor] | None = None - generator_dec_hidden_states: Tuple[tf.Tensor] | None = None - generator_dec_attentions: Tuple[tf.Tensor] | None = None + generator_enc_hidden_states: Tuple[tf.Tensor, ...] | None = None + generator_enc_attentions: Tuple[tf.Tensor, ...] | None = None + generator_dec_hidden_states: Tuple[tf.Tensor, ...] | None = None + generator_dec_attentions: Tuple[tf.Tensor, ...] | None = None @dataclass @@ -206,13 +206,13 @@ class TFRetrievAugLMOutput(ModelOutput): context_input_ids: tf.Tensor | None = None context_attention_mask: tf.Tensor | None = None question_encoder_last_hidden_state: tf.Tensor | None = None - question_enc_hidden_states: Tuple[tf.Tensor] | None = None - question_enc_attentions: Tuple[tf.Tensor] | None = None + question_enc_hidden_states: Tuple[tf.Tensor, ...] | None = None + question_enc_attentions: Tuple[tf.Tensor, ...] | None = None generator_enc_last_hidden_state: tf.Tensor | None = None - generator_enc_hidden_states: Tuple[tf.Tensor] | None = None - generator_enc_attentions: Tuple[tf.Tensor] | None = None - generator_dec_hidden_states: Tuple[tf.Tensor] | None = None - generator_dec_attentions: Tuple[tf.Tensor] | None = None + generator_enc_hidden_states: Tuple[tf.Tensor, ...] | None = None + generator_enc_attentions: Tuple[tf.Tensor, ...] | None = None + generator_dec_hidden_states: Tuple[tf.Tensor, ...] | None = None + generator_dec_attentions: Tuple[tf.Tensor, ...] 
| None = None class TFRagPreTrainedModel(TFPreTrainedModel): diff --git a/src/transformers/models/rwkv/modeling_rwkv.py b/src/transformers/models/rwkv/modeling_rwkv.py index ef3f294c0d5d82..e6dfa46f2a0539 100644 --- a/src/transformers/models/rwkv/modeling_rwkv.py +++ b/src/transformers/models/rwkv/modeling_rwkv.py @@ -493,8 +493,8 @@ class RwkvOutput(ModelOutput): last_hidden_state: torch.FloatTensor = None state: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -526,8 +526,8 @@ class RwkvCausalLMOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None state: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None RWKV_START_DOCSTRING = r""" diff --git a/src/transformers/models/sam/modeling_sam.py b/src/transformers/models/sam/modeling_sam.py index 5b459f64695b39..7fc9e670ce9b29 100644 --- a/src/transformers/models/sam/modeling_sam.py +++ b/src/transformers/models/sam/modeling_sam.py @@ -71,8 +71,8 @@ class SamVisionEncoderOutput(ModelOutput): image_embeds: Optional[torch.FloatTensor] = None last_hidden_state: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -106,9 +106,9 @@ class SamImageSegmentationOutput(ModelOutput): iou_scores: torch.FloatTensor = None pred_masks: torch.FloatTensor = None - vision_hidden_states: Optional[Tuple[torch.FloatTensor]] = None - vision_attentions: Optional[Tuple[torch.FloatTensor]] = None - mask_decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None + vision_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + vision_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + mask_decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None class SamPatchEmbeddings(nn.Module): diff --git a/src/transformers/models/sam/modeling_tf_sam.py b/src/transformers/models/sam/modeling_tf_sam.py index ded4ed5f4b4589..7e79da1cb99276 100644 --- a/src/transformers/models/sam/modeling_tf_sam.py +++ b/src/transformers/models/sam/modeling_tf_sam.py @@ -74,8 +74,8 @@ class TFSamVisionEncoderOutput(ModelOutput): image_embeds: tf.Tensor | None = None last_hidden_state: tf.Tensor = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None @dataclass @@ -109,9 +109,9 @@ class TFSamImageSegmentationOutput(ModelOutput): iou_scores: tf.Tensor = None pred_masks: tf.Tensor = None - vision_hidden_states: Tuple[tf.Tensor] | None = None - vision_attentions: Tuple[tf.Tensor] | None = None - mask_decoder_attentions: Tuple[tf.Tensor] | None = None + vision_hidden_states: Tuple[tf.Tensor, ...] | None = None + vision_attentions: Tuple[tf.Tensor, ...] | None = None + mask_decoder_attentions: Tuple[tf.Tensor, ...] 
| None = None class TFSamPatchEmbeddings(tf.keras.layers.Layer): diff --git a/src/transformers/models/siglip/modeling_siglip.py b/src/transformers/models/siglip/modeling_siglip.py index 1df70200d32bd5..7ff886fed6e0fa 100644 --- a/src/transformers/models/siglip/modeling_siglip.py +++ b/src/transformers/models/siglip/modeling_siglip.py @@ -171,8 +171,8 @@ class SiglipVisionModelOutput(ModelOutput): image_embeds: Optional[torch.FloatTensor] = None last_hidden_state: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -201,8 +201,8 @@ class SiglipTextModelOutput(ModelOutput): text_embeds: Optional[torch.FloatTensor] = None last_hidden_state: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index 4fe4be5ac79a6d..967f9440090a76 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -92,9 +92,9 @@ class SwinEncoderOutput(ModelOutput): """ last_hidden_state: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -128,9 +128,9 @@ class SwinModelOutput(ModelOutput): last_hidden_state: torch.FloatTensor = None pooler_output: Optional[torch.FloatTensor] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -164,9 +164,9 @@ class SwinMaskedImageModelingOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None reconstruction: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @property def logits(self): @@ -209,9 +209,9 @@ class SwinImageClassifierOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None def 
window_partition(input_feature, window_size): diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py index cb5ba35cb2a819..f26da27790c76e 100644 --- a/src/transformers/models/swin/modeling_tf_swin.py +++ b/src/transformers/models/swin/modeling_tf_swin.py @@ -97,9 +97,9 @@ class TFSwinEncoderOutput(ModelOutput): """ last_hidden_state: tf.Tensor = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None - reshaped_hidden_states: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None + reshaped_hidden_states: Tuple[tf.Tensor, ...] | None = None @dataclass @@ -133,9 +133,9 @@ class TFSwinModelOutput(ModelOutput): last_hidden_state: tf.Tensor = None pooler_output: tf.Tensor | None = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None - reshaped_hidden_states: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None + reshaped_hidden_states: Tuple[tf.Tensor, ...] | None = None @dataclass @@ -169,9 +169,9 @@ class TFSwinMaskedImageModelingOutput(ModelOutput): loss: tf.Tensor | None = None reconstruction: tf.Tensor = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None - reshaped_hidden_states: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None + reshaped_hidden_states: Tuple[tf.Tensor, ...] | None = None @property def logits(self): @@ -214,9 +214,9 @@ class TFSwinImageClassifierOutput(ModelOutput): loss: tf.Tensor | None = None logits: tf.Tensor = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None - reshaped_hidden_states: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None + reshaped_hidden_states: Tuple[tf.Tensor, ...] 
| None = None def window_partition(input_feature: tf.Tensor, window_size: int) -> tf.Tensor: diff --git a/src/transformers/models/swinv2/modeling_swinv2.py b/src/transformers/models/swinv2/modeling_swinv2.py index ed5130c02ea4f8..15edb2e2c896c7 100644 --- a/src/transformers/models/swinv2/modeling_swinv2.py +++ b/src/transformers/models/swinv2/modeling_swinv2.py @@ -94,9 +94,9 @@ class Swinv2EncoderOutput(ModelOutput): """ last_hidden_state: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -131,9 +131,9 @@ class Swinv2ModelOutput(ModelOutput): last_hidden_state: torch.FloatTensor = None pooler_output: Optional[torch.FloatTensor] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -168,9 +168,9 @@ class Swinv2MaskedImageModelingOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None reconstruction: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None @property def logits(self): @@ -214,9 +214,9 @@ class Swinv2ImageClassifierOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None + reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None # Copied from transformers.models.swin.modeling_swin.window_partition diff --git a/src/transformers/models/tvlt/modeling_tvlt.py b/src/transformers/models/tvlt/modeling_tvlt.py index ec8b29634a93e1..d2fe1040a3ed71 100644 --- a/src/transformers/models/tvlt/modeling_tvlt.py +++ b/src/transformers/models/tvlt/modeling_tvlt.py @@ -88,8 +88,8 @@ class TvltModelOutput(ModelOutput): audio_label_masks: torch.LongTensor = None pixel_ids_restore: torch.LongTensor = None audio_ids_restore: torch.LongTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -111,8 +111,8 @@ class TvltDecoderOutput(ModelOutput): """ logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: 
Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -145,8 +145,8 @@ class TvltForPreTrainingOutput(ModelOutput): matching_logits: torch.FloatTensor = None pixel_logits: torch.FloatTensor = None audio_logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None def generate_pixel_mask_noise(pixel_values, pixel_mask=None, mask_ratio=0.75): diff --git a/src/transformers/models/tvp/modeling_tvp.py b/src/transformers/models/tvp/modeling_tvp.py index 65fac8b3a0e0ce..04192630eebd34 100644 --- a/src/transformers/models/tvp/modeling_tvp.py +++ b/src/transformers/models/tvp/modeling_tvp.py @@ -61,8 +61,8 @@ class TvpVideoGroundingOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None class TvpLoss(nn.Module): diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index 2cc93c673ca1b8..8f5cc91dde6c71 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -614,8 +614,8 @@ class TFXLMWithLMHeadModelOutput(ModelOutput): """ logits: tf.Tensor = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None XLM_START_DOCSTRING = r""" diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py index d342cde80d3cf6..2b7265489bdddf 100755 --- a/src/transformers/models/xlm/modeling_xlm.py +++ b/src/transformers/models/xlm/modeling_xlm.py @@ -297,8 +297,8 @@ class XLMForQuestionAnsweringOutput(ModelOutput): end_top_log_probs: Optional[torch.FloatTensor] = None end_top_index: Optional[torch.LongTensor] = None cls_logits: Optional[torch.FloatTensor] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None XLM_START_DOCSTRING = r""" diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 44a3850a0dba0c..7c5155282b1e86 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -871,8 +871,8 @@ class TFXLNetModelOutput(ModelOutput): last_hidden_state: tf.Tensor = None mems: List[tf.Tensor] | None = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None @dataclass @@ -908,8 +908,8 @@ class TFXLNetLMHeadModelOutput(ModelOutput): loss: tf.Tensor | None = None logits: tf.Tensor = None mems: List[tf.Tensor] | None = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] 
| None = None @dataclass @@ -942,8 +942,8 @@ class TFXLNetForSequenceClassificationOutput(ModelOutput): loss: tf.Tensor | None = None logits: tf.Tensor = None mems: List[tf.Tensor] | None = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None @dataclass @@ -976,8 +976,8 @@ class TFXLNetForTokenClassificationOutput(ModelOutput): loss: tf.Tensor | None = None logits: tf.Tensor = None mems: List[tf.Tensor] | None = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None @dataclass @@ -1012,8 +1012,8 @@ class TFXLNetForMultipleChoiceOutput(ModelOutput): loss: tf.Tensor | None = None logits: tf.Tensor = None mems: List[tf.Tensor] | None = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None @dataclass @@ -1049,8 +1049,8 @@ class TFXLNetForQuestionAnsweringSimpleOutput(ModelOutput): start_logits: tf.Tensor = None end_logits: tf.Tensor = None mems: List[tf.Tensor] | None = None - hidden_states: Tuple[tf.Tensor] | None = None - attentions: Tuple[tf.Tensor] | None = None + hidden_states: Tuple[tf.Tensor, ...] | None = None + attentions: Tuple[tf.Tensor, ...] | None = None XLNET_START_DOCSTRING = r""" diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index 9e06c3e5dba14c..022d63fa6ed842 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -605,8 +605,8 @@ class XLNetModelOutput(ModelOutput): last_hidden_state: torch.FloatTensor mems: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -642,8 +642,8 @@ class XLNetLMHeadModelOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None mems: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -676,8 +676,8 @@ class XLNetForSequenceClassificationOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None mems: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -710,8 +710,8 @@ class XLNetForTokenClassificationOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None mems: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -746,8 +746,8 @@ 
class XLNetForMultipleChoiceOutput(ModelOutput): loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None mems: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -783,8 +783,8 @@ class XLNetForQuestionAnsweringSimpleOutput(ModelOutput): start_logits: torch.FloatTensor = None end_logits: torch.FloatTensor = None mems: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None @dataclass @@ -831,8 +831,8 @@ class XLNetForQuestionAnsweringOutput(ModelOutput): end_top_index: Optional[torch.LongTensor] = None cls_logits: Optional[torch.FloatTensor] = None mems: Optional[List[torch.FloatTensor]] = None - hidden_states: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None + hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None + attentions: Optional[Tuple[torch.FloatTensor, ...]] = None XLNET_START_DOCSTRING = r"""
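Note on the annotation change (commentary, not part of the diff): 'Tuple[torch.FloatTensor]' declares a tuple of exactly one tensor, while 'Tuple[torch.FloatTensor, ...]' declares a homogeneous tuple of arbitrary length. Fields such as 'hidden_states' and 'attentions' hold one tensor per layer (plus the embedding output for 'hidden_states'), so only the variadic form matches what the models actually return. A minimal sketch of the difference a static checker sees, using illustrative names that do not appear in the patch:

from dataclasses import dataclass
from typing import Optional, Tuple

import torch


@dataclass
class ExampleOutput:
    # Mirrors the ModelOutput convention used throughout the patch:
    # one hidden state per layer plus the embedding output, so the
    # tuple length depends on the model's depth.
    last_hidden_state: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
    attentions: Optional[Tuple[torch.FloatTensor, ...]] = None


num_layers = 12
states = tuple(torch.zeros(2, 16, 64) for _ in range(num_layers + 1))
attns = tuple(torch.zeros(2, 4, 16, 16) for _ in range(num_layers))

# Accepted under the variadic annotation. Under the old
# 'Optional[Tuple[torch.FloatTensor]]', a checker such as mypy flags
# any tuple longer than one element, because Tuple[T] means a tuple
# of exactly one T.
out = ExampleOutput(
    last_hidden_state=states[-1],
    hidden_states=states,
    attentions=attns,
)

At runtime nothing changes, since dataclass field annotations are not enforced; the edit only matters to static checkers and to readers of the signatures, which is why the patch touches annotations and no executable code.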