diff --git a/examples/deebert/src/modeling_highway_bert.py b/examples/deebert/src/modeling_highway_bert.py index 90f2e5bdb64a3c..fb3393dca66410 100644 --- a/examples/deebert/src/modeling_highway_bert.py +++ b/examples/deebert/src/modeling_highway_bert.py @@ -65,7 +65,7 @@ def forward( hidden_states = layer_outputs[0] if self.output_attentions: - all_attentions = all_attentions + layer_outputs[1:] + all_attentions = all_attentions + (layer_outputs[1],) current_outputs = (hidden_states,) if self.output_hidden_states: diff --git a/src/transformers/modeling_longformer.py b/src/transformers/modeling_longformer.py index 49b3df9c51003c..754f312d1f0b71 100755 --- a/src/transformers/modeling_longformer.py +++ b/src/transformers/modeling_longformer.py @@ -917,7 +917,7 @@ def custom_forward(*inputs): hidden_states = layer_outputs[0] if output_attentions: - all_attentions = all_attentions + layer_outputs[1:] + all_attentions = all_attentions + (layer_outputs[1],) # Add last layer if output_hidden_states: diff --git a/src/transformers/modeling_mobilebert.py b/src/transformers/modeling_mobilebert.py index 0ec1039222cda5..4ed636251a23b0 100644 --- a/src/transformers/modeling_mobilebert.py +++ b/src/transformers/modeling_mobilebert.py @@ -577,7 +577,7 @@ def forward( hidden_states = layer_outputs[0] if output_attentions: - all_attentions = all_attentions + layer_outputs[1:] + all_attentions = all_attentions + (layer_outputs[1],) # Add last layer if output_hidden_states: diff --git a/src/transformers/modeling_tf_bert.py b/src/transformers/modeling_tf_bert.py index c362b9ed2e5e6b..b17540eba49097 100644 --- a/src/transformers/modeling_tf_bert.py +++ b/src/transformers/modeling_tf_bert.py @@ -404,7 +404,7 @@ def call( hidden_states = layer_outputs[0] if output_attentions: - all_attentions = all_attentions + layer_outputs[1:] + all_attentions = all_attentions + (layer_outputs[1],) # Add last layer if output_hidden_states: diff --git a/src/transformers/modeling_tf_electra.py b/src/transformers/modeling_tf_electra.py index 7eaa2b0432c31b..6744194e0f5726 100644 --- a/src/transformers/modeling_tf_electra.py +++ b/src/transformers/modeling_tf_electra.py @@ -257,7 +257,7 @@ def call( hidden_states = layer_outputs[0] if output_attentions: - all_attentions = all_attentions + layer_outputs[1:] + all_attentions = all_attentions + (layer_outputs[1],) # Add last layer if output_hidden_states: diff --git a/src/transformers/modeling_tf_mobilebert.py b/src/transformers/modeling_tf_mobilebert.py index e97eda208e3e4f..c7b122f8c1948a 100644 --- a/src/transformers/modeling_tf_mobilebert.py +++ b/src/transformers/modeling_tf_mobilebert.py @@ -587,7 +587,7 @@ def call( hidden_states = layer_outputs[0] if output_attentions: - all_attentions = all_attentions + layer_outputs[1:] + all_attentions = all_attentions + (layer_outputs[1],) # Add last layer if output_hidden_states: diff --git a/src/transformers/modeling_tf_roberta.py b/src/transformers/modeling_tf_roberta.py index 8841d377206926..f3a805a30aad9a 100644 --- a/src/transformers/modeling_tf_roberta.py +++ b/src/transformers/modeling_tf_roberta.py @@ -447,7 +447,7 @@ def call( hidden_states = layer_outputs[0] if output_attentions: - all_attentions = all_attentions + layer_outputs[1:] + all_attentions = all_attentions + (layer_outputs[1],) # Add last layer if output_hidden_states: