diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py index b33691d646234c..ca41ec2a22db69 100755 --- a/src/transformers/models/albert/modeling_albert.py +++ b/src/transformers/models/albert/modeling_albert.py @@ -1037,7 +1037,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py index 21a6eaab595265..75aadf2d90a9ce 100755 --- a/src/transformers/models/bert/modeling_bert.py +++ b/src/transformers/models/bert/modeling_bert.py @@ -1528,7 +1528,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py index 45a4ad76b57371..3d5e443e1cf5c0 100755 --- a/src/transformers/models/big_bird/modeling_big_bird.py +++ b/src/transformers/models/big_bird/modeling_big_bird.py @@ -2671,7 +2671,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif 
self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py index f5b23e46005ff5..b6ac5abc02866a 100755 --- a/src/transformers/models/convbert/modeling_convbert.py +++ b/src/transformers/models/convbert/modeling_convbert.py @@ -1023,7 +1023,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/distilbert/modeling_distilbert.py b/src/transformers/models/distilbert/modeling_distilbert.py index b3cb1a93cced3a..3dc968cdf04f4c 100755 --- a/src/transformers/models/distilbert/modeling_distilbert.py +++ b/src/transformers/models/distilbert/modeling_distilbert.py @@ -642,7 +642,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py index 5229054ff76616..4d8479942eda61 100644 --- a/src/transformers/models/electra/modeling_electra.py +++ b/src/transformers/models/electra/modeling_electra.py @@ -964,7 +964,10 @@ def forward( if self.config.problem_type == "regression": 
loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/funnel/modeling_funnel.py b/src/transformers/models/funnel/modeling_funnel.py index 890a620ed41225..428ce54fff406c 100644 --- a/src/transformers/models/funnel/modeling_funnel.py +++ b/src/transformers/models/funnel/modeling_funnel.py @@ -1298,7 +1298,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index d1ab71bb7ad724..4aa6f5568714fd 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1872,7 +1872,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py index 8f50c6d6f0f905..a37f3e283345f7 
100644 --- a/src/transformers/models/mobilebert/modeling_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_mobilebert.py @@ -1279,7 +1279,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index 4beca117a6855b..c19ac5265a2b51 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ b/src/transformers/models/reformer/modeling_reformer.py @@ -2445,7 +2445,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index cf535a719c8bdf..c3503c292a1996 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -1178,7 +1178,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = 
loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/squeezebert/modeling_squeezebert.py b/src/transformers/models/squeezebert/modeling_squeezebert.py index 462c8fb376261b..7fb76f0328db91 100644 --- a/src/transformers/models/squeezebert/modeling_squeezebert.py +++ b/src/transformers/models/squeezebert/modeling_squeezebert.py @@ -798,7 +798,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py index 8dc0d208d16097..bcf08ae4109d33 100755 --- a/src/transformers/models/xlm/modeling_xlm.py +++ b/src/transformers/models/xlm/modeling_xlm.py @@ -847,7 +847,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index fa562c5f344991..6f0eaa3f8ce7e1 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -1562,7 +1562,10 @@ def forward( if self.config.problem_type == "regression": loss_fct = MSELoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels) + if self.num_labels == 1: + loss = loss_fct(logits.squeeze(), 
labels.squeeze()) + else: + loss = loss_fct(logits, labels) elif self.config.problem_type == "single_label_classification": loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 837e267bdda6f3..493cf7d55530c6 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -20,6 +20,7 @@ import random import tempfile import unittest +import warnings from typing import List, Tuple from huggingface_hub import HfApi @@ -1462,7 +1463,14 @@ def test_problem_types(self): inputs["labels"] = inputs["labels"].to(problem_type["dtype"]) - loss = model(**inputs).loss + # This tests that we do not trigger the warning from PyTorch "Using a target size that is different + # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure + # they have the same size." which is a symptom that something is wrong with the regression problem. + # See https://github.com/huggingface/transformers/issues/11780 + with warnings.catch_warnings(record=True) as warning_list: + loss = model(**inputs).loss + self.assertListEqual(warning_list, []) + loss.backward()