From d91bbbcdd1c12f411942cfd40892af92ae81e4bb Mon Sep 17 00:00:00 2001 From: Arthur Zucker Date: Mon, 14 Oct 2024 11:14:02 +0200 Subject: [PATCH] revert bad changes --- scripts/deberta_scrtipt.py | 85 ------------------- scripts/from transformers import pipeline.py | 10 --- src/transformers/modeling_utils.py | 4 +- .../models/deberta/modeling_deberta.py | 7 +- src/transformers/trainer.py | 4 +- 5 files changed, 10 insertions(+), 100 deletions(-) delete mode 100644 scripts/deberta_scrtipt.py delete mode 100644 scripts/from transformers import pipeline.py diff --git a/scripts/deberta_scrtipt.py b/scripts/deberta_scrtipt.py deleted file mode 100644 index bfe540663ce069..00000000000000 --- a/scripts/deberta_scrtipt.py +++ /dev/null @@ -1,85 +0,0 @@ -import torch -from transformers import pipeline, AutoTokenizer, AutoModel, AutoModelForMaskedLM -import time - - -model = AutoModelForMaskedLM.from_pretrained("microsoft/deberta-base") - -test_sentence = 'Do you [MASK] the muffin man?' - -# for comparison -bert = pipeline('fill-mask', model = 'bert-base-uncased') -print('\n'.join([d['sequence'] for d in bert(test_sentence)])) - - -deberta = pipeline('fill-mask', model = 'microsoft/deberta-base') -print('\n'.join([d['sequence'] for d in deberta(test_sentence)])) - - -tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-base") - -tokenized_dict = tokenizer( - ["Is this working",], ["Not yet",], - return_tensors="pt" -) - -deberta.model.forward = torch.compile(deberta.model.forward) -start=time.time() -deberta.model(**tokenized_dict) -end=time.time() -print(end-start) - - -start=time.time() -deberta.model(**tokenized_dict) -end=time.time() -print(end-start) - - -start=time.time() -deberta.model(**tokenized_dict) -end=time.time() -print(end-start) - - -model = AutoModel.from_pretrained('microsoft/deberta-base') -model.config.return_dict = False -model.config.output_hidden_states=False -input_tuple = (tokenized_dict['input_ids'], tokenized_dict['attention_mask']) - - -start=time.time() -traced_model = torch.jit.trace(model, input_tuple) -end=time.time() -print(end-start) - - -start=time.time() -traced_model(tokenized_dict['input_ids'], tokenized_dict['attention_mask']) -end=time.time() -print(end-start) - - -start=time.time() -traced_model(tokenized_dict['input_ids'], tokenized_dict['attention_mask']) -end=time.time() -print(end-start) - - -start=time.time() -traced_model(tokenized_dict['input_ids'], tokenized_dict['attention_mask']) -end=time.time() -print(end-start) - - -start=time.time() -traced_model(tokenized_dict['input_ids'], tokenized_dict['attention_mask']) -end=time.time() -print(end-start) - - -torch.jit.save(traced_model, "compiled_deberta.pt") - - - -# my_script_module = torch.jit.script(model) diff --git a/scripts/from transformers import pipeline.py b/scripts/from transformers import pipeline.py deleted file mode 100644 index 356add2dba8b09..00000000000000 --- a/scripts/from transformers import pipeline.py +++ /dev/null @@ -1,10 +0,0 @@ -from transformers import pipeline -test_sentence = 'Do you [MASK] the muffin man?' - -# for comparison -bert = pipeline('fill-mask', model = 'bert-base-uncased') -print('\n'.join([d['sequence'] for d in bert(test_sentence)])) - - -deberta = pipeline('fill-mask', model = 'microsoft/deberta-v3-large') -print('\n'.join([d['sequence'] for d in deberta(test_sentence)])) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index f9552ff376d167..926e26a10b5c52 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -1643,11 +1643,11 @@ def _set_default_torch_dtype(cls, dtype: torch.dtype) -> torch.dtype: return dtype_orig @property - def base_model(self): + def base_model(self) -> nn.Module: """ `torch.nn.Module`: The main body of the model. """ - return getattr(self, "base_model_prefix", self) + return getattr(self, self.base_model_prefix, self) @classmethod def can_generate(cls) -> bool: diff --git a/src/transformers/models/deberta/modeling_deberta.py b/src/transformers/models/deberta/modeling_deberta.py index d0bea548d411ea..0c8906f14cdbdc 100644 --- a/src/transformers/models/deberta/modeling_deberta.py +++ b/src/transformers/models/deberta/modeling_deberta.py @@ -930,7 +930,12 @@ def forward(self, sequence_output, word_embeddings): @add_start_docstrings("""DeBERTa Model with a `language modeling` head on top.""", DEBERTA_START_DOCSTRING) class DebertaForMaskedLM(DebertaPreTrainedModel): - _tied_weights_keys = ["cls.predictions.decoder.weight", "cls.predictions.decoder.bias"] + _tied_weights_keys = [ + "cls.predictions.decoder.weight", + "cls.predictions.decoder.bias", + "deberta.embeddings.word_embeddings.weight", + "lm_predictions.lm_head.weight", + ] def __init__(self, config): super().__init__(config) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 2c2397a0f54b71..20b9f6dad231d1 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -3512,8 +3512,8 @@ def training_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, `torch.Tensor`: The tensor with training loss on this batch. """ model.train() - # if hasattr(self.optimizer, "train") and callable(self.optimizer.train): - # self.optimizer.train() + if hasattr(self.optimizer, "train") and callable(self.optimizer.train): + self.optimizer.train() inputs = self._prepare_inputs(inputs) if is_sagemaker_mp_enabled():