Merge pull request huggingface#18 from stevezheng23/dev/zheng/quac

fix AT issues in roberta/bert modeling (cont.)
stevezheng23 · Oct 30, 2019 · 245834d · 245834d
2 parents 97c6ac9 + c7e3cae
commit 245834d
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/transformers/modeling_roberta.py b/transformers/modeling_roberta.py
@@ -1004,11 +1004,11 @@ def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_
 
                 at_span_loss.backward()
                 at_perturb_grads = at_perturbs.grad.detach()
-                at_perturb_grads /= torch.norm(at_perturb_grads, p='fro', dim=-1)
+                at_perturb_grads /= torch.norm(at_perturb_grads, p='fro', dim=-1, keepdim=True)
                 at_updated_perturbs = at_perturbs + self.at_alpha * at_perturb_grads
                 at_updated_perturbs = torch.clamp(at_updated_perturbs, min=-self.at_epsilon, max=self.at_epsilon)
                 at_perturbs.data = at_updated_perturbs
-                at_perturbs.grad.zero()
+                at_perturbs.grad.zero_()
 
             at_outputs = self.roberta(input_ids,
                                       attention_mask=attention_mask,