Compute dropout_probability only in training mode (SpeechT5) (huggingface#24498)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
ydshieh and ydshieh committed Jun 26, 2023
1 parent c9fd498 commit 7b4e3b5
Showing 1 changed file with 8 additions and 5 deletions.
src/transformers/models/speecht5/modeling_speecht5.py (8 additions, 5 deletions)
@@ -1380,9 +1380,11 @@ def forward(
                 all_hidden_states = all_hidden_states + (hidden_states,)
 
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = torch.rand([])
+            skip_the_layer = False
+            if self.training:
+                dropout_probability = torch.rand([])
+                skip_the_layer = dropout_probability < self.layerdrop
 
-            skip_the_layer = self.training and (dropout_probability < self.layerdrop)
             if not skip_the_layer or deepspeed_zero3_is_enabled:
                 # under deepspeed zero3 all gpus must run in sync
                 if self.gradient_checkpointing and self.training:
@@ -1705,9 +1707,10 @@ def forward(
                 all_hidden_states = all_hidden_states + (hidden_states,)
 
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = torch.rand([])
-
-            skip_the_layer = self.training and (dropout_probability < self.layerdrop)
+            skip_the_layer = False
+            if self.training:
+                dropout_probability = torch.rand([])
+                skip_the_layer = dropout_probability < self.layerdrop
             if skip_the_layer and not deepspeed_zero3_is_enabled:
                 continue

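For context, here is a minimal, self-contained sketch of the LayerDrop pattern this commit settles on. ToyEncoder is a hypothetical stand-in for the SpeechT5 encoder/decoder loops touched by the diff, not the transformers implementation; the point is that the random draw happens only under self.training, so eval-mode forward passes never consume RNG state and never skip a layer.

    # Sketch of LayerDrop (https://arxiv.org/abs/1909.11556) with the
    # post-fix control flow. ToyEncoder is illustrative only.
    import torch
    import torch.nn as nn


    class ToyEncoder(nn.Module):
        def __init__(self, hidden_size=16, num_layers=4, layerdrop=0.1):
            super().__init__()
            self.layers = nn.ModuleList(
                nn.Linear(hidden_size, hidden_size) for _ in range(num_layers)
            )
            self.layerdrop = layerdrop

        def forward(self, hidden_states):
            for layer in self.layers:
                # Sample only in training mode: in eval mode a layer can never
                # be skipped, so drawing a random number there would only
                # advance the global RNG state without affecting the output.
                skip_the_layer = False
                if self.training:
                    dropout_probability = torch.rand([])
                    skip_the_layer = dropout_probability < self.layerdrop
                if skip_the_layer:
                    continue
                hidden_states = layer(hidden_states)
            return hidden_states


    encoder = ToyEncoder().eval()
    x = torch.ones(2, 16)
    assert torch.equal(encoder(x), encoder(x))  # eval is deterministic, RNG-free

The sketch omits the deepspeed_zero3_is_enabled branch seen in the diff: under DeepSpeed ZeRO-3, all GPUs must still execute a layer that was selected for skipping so the ranks stay in sync, which is why the real code runs the layer and only discards its contribution to the layer-skip decision.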
