Compute dropout_probability only in training mode (SpeechT5) (huggingface#24498)

fix

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
ydshieh and ydshieh committed Jun 26, 2023
1 parent c9fd498 commit 7b4e3b5
Showing 1 changed file with 8 additions and 5 deletions.
src/transformers/models/speecht5/modeling_speecht5.py (8 additions, 5 deletions)
@@ -1380,9 +1380,11 @@ def forward(
                 all_hidden_states = all_hidden_states + (hidden_states,)
 
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = torch.rand([])
+            skip_the_layer = False
+            if self.training:
+                dropout_probability = torch.rand([])
+                skip_the_layer = dropout_probability < self.layerdrop
 
-            skip_the_layer = self.training and (dropout_probability < self.layerdrop)
             if not skip_the_layer or deepspeed_zero3_is_enabled:
                 # under deepspeed zero3 all gpus must run in sync
                 if self.gradient_checkpointing and self.training:
@@ -1705,9 +1707,10 @@ def forward(
                 all_hidden_states = all_hidden_states + (hidden_states,)
 
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
-            dropout_probability = torch.rand([])
-
-            skip_the_layer = self.training and (dropout_probability < self.layerdrop)
+            skip_the_layer = False
+            if self.training:
+                dropout_probability = torch.rand([])
+                skip_the_layer = dropout_probability < self.layerdrop
             if skip_the_layer and not deepspeed_zero3_is_enabled:
                 continue

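For context, here is a minimal, self-contained sketch of the LayerDrop pattern this commit settles on. ToyEncoder is a hypothetical stand-in for the SpeechT5 encoder/decoder loops touched by the diff, not the transformers implementation; the point is that the random draw happens only under self.training, so eval-mode forward passes never consume RNG state and never skip a layer.

    # Sketch of LayerDrop (https://arxiv.org/abs/1909.11556) with the
    # post-fix control flow. ToyEncoder is illustrative only.
    import torch
    import torch.nn as nn


    class ToyEncoder(nn.Module):
        def __init__(self, hidden_size=16, num_layers=4, layerdrop=0.1):
            super().__init__()
            self.layers = nn.ModuleList(
                nn.Linear(hidden_size, hidden_size) for _ in range(num_layers)
            )
            self.layerdrop = layerdrop

        def forward(self, hidden_states):
            for layer in self.layers:
                # Sample only in training mode: in eval mode a layer can never
                # be skipped, so drawing a random number there would only
                # advance the global RNG state without affecting the output.
                skip_the_layer = False
                if self.training:
                    dropout_probability = torch.rand([])
                    skip_the_layer = dropout_probability < self.layerdrop
                if skip_the_layer:
                    continue
                hidden_states = layer(hidden_states)
            return hidden_states


    encoder = ToyEncoder().eval()
    x = torch.ones(2, 16)
    assert torch.equal(encoder(x), encoder(x))  # eval is deterministic, RNG-free

The sketch omits the deepspeed_zero3_is_enabled branch seen in the diff: under DeepSpeed ZeRO-3, all GPUs must still execute a layer that was selected for skipping so the ranks stay in sync, which is why the real code runs the layer and only discards its contribution to the layer-skip decision.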
