1 parent bd52d61 commit 8bc3475
src/transformers/models/openai_moe/modeling_openai_moe.py
@@ -242,7 +242,7 @@ def eager_attention_forward(
     scores = unnormalized_scores / normalizer

     attn_weights = nn.functional.dropout(scores, p=dropout, training=module.training)
-    attn_output = torch.matmul(attn_weights[..., :-1], value_states) # ignore the sinks
+    attn_output = torch.matmul(attn_weights, value_states) # ignore the sinks
     attn_output = attn_output.transpose(1, 2).contiguous()
     return attn_output, attn_weights
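A likely reading of the fix: given scores = unnormalized_scores / normalizer just above, the sink column contributes only to the softmax normalizer and is already absent from scores, so attn_weights has exactly one column per real key. The old slice attn_weights[..., :-1] was therefore dropping the last real key position, not the sink (note the "# ignore the sinks" comment survives on the new line even though the slice is gone). Below is a minimal, self-contained sketch of that normalization scheme under those assumptions; sink_logit and the function name are illustrative, not the actual openai_moe attributes.

import torch

def sink_attention_weights(attn_logits, sink_logit):
    # attn_logits: (batch, heads, q_len, kv_len) raw query-key scores
    # sink_logit:  (heads,) learned per-head sink logit (hypothetical name)
    sinks = sink_logit.view(1, -1, 1, 1).expand(*attn_logits.shape[:-1], 1)
    combined = torch.cat([attn_logits, sinks], dim=-1)
    combined = combined - combined.amax(dim=-1, keepdim=True)    # numerical stability
    unnormalized_scores = torch.exp(combined[..., :-1])          # real keys only
    normalizer = torch.exp(combined).sum(dim=-1, keepdim=True)   # includes the sink
    # Each row sums to less than 1; the missing mass is absorbed by the
    # sink, which has no value vector, so no column needs slicing later.
    return unnormalized_scores / normalizer

With attn_weights shaped (batch, heads, q_len, kv_len), torch.matmul(attn_weights, value_states) already lines up against value_states of shape (batch, heads, kv_len, head_dim), which is why the fixed line multiplies without slicing.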