Skip to content

Commit

Permalink
Fix missing sequences_scores in the Whisper beam search output (hug…
Browse files Browse the repository at this point in the history
…gingface#32970)

* added sequences_scores to the output

* added beam_indices to output

* added test to check for beam_indices, sequences_scores and their shape

* removed redundant whitespace

* make fixup
  • Loading branch information
Nik-Kras authored and amyeroberts committed Oct 2, 2024
1 parent 804b921 commit 54e4c94
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 4 deletions.
10 changes: 6 additions & 4 deletions src/transformers/models/whisper/generation_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,21 +1000,23 @@ def _stack_split_outputs(self, seek_outputs, model_output_type, device, kwargs):
# Stack back seek_outputs tensors after splitting them with the split_by_batch_index method
outputs = {}
for key in seek_outputs[0].keys():
if key == "sequences":
if key in ["sequences", "beam_indices"]:
outputs[key] = torch.stack([v[key] for v in seek_outputs], dim=0).to(device)
if key in ["scores", "encoder_attentions", "encoder_hidden_states", "logits"]:
elif key in ["scores", "encoder_attentions", "encoder_hidden_states", "logits"]:
outputs[key] = tuple(
torch.stack([v[key][i] for v in seek_outputs]).to(device) for i in range(len(seek_outputs[0][key]))
)
if key in ["decoder_attentions", "decoder_hidden_states", "cross_attentions"]:
elif key == "sequences_scores":
outputs[key] = torch.stack([v[key] for v in seek_outputs], dim=0).to(device)
elif key in ["decoder_attentions", "decoder_hidden_states", "cross_attentions"]:
outputs[key] = tuple(
tuple(
torch.stack([v[key][i][j] for v in seek_outputs]).squeeze(1).to(device)
for j in range(len(seek_outputs[0][key][0]))
)
for i in range(len(seek_outputs[0][key]))
)
if key == "past_key_values":
elif key == "past_key_values":
past_key_value_type = kwargs.get("past_key_values")
if seek_outputs[0][key] is not None:
outputs[key] = tuple(
Expand Down
19 changes: 19 additions & 0 deletions tests/models/whisper/test_modeling_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,25 @@ def test_inputs_embeds(self):
with torch.no_grad():
model(**inputs)[0]

def test_beam_search_output(self):
    """Check that beam-search generation returns `beam_indices` and `sequences_scores`.

    Runs `generate` with 3 beams and 3 returned sequences, asking for a dict
    output with scores, then verifies that both keys are present and that the
    leading dimension of each equals `input_features.shape[0] * 3`
    (presumably batch size times the number of returned sequences).
    """
    config, input_dict = self.model_tester.prepare_config_and_inputs()
    model = WhisperForConditionalGeneration(config)
    model = model.to(torch_device).eval()

    input_features = input_dict["input_features"]

    # Beam-search settings passed straight through to `generate`.
    beam_search_kwargs = {
        "num_beams": 3,
        "num_return_sequences": 3,
        "return_dict_in_generate": True,
        "output_scores": True,
    }
    output = model.generate(input_features, **beam_search_kwargs)

    # Both beam-search specific entries must be present in the output.
    expected_keys = (
        ("beam_indices", "beam_indices not found in the output"),
        ("sequences_scores", "sequences_scores not found in the output"),
    )
    for key, failure_message in expected_keys:
        self.assertIn(key, output, failure_message)

    # The leading dimension of each entry must be input rows * 3.
    expected_rows = input_features.shape[0] * 3
    self.assertEqual(output.beam_indices.shape[0], expected_rows)
    self.assertEqual(output.sequences_scores.shape[0], expected_rows)

# training is not supported yet
@unittest.skip(reason="Training is not supported yet")
def test_training(self):
Expand Down

0 comments on commit 54e4c94

Please sign in to comment.