Skip to content

Commit

Permalink
Code reviews
Browse files Browse the repository at this point in the history
Signed-off-by: Piotr Żelasko <petezor@gmail.com>
  • Loading branch information
pzelasko committed Mar 4, 2024
1 parent 7345806 commit bfcb6cf
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
4 changes: 4 additions & 0 deletions examples/asr/transcribe_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis
else:
cfg.decoding = cfg.rnnt_decoding

remove_path_after_done = None
if isinstance(asr_model, EncDecMultiTaskModel):
# Special case for EncDecMultiTaskModel, where the input manifest is directly passed into the model's transcribe() function
partial_audio = False
Expand All @@ -332,6 +333,7 @@ def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis
for item in read_and_maybe_sort_manifest(cfg.dataset_manifest, try_sort=True):
print(json.dumps(item), file=f)
cfg.dataset_manifest = f.name
remove_path_after_done = f.name
filepaths = cfg.dataset_manifest
else:
# prepare audio filepaths and decide whether it's partial audio
Expand Down Expand Up @@ -394,6 +396,8 @@ def autocast(dtype=None):
logging.info(f"Finished transcribing from manifest file: {cfg.dataset_manifest}")
if cfg.presort_manifest:
transcriptions = restore_transcription_order(cfg.dataset_manifest, transcriptions)
if remove_path_after_done is not None:
os.unlink(remove_path_after_done)
else:
logging.info(f"Finished transcribing {len(filepaths)} files !")
logging.info(f"Writing transcriptions into file: {cfg.output_filename}")
Expand Down
11 changes: 5 additions & 6 deletions nemo/collections/asr/parts/utils/transcribe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,16 +282,14 @@ def prepare_audio_data(cfg: DictConfig) -> Tuple[List[str], bool]:
logging.error(f"The input dataset_manifest {cfg.dataset_manifest} is empty. Exiting!")
return None

has_two_fields = []
all_entries_have_offset_and_duration = True
for item in read_and_maybe_sort_manifest(cfg.dataset_manifest, try_sort=cfg.presort_manifest):
if "offset" in item and "duration" in item:
has_two_fields.append(True)
else:
has_two_fields.append(False)
if not ("offset" in item and "duration" in item):
all_entries_have_offset_and_duration = False
audio_key = cfg.get('audio_key', 'audio_filepath')
audio_file = get_full_path(audio_file=item[audio_key], manifest_file=cfg.dataset_manifest)
filepaths.append(audio_file)
partial_audio = all(has_two_fields)
partial_audio = all_entries_have_offset_and_duration
logging.info(f"\nTranscribing {len(filepaths)} files...\n")

return filepaths, partial_audio
Expand All @@ -312,6 +310,7 @@ def restore_transcription_order(manifest_path: str, transcriptions: list) -> lis
if not all("duration" in item[1] for item in items):
return transcriptions
new2old = [item[0] for item in sorted(items, reverse=True, key=lambda it: it[1]["duration"])]
del items # free up some memory
is_list = isinstance(transcriptions[0], list)
if is_list:
transcriptions = list(zip(*transcriptions))
Expand Down

0 comments on commit bfcb6cf

Please sign in to comment.