[ASR] Fix GPU memory leak in transcribe_speech.py (#7249)

Signed-off-by: Ryan <rlangman@nvidia.com>
NVIDIA · Aug 17, 2023 · b7c8ef1
1 parent 2a5ecce
commit b7c8ef1
Showing 1 changed file with 6 additions and 1 deletion.
diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py
@@ -421,13 +421,18 @@ def transcribe_partial_audio(
                 input_signal=test_batch[0].to(device), input_signal_length=test_batch[1].to(device)
             )
             logits, logits_len = outputs[0], outputs[1]
+
             if isinstance(asr_model, EncDecHybridRNNTCTCModel) and decoder_type == "ctc":
                 logits = asr_model.ctc_decoder(encoder_output=logits)
+
+            logits = logits.cpu()
+
             if logprobs:
+                logits = logits.numpy()
                 # dump log probs per file
                 for idx in range(logits.shape[0]):
                     lg = logits[idx][: logits_len[idx]]
-                    hypotheses.append(lg.cpu().numpy())
+                    hypotheses.append(lg)
             else:
                 current_hypotheses, all_hyp = decode_function(logits, logits_len, return_hypotheses=return_hypotheses,)