diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 800d91acb7ed..2021c7d93136 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -131,16 +131,16 @@ jobs: ### \'\' # L0: GPU unit tests - OPTIONAL_L0_Unit_Tests_GPU_ASR: + L0_Unit_Tests_GPU_ASR: needs: [cicd-test-container-setup] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_GPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true' + if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true' with: RUNNER: self-hosted-azure TIMEOUT: 20 + # TODO: remove this hack SCRIPT: | - NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --with_downloads - IS_OPTIONAL: true + python -c "from nemo.collections.asr.models import ASRModel" && NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --with_downloads L0_Unit_Tests_GPU_Audio: needs: [cicd-test-container-setup] @@ -1212,18 +1212,6 @@ jobs: matmul_precision=medium AFTER_SCRIPT: | rm -rf preds.json - - - # L2: Transducer alignment - OPTIONAL_L2_Transducer_alignment_Running_pytest: - needs: [cicd-test-container-setup] - uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L2_Transducer_alignment_Running_pytest') || needs.cicd-test-container-setup.outputs.all == 'true' - with: - RUNNER: self-hosted-azure - SCRIPT: | - pytest tests/collections/asr/decoding/rnnt_alignments_check.py --durations=-1 --with_downloads - IS_OPTIONAL: true # L2: Segmentation Tool L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav: @@ -5456,7 +5444,7 @@ jobs: - gpu-test - cicd-test-container-setup - #- OPTIONAL_L0_Unit_Tests_GPU_ASR + - L0_Unit_Tests_GPU_ASR - L0_Unit_Tests_GPU_Audio - L0_Unit_Tests_GPU_Common - L0_Unit_Tests_GPU_LLM @@ -5507,7 +5495,6 @@ jobs: - L2_ASR_Adapters_Linear_Adapters - L2_ASR_Adapters_RelPos_MHA_Adapters - L2_Speech_Transcription_Speech_to_Text_Transcribe - #- OPTIONAL_L2_Transducer_alignment_Running_pytest - L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav - L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3 - L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference diff --git a/nemo/collections/asr/parts/submodules/cuda_graph_rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/cuda_graph_rnnt_greedy_decoding.py index aa49435ded16..fc501b3d00de 100644 --- a/nemo/collections/asr/parts/submodules/cuda_graph_rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/cuda_graph_rnnt_greedy_decoding.py @@ -293,6 +293,13 @@ def __call__( device: torch.device, partial_hypotheses: Optional[List[rnnt_utils.Hypothesis]] = None, ): + if x.device.type != "cuda": + # If CUDA graphs are enabled and "frame-looping" algorithm is requested, current class + # is not suitable to handle non-CUDA inputs; thus we are passing them to original caller + return self.caller._greedy_decode_blank_as_pad_loop_frames( + x=x, out_len=out_len, device=device, partial_hypotheses=partial_hypotheses + ) + if partial_hypotheses is not None: raise NotImplementedError( "`partial_hypotheses` support is not available " diff --git a/tests/collections/asr/decoding/test_cuda_graph_rnnt_greedy_decoding.py b/tests/collections/asr/decoding/test_cuda_graph_rnnt_greedy_decoding.py index 31fe822573ce..4715f4826493 100644 --- a/tests/collections/asr/decoding/test_cuda_graph_rnnt_greedy_decoding.py +++ b/tests/collections/asr/decoding/test_cuda_graph_rnnt_greedy_decoding.py @@ -53,8 +53,8 @@ def stt_en_fastconformer_transducer_large(): 8, True, marks=pytest.mark.xfail( - reason="""Cannot instantiate the -body cuda graph of a conditional node with a persistent kernel (in this case, + reason="""Cannot instantiate the +body cuda graph of a conditional node with a persistent kernel (in this case, a persistent LSTM), which is triggered in cudnn by using a batch size of 8.""" ), ), diff --git a/tests/collections/asr/decoding/rnnt_alignments_check.py b/tests/collections/asr/decoding/test_rnnt_alignments.py similarity index 94% rename from tests/collections/asr/decoding/rnnt_alignments_check.py rename to tests/collections/asr/decoding/test_rnnt_alignments.py index ec0656cbce49..5c43af28b1d4 100644 --- a/tests/collections/asr/decoding/rnnt_alignments_check.py +++ b/tests/collections/asr/decoding/test_rnnt_alignments.py @@ -13,10 +13,6 @@ # limitations under the License. -# NOTE: the file name does not contain "test" on purpose to avoid executing -# these tests outside of the CI machines environment, where test data is -# stored - from pathlib import Path from typing import Union @@ -27,6 +23,7 @@ from nemo.collections.asr.models import EncDecRNNTBPEModel from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest +from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis from nemo.collections.asr.parts.utils.transcribe_utils import prepare_audio_data DEVICES = [] @@ -65,7 +62,7 @@ def get_rnnt_alignments( loop_labels: bool = True, use_cuda_graph_decoder=False, device="cuda", -): +) -> list[Hypothesis]: cfg = OmegaConf.structured(TranscriptionConfig()) cfg.rnnt_decoding.confidence_cfg.preserve_frame_confidence = True cfg.rnnt_decoding.preserve_alignments = True @@ -74,12 +71,13 @@ def get_rnnt_alignments( cfg.rnnt_decoding.greedy.loop_labels = loop_labels cfg.rnnt_decoding.greedy.use_cuda_graph_decoder = use_cuda_graph_decoder cfg.dataset_manifest = str(manifest_path) - filepaths = prepare_audio_data(cfg)[0][:10] # selecting 10 files only + filepaths = prepare_audio_data(cfg)[0][:8] # selecting 8 files only + # NB: 9th file has the same transcription but a bit different alignment for batched/non-batched decoding model = model.to(device) model.change_decoding_strategy(cfg.rnnt_decoding) - transcriptions = model.transcribe( + transcriptions: list[Hypothesis] = model.transcribe( audio=filepaths, batch_size=cfg.batch_size, num_workers=cfg.num_workers,