Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FastConformer Hybrid ASR models for EN, ES, IT, DE, PL, HR, UA, BY #6549

Merged
merged 4 commits into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/asr/data/benchmark_by.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Model,Model Base Class,Model Card
stt_by_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_by_fastconformer_hybrid_large_pc"
1 change: 1 addition & 0 deletions docs/source/asr/data/benchmark_de.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ stt_de_citrinet_1024,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidi
stt_de_contextnet_1024,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_contextnet_1024"
stt_de_conformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_conformer_ctc_large"
stt_de_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_conformer_transducer_large"
stt_de_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_fastconformer_hybrid_large_pc"
5 changes: 4 additions & 1 deletion docs/source/asr/data/benchmark_en.csv
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ stt_en_conformer_transducer_small,EncDecRNNTBPEModel,"https://ngc.nvidia.com/cat
stt_en_conformer_transducer_medium,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_medium"
stt_en_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_large"
stt_en_conformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xlarge"
stt_en_conformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge"
stt_en_conformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge"
stt_en_fastconformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large"
stt_en_fastconformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large"
stt_en_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc"
3 changes: 2 additions & 1 deletion docs/source/asr/data/benchmark_es.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ stt_es_citrinet_512,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvi
stt_es_citrinet_1024_gamma_0_25,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_citrinet_1024_gamma_0_25"
stt_es_conformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_conformer_ctc_large"
stt_es_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_conformer_transducer_large"
stt_es_contextnet_1024,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_contextnet_1024"
stt_es_contextnet_1024,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_contextnet_1024"
stt_es_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_fastconformer_hybrid_large_pc"
1 change: 1 addition & 0 deletions docs/source/asr/data/benchmark_hr.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Model,Model Base Class,Model Card
stt_hr_conformer_ctc_large,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_conformer_ctc_large"
stt_hr_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_conformer_transducer_large"
stt_hr_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_fastconformer_hybrid_large_pc"
2 changes: 1 addition & 1 deletion docs/source/asr/data/benchmark_it.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Model,Model Base Class,Model Card
stt_it_quartznet15x5,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_it_quartznet15x5"

stt_it_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_it_fastconformer_hybrid_large_pc"
1 change: 1 addition & 0 deletions docs/source/asr/data/benchmark_pl.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Model,Model Base Class,Model Card
stt_pl_quartznet15x5,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_pl_quartznet15x5"
stt_pl_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_pl_fastconformer_hybrid_large_pc"
2 changes: 2 additions & 0 deletions docs/source/asr/data/benchmark_ua.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Model,Model Base Class,Model Card
stt_ua_fastconformer_hybrid_large_pc,EncDecHybridRNNTCTCBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_ua_fastconformer_hybrid_large_pc"
19 changes: 19 additions & 0 deletions docs/source/asr/results.rst
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,22 @@ Kinyarwanda
:widths: 40, 10, 50
:header-rows: 1

-----------------------------

Belarusian
^^^^^^^^^^^
.. csv-table::
:file: data/benchmark_by.csv
:align: left
:widths: 40, 10, 50
:header-rows: 1

-----------------------------

Ukrainian
^^^^^^^^^^^
.. csv-table::
:file: data/benchmark_ua.csv
:align: left
:widths: 40, 10, 50
:header-rows: 1
7 changes: 7 additions & 0 deletions nemo/collections/asr/models/ctc_bpe_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,4 +597,11 @@ def list_available_models(cls) -> List[PretrainedModelInfo]:
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_en_fastconformer_ctc_large",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_ctc_large",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_ctc_large/versions/1.0.0/files/stt_en_fastconformer_ctc_large.nemo",
)
results.append(model)

return results
61 changes: 59 additions & 2 deletions nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import copy
import os
from typing import Dict, Optional, Union
from typing import Dict, List, Optional, Union

import torch
from omegaconf import DictConfig, ListConfig, OmegaConf, open_dict
Expand Down Expand Up @@ -454,12 +454,69 @@ def change_decoding_strategy(self, decoding_cfg: DictConfig = None, decoder_type
raise ValueError(f"decoder_type={decoder_type} is not supported. Supported values: [ctc,rnnt]")

@classmethod
def list_available_models(cls) -> Optional[PretrainedModelInfo]:
def list_available_models(cls) -> List[PretrainedModelInfo]:
"""
This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud.

Returns:
List of available pre-trained models.
"""
results = []

model = PretrainedModelInfo(
pretrained_model_name="stt_en_fastconformer_hybrid_large_pc",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_hybrid_large_pc",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_en_fastconformer_hybrid_large_pc.nemo",
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_de_fastconformer_hybrid_large_pc",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_fastconformer_hybrid_large_pc",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_de_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_de_fastconformer_hybrid_large_pc.nemo",
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_it_fastconformer_hybrid_large_pc",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_it_fastconformer_hybrid_large_pc",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_it_fastconformer_hybrid_large_pc/versions/1.18/files/stt_it_fastconformer_hybrid_large_pc.nemo",
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_es_fastconformer_hybrid_large_pc",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_es_fastconformer_hybrid_large_pc",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_es_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_es_fastconformer_hybrid_large_pc.nemo",
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_hr_fastconformer_hybrid_large_pc",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_fastconformer_hybrid_large_pc",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_hr_fastconformer_hybrid_large_pc/versions/1.18.0/files/FastConformer-Hybrid-Transducer-CTC-BPE-v256-averaged.nemo",
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_ua_fastconformer_hybrid_large_pc",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_ua_fastconformer_hybrid_large_pc",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_ua_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_ua_fastconformer_hybrid_large_pc.nemo",
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_pl_fastconformer_hybrid_large_pc",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_pl_fastconformer_hybrid_large_pc",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_pl_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_pl_fastconformer_hybrid_large_pc.nemo",
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_by_fastconformer_hybrid_large_pc",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_by_fastconformer_hybrid_large_pc",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_by_fastconformer_hybrid_large_pc/versions/1.18.0/files/stt_by_fastconformer_hybrid_large_pc.nemo",
)
results.append(model)

return results
7 changes: 7 additions & 0 deletions nemo/collections/asr/models/rnnt_bpe_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,13 @@ def list_available_models(cls) -> List[PretrainedModelInfo]:
)
results.append(model)

model = PretrainedModelInfo(
pretrained_model_name="stt_en_fastconformer_transducer_large",
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_fastconformer_transducer_large",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_fastconformer_transducer_large/versions/1.0.0/files/stt_en_fastconformer_transducer_large.nemo",
)
results.append(model)

return results

def __init__(self, cfg: DictConfig, trainer: Trainer = None):
Expand Down