Commit

Apply isort and black reformatting
Signed-off-by: nithinraok <nithinraok@users.noreply.github.com>
nithinraok committed Nov 9, 2024
1 parent b4a2ff1 commit e1ed2e6
Showing 14 changed files with 88 additions and 81 deletions.
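
The diffs below are formatting-only: blank lines inserted after class-level docstrings and whitespace-only changes to docstring and comment lines (each removed/added pair differs only in trailing whitespace). A minimal sketch of how such a reformatting pass is typically run, assuming isort and black pick up their settings from the repository's pyproject.toml; the exact invocation used for this commit is not shown here:

    pip install isort black
    isort nemo/ examples/
    black nemo/ examples/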
@@ -69,8 +69,9 @@
 @dataclass
 class TranscriptionConfig:
     """
-    Transcription config
+    Transcription config
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
@@ -121,7 +122,7 @@ class TranscriptionConfig:
 @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig)
 def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
     """
-    Transcribes the input audio and can be used to infer long audio files by chunking
+    Transcribes the input audio and can be used to infer long audio files by chunking
     them into smaller segments.
     """
     logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
@@ -67,6 +67,7 @@ class TranscriptionConfig:
     """
     Transcription Configuration for buffered inference.
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
@@ -117,7 +118,7 @@ class TranscriptionConfig:
 @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig)
 def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
     """
-    Transcribes the input audio and can be used to infer long audio files by chunking
+    Transcribes the input audio and can be used to infer long audio files by chunking
     them into smaller segments.
     """
     logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
@@ -90,6 +90,7 @@ class TranscriptionConfig:
     """
     Transcription Configuration for buffered inference.
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
@@ -147,7 +148,7 @@ class TranscriptionConfig:
 @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig)
 def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
     """
-    Transcribes the input audio and can be used to infer long audio files by chunking
+    Transcribes the input audio and can be used to infer long audio files by chunking
     them into smaller segments.
     """
     logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
2 changes: 2 additions & 0 deletions examples/asr/speech_translation/translate_speech.py
@@ -67,6 +67,7 @@ class ModelChangeConfig:
     """
     Sub-config for changes specific to the Conformer Encoder
     """
+
     conformer: ConformerChangeConfig = ConformerChangeConfig()


@@ -75,6 +76,7 @@ class TranslationConfig:
     """
     Translation Configuration for audio to text translation.
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
4 changes: 3 additions & 1 deletion examples/asr/transcribe_speech.py
@@ -106,6 +106,7 @@ class ModelChangeConfig:
     """
     Sub-config for changes specific to the Conformer Encoder
     """
+
     conformer: ConformerChangeConfig = field(default_factory=ConformerChangeConfig)


@@ -114,6 +115,7 @@ class TranscriptionConfig:
     """
     Transcription Configuration for audio to text transcription.
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
@@ -170,7 +172,7 @@ class TranscriptionConfig:
     # Implicit single-turn assuming default role='user' (works with Canary-1B)
     # +prompt.source_lang=en +prompt.target_lang=es +prompt.task=asr +prompt.pnc=yes
     # Explicit single-turn prompt:
-    # +prompt.role=user +prompt.slots.source_lang=en +prompt.slots.target_lang=es
+    # +prompt.role=user +prompt.slots.source_lang=en +prompt.slots.target_lang=es
     # +prompt.slots.task=s2t_translation +prompt.slots.pnc=yes
     # Explicit multi-turn prompt:
     # +prompt.turns='[{role:user,slots:{source_lang:en,target_lang:es,task:asr,pnc:yes}}]'
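
As a usage note for the prompt overrides documented in the comments above, a hypothetical command line; the pretrained model name, the audio_dir field, and the paths are illustrative assumptions, not taken from this diff:

    # Hypothetical invocation; pretrained_name value, audio_dir field, and paths are assumptions.
    python examples/asr/transcribe_speech.py \
        pretrained_name=nvidia/canary-1b \
        audio_dir=/data/wavs \
        +prompt.source_lang=en +prompt.target_lang=es +prompt.task=asr +prompt.pnc=yes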
10 changes: 5 additions & 5 deletions nemo/collections/asr/models/aed_multitask_models.py
@@ -224,7 +224,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None):

         self.val_loss = GlobalAverageLossMetric(dist_sync_on_step=False, take_avg_loss=True)

-        # TODO: PytorchMetrics lets you join two metrics together to save compute.
+        # TODO: PytorchMetrics lets you join two metrics together to save compute.
         # But need to make wer and bleu have same outputs first
         self.wer = WER(self.decoding, log_prediction=self.cfg.get("log_prediction"))
         self.bleu = BLEU(
@@ -273,14 +273,14 @@ def change_vocabulary(
         prompt_format: Optional[str] = None,
     ):
         """
-        Changes vocabulary used during AED decoding process. Use this method when fine-tuning on
+        Changes vocabulary used during AED decoding process. Use this method when fine-tuning on
         from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
-        modules unchanged. For example, you would use it if you want to use pretrained encoder when
-        fine-tuning on data in another language, or when you'd need model to learn capitalization,
+        modules unchanged. For example, you would use it if you want to use pretrained encoder when
+        fine-tuning on data in another language, or when you'd need model to learn capitalization,
         punctuation and/or special characters.
         Args:
-            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
+            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
                 (if the tokenizer type is `agg`)
             new_tokenizer_type: Type of tokenizer. Can be either `agg`, `bpe` or `wpe`.
             decoding_cfg: A config for the decoding, which is optional. If the decoding type
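
To illustrate the change_vocabulary docstring above, a minimal sketch of swapping the tokenizer before fine-tuning an AED multitask model; the checkpoint name and tokenizer directory are illustrative assumptions:

    # Minimal sketch; checkpoint name and tokenizer directory are illustrative assumptions.
    from nemo.collections.asr.models import EncDecMultiTaskModel

    model = EncDecMultiTaskModel.from_pretrained("nvidia/canary-1b")
    # Only the decoder-side vocabulary changes; encoder and preprocessor are left untouched.
    model.change_vocabulary(
        new_tokenizer_dir="/path/to/target_language_tokenizer",
        new_tokenizer_type="bpe",
    )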
8 changes: 4 additions & 4 deletions nemo/collections/asr/models/ctc_bpe_models.py
@@ -209,13 +209,13 @@ def change_vocabulary(
         """
         Changes vocabulary of the tokenizer used during CTC decoding process.
         Use this method when fine-tuning on from pre-trained model.
-        This method changes only decoder and leaves encoder and pre-processing modules unchanged.
-        For example, you would use it if you want to use pretrained encoder when fine-tuning on a
-        data in another language, or when you'd need model to learn capitalization, punctuation
+        This method changes only decoder and leaves encoder and pre-processing modules unchanged.
+        For example, you would use it if you want to use pretrained encoder when fine-tuning on a
+        data in another language, or when you'd need model to learn capitalization, punctuation
         and/or special characters.
         Args:
-            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
+            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
                 (if the tokenizer type is `agg`)
             new_tokenizer_type: Either `agg`, `bpe` or `wpe`. `bpe` is used for SentencePiece tokenizers,
                 whereas `wpe` is used for `BertTokenizer`.
8 changes: 4 additions & 4 deletions nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py
@@ -253,10 +253,10 @@ def change_vocabulary(
         ctc_decoding_cfg: Optional[DictConfig] = None,
     ):
         """
-        Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning on
-        from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
-        modules unchanged. For example, you would use it if you want to use pretrained encoder when
-        fine-tuning on data in another language, or when you'd need model to learn capitalization,
+        Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning on
+        from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
+        modules unchanged. For example, you would use it if you want to use pretrained encoder when
+        fine-tuning on data in another language, or when you'd need model to learn capitalization,
         punctuation and/or special characters.
         Args:
10 changes: 5 additions & 5 deletions nemo/collections/asr/models/rnnt_bpe_models.py
@@ -344,14 +344,14 @@ def change_vocabulary(
         decoding_cfg: Optional[DictConfig] = None,
     ):
         """
-        Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning
-        on from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
-        modules unchanged. For example, you would use it if you want to use pretrained encoder when fine-tuning
-        on data in another language, or when you'd need model to learn capitalization, punctuation
+        Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning
+        on from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
+        modules unchanged. For example, you would use it if you want to use pretrained encoder when fine-tuning
+        on data in another language, or when you'd need model to learn capitalization, punctuation
         and/or special characters.
         Args:
-            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
+            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
                 (if the tokenizer type is `agg`)
             new_tokenizer_type: Type of tokenizer. Can be either `agg`, `bpe` or `wpe`.
             decoding_cfg: A config for the decoder, which is optional. If the decoding type
2 changes: 1 addition & 1 deletion nemo/collections/asr/modules/conv_asr.py
@@ -776,7 +776,7 @@ class SpeakerDecoder(NeuralModule, Exportable):
     Args:
         feat_in (int): Number of channels being input to this module
         num_classes (int): Number of unique speakers in dataset
-        emb_sizes (list) : shapes of intermediate embedding layers (we consider speaker embbeddings
+        emb_sizes (list) : shapes of intermediate embedding layers (we consider speaker embbeddings
            from 1st of this layers). Defaults to [1024,1024]
        pool_mode (str) : Pooling strategy type. options are 'xvector','tap', 'attention'
            Defaults to 'xvector (mean and variance)'
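
For context on the SpeakerDecoder arguments above, a hypothetical standalone instantiation; the keyword names follow the documented args, while the dimensions are illustrative assumptions:

    # Hypothetical sketch; feat_in, num_classes, and emb_sizes values are illustrative.
    from nemo.collections.asr.modules.conv_asr import SpeakerDecoder

    decoder = SpeakerDecoder(
        feat_in=512,             # channels produced by the encoder
        num_classes=7205,        # number of unique speakers in the training set
        emb_sizes=[1024, 1024],  # intermediate embedding layers; speaker embedding taken from the first
        pool_mode="xvector",     # mean + variance statistics pooling
    )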
4 changes: 2 additions & 2 deletions nemo/collections/asr/parts/mixins/transcription.py
@@ -202,8 +202,8 @@ def transcribe(
                 to `None`. Defaults to `None`. Uses zero-based indexing.
             augmentor: (DictConfig): Augment audio samples during transcription if augmentor is applied.
             verbose: (bool) whether to display tqdm progress bar
-            timestamps: Optional(Bool): timestamps will be returned if set to True as part of hypothesis object
-                (output.timestep['segment']/output.timestep['word']). Refer to `Hypothesis` class for more details.
+            timestamps: Optional(Bool): timestamps will be returned if set to True as part of hypothesis object
+                (output.timestep['segment']/output.timestep['word']). Refer to `Hypothesis` class for more details.
                 Default is None and would retain the previous state set by using self.change_decoding_strategy().
             override_config: (Optional[TranscribeConfig]) override transcription config pre-defined by the user.
                 **Note**: All other arguments in the function will be ignored if override_config is passed.
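
A brief sketch of the timestamps behavior documented above; the pretrained model name and audio path are illustrative assumptions:

    # Minimal sketch; model name and audio path are illustrative assumptions.
    from nemo.collections.asr.models import ASRModel

    model = ASRModel.from_pretrained("nvidia/parakeet-tdt-1.1b")
    # With timestamps=True each returned hypothesis carries word- and segment-level timing.
    hyps = model.transcribe(["sample.wav"], timestamps=True, return_hypotheses=True)
    print(hyps[0].text)
    print(hyps[0].timestep["word"])     # word-level timestamps
    print(hyps[0].timestep["segment"])  # segment-level timestamps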