Commit

Apply isort and black reformatting
Signed-off-by: nithinraok <nithinraok@users.noreply.github.com>
nithinraok committed Nov 9, 2024
1 parent b4a2ff1 commit e1ed2e6
Showing 14 changed files with 88 additions and 81 deletions.
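
The diffs below are formatting-only: blank lines inserted after class-level docstrings and whitespace-only changes to docstring and comment lines (each removed/added pair differs only in trailing whitespace). A minimal sketch of how such a reformatting pass is typically run, assuming isort and black pick up their settings from the repository's pyproject.toml; the exact invocation used for this commit is not shown here:

    pip install isort black
    isort nemo/ examples/
    black nemo/ examples/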
@@ -69,8 +69,9 @@
 @dataclass
 class TranscriptionConfig:
     """
-    Transcription config
+    Transcription config
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
@@ -121,7 +122,7 @@ class TranscriptionConfig:
 @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig)
 def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
     """
-    Transcribes the input audio and can be used to infer long audio files by chunking
+    Transcribes the input audio and can be used to infer long audio files by chunking
     them into smaller segments.
     """
     logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
@@ -67,6 +67,7 @@ class TranscriptionConfig:
     """
     Transcription Configuration for buffered inference.
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
@@ -117,7 +118,7 @@ class TranscriptionConfig:
 @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig)
 def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
     """
-    Transcribes the input audio and can be used to infer long audio files by chunking
+    Transcribes the input audio and can be used to infer long audio files by chunking
     them into smaller segments.
     """
     logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
@@ -90,6 +90,7 @@ class TranscriptionConfig:
     """
     Transcription Configuration for buffered inference.
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
@@ -147,7 +148,7 @@ class TranscriptionConfig:
 @hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig)
 def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
     """
-    Transcribes the input audio and can be used to infer long audio files by chunking
+    Transcribes the input audio and can be used to infer long audio files by chunking
     them into smaller segments.
     """
     logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')
2 changes: 2 additions & 0 deletions examples/asr/speech_translation/translate_speech.py
@@ -67,6 +67,7 @@ class ModelChangeConfig:
     """
     Sub-config for changes specific to the Conformer Encoder
     """
+
     conformer: ConformerChangeConfig = ConformerChangeConfig()


@@ -75,6 +76,7 @@ class TranslationConfig:
     """
     Translation Configuration for audio to text translation.
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
4 changes: 3 additions & 1 deletion examples/asr/transcribe_speech.py
@@ -106,6 +106,7 @@ class ModelChangeConfig:
     """
     Sub-config for changes specific to the Conformer Encoder
     """
+
     conformer: ConformerChangeConfig = field(default_factory=ConformerChangeConfig)


@@ -114,6 +115,7 @@ class TranscriptionConfig:
     """
     Transcription Configuration for audio to text transcription.
     """
+
     # Required configs
     model_path: Optional[str] = None  # Path to a .nemo file
     pretrained_name: Optional[str] = None  # Name of a pretrained model
@@ -170,7 +172,7 @@ class TranscriptionConfig:
     # Implicit single-turn assuming default role='user' (works with Canary-1B)
     # +prompt.source_lang=en +prompt.target_lang=es +prompt.task=asr +prompt.pnc=yes
     # Explicit single-turn prompt:
-    # +prompt.role=user +prompt.slots.source_lang=en +prompt.slots.target_lang=es
+    # +prompt.role=user +prompt.slots.source_lang=en +prompt.slots.target_lang=es
     # +prompt.slots.task=s2t_translation +prompt.slots.pnc=yes
     # Explicit multi-turn prompt:
     # +prompt.turns='[{role:user,slots:{source_lang:en,target_lang:es,task:asr,pnc:yes}}]'
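
As a usage note for the prompt overrides documented in the comments above, a hypothetical command line; the pretrained model name, the audio_dir field, and the paths are illustrative assumptions, not taken from this diff:

    # Hypothetical invocation; pretrained_name value, audio_dir field, and paths are assumptions.
    python examples/asr/transcribe_speech.py \
        pretrained_name=nvidia/canary-1b \
        audio_dir=/data/wavs \
        +prompt.source_lang=en +prompt.target_lang=es +prompt.task=asr +prompt.pnc=yes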
10 changes: 5 additions & 5 deletions nemo/collections/asr/models/aed_multitask_models.py
@@ -224,7 +224,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None):

         self.val_loss = GlobalAverageLossMetric(dist_sync_on_step=False, take_avg_loss=True)

-        # TODO: PytorchMetrics lets you join two metrics together to save compute.
+        # TODO: PytorchMetrics lets you join two metrics together to save compute.
         # But need to make wer and bleu have same outputs first
         self.wer = WER(self.decoding, log_prediction=self.cfg.get("log_prediction"))
         self.bleu = BLEU(
@@ -273,14 +273,14 @@ def change_vocabulary(
         prompt_format: Optional[str] = None,
     ):
         """
-        Changes vocabulary used during AED decoding process. Use this method when fine-tuning on
+        Changes vocabulary used during AED decoding process. Use this method when fine-tuning on
         from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
-        modules unchanged. For example, you would use it if you want to use pretrained encoder when
-        fine-tuning on data in another language, or when you'd need model to learn capitalization,
+        modules unchanged. For example, you would use it if you want to use pretrained encoder when
+        fine-tuning on data in another language, or when you'd need model to learn capitalization,
         punctuation and/or special characters.
         Args:
-            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
+            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
                 (if the tokenizer type is `agg`)
             new_tokenizer_type: Type of tokenizer. Can be either `agg`, `bpe` or `wpe`.
             decoding_cfg: A config for the decoding, which is optional. If the decoding type
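
To illustrate the change_vocabulary docstring above, a minimal sketch of swapping the tokenizer before fine-tuning an AED multitask model; the checkpoint name and tokenizer directory are illustrative assumptions:

    # Minimal sketch; checkpoint name and tokenizer directory are illustrative assumptions.
    from nemo.collections.asr.models import EncDecMultiTaskModel

    model = EncDecMultiTaskModel.from_pretrained("nvidia/canary-1b")
    # Only the decoder-side vocabulary changes; encoder and preprocessor are left untouched.
    model.change_vocabulary(
        new_tokenizer_dir="/path/to/target_language_tokenizer",
        new_tokenizer_type="bpe",
    )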
8 changes: 4 additions & 4 deletions nemo/collections/asr/models/ctc_bpe_models.py
@@ -209,13 +209,13 @@ def change_vocabulary(
         """
         Changes vocabulary of the tokenizer used during CTC decoding process.
         Use this method when fine-tuning on from pre-trained model.
-        This method changes only decoder and leaves encoder and pre-processing modules unchanged.
-        For example, you would use it if you want to use pretrained encoder when fine-tuning on a
-        data in another language, or when you'd need model to learn capitalization, punctuation
+        This method changes only decoder and leaves encoder and pre-processing modules unchanged.
+        For example, you would use it if you want to use pretrained encoder when fine-tuning on a
+        data in another language, or when you'd need model to learn capitalization, punctuation
         and/or special characters.
         Args:
-            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
+            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
                 (if the tokenizer type is `agg`)
             new_tokenizer_type: Either `agg`, `bpe` or `wpe`. `bpe` is used for SentencePiece tokenizers,
                 whereas `wpe` is used for `BertTokenizer`.
8 changes: 4 additions & 4 deletions nemo/collections/asr/models/hybrid_rnnt_ctc_bpe_models.py
@@ -253,10 +253,10 @@ def change_vocabulary(
         ctc_decoding_cfg: Optional[DictConfig] = None,
     ):
         """
-        Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning on
-        from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
-        modules unchanged. For example, you would use it if you want to use pretrained encoder when
-        fine-tuning on data in another language, or when you'd need model to learn capitalization,
+        Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning on
+        from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
+        modules unchanged. For example, you would use it if you want to use pretrained encoder when
+        fine-tuning on data in another language, or when you'd need model to learn capitalization,
         punctuation and/or special characters.
         Args:
10 changes: 5 additions & 5 deletions nemo/collections/asr/models/rnnt_bpe_models.py
@@ -344,14 +344,14 @@ def change_vocabulary(
         decoding_cfg: Optional[DictConfig] = None,
     ):
         """
-        Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning
-        on from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
-        modules unchanged. For example, you would use it if you want to use pretrained encoder when fine-tuning
-        on data in another language, or when you'd need model to learn capitalization, punctuation
+        Changes vocabulary used during RNNT decoding process. Use this method when fine-tuning
+        on from pre-trained model. This method changes only decoder and leaves encoder and pre-processing
+        modules unchanged. For example, you would use it if you want to use pretrained encoder when fine-tuning
+        on data in another language, or when you'd need model to learn capitalization, punctuation
         and/or special characters.
         Args:
-            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
+            new_tokenizer_dir: Directory path to tokenizer or a config for a new tokenizer
                 (if the tokenizer type is `agg`)
             new_tokenizer_type: Type of tokenizer. Can be either `agg`, `bpe` or `wpe`.
             decoding_cfg: A config for the decoder, which is optional. If the decoding type
2 changes: 1 addition & 1 deletion nemo/collections/asr/modules/conv_asr.py
@@ -776,7 +776,7 @@ class SpeakerDecoder(NeuralModule, Exportable):
     Args:
         feat_in (int): Number of channels being input to this module
         num_classes (int): Number of unique speakers in dataset
-        emb_sizes (list) : shapes of intermediate embedding layers (we consider speaker embbeddings
+        emb_sizes (list) : shapes of intermediate embedding layers (we consider speaker embbeddings
            from 1st of this layers). Defaults to [1024,1024]
        pool_mode (str) : Pooling strategy type. options are 'xvector','tap', 'attention'
            Defaults to 'xvector (mean and variance)'
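
For context on the SpeakerDecoder arguments above, a hypothetical standalone instantiation; the keyword names follow the documented args, while the dimensions are illustrative assumptions:

    # Hypothetical sketch; feat_in, num_classes, and emb_sizes values are illustrative.
    from nemo.collections.asr.modules.conv_asr import SpeakerDecoder

    decoder = SpeakerDecoder(
        feat_in=512,             # channels produced by the encoder
        num_classes=7205,        # number of unique speakers in the training set
        emb_sizes=[1024, 1024],  # intermediate embedding layers; speaker embedding taken from the first
        pool_mode="xvector",     # mean + variance statistics pooling
    )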
4 changes: 2 additions & 2 deletions nemo/collections/asr/parts/mixins/transcription.py
@@ -202,8 +202,8 @@ def transcribe(
                 to `None`. Defaults to `None`. Uses zero-based indexing.
             augmentor: (DictConfig): Augment audio samples during transcription if augmentor is applied.
             verbose: (bool) whether to display tqdm progress bar
-            timestamps: Optional(Bool): timestamps will be returned if set to True as part of hypothesis object
-                (output.timestep['segment']/output.timestep['word']). Refer to `Hypothesis` class for more details.
+            timestamps: Optional(Bool): timestamps will be returned if set to True as part of hypothesis object
+                (output.timestep['segment']/output.timestep['word']). Refer to `Hypothesis` class for more details.
                 Default is None and would retain the previous state set by using self.change_decoding_strategy().
             override_config: (Optional[TranscribeConfig]) override transcription config pre-defined by the user.
                 **Note**: All other arguments in the function will be ignored if override_config is passed.
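
A brief sketch of the timestamps behavior documented above; the pretrained model name and audio path are illustrative assumptions:

    # Minimal sketch; model name and audio path are illustrative assumptions.
    from nemo.collections.asr.models import ASRModel

    model = ASRModel.from_pretrained("nvidia/parakeet-tdt-1.1b")
    # With timestamps=True each returned hypothesis carries word- and segment-level timing.
    hyps = model.transcribe(["sample.wav"], timestamps=True, return_hypotheses=True)
    print(hyps[0].text)
    print(hyps[0].timestep["word"])     # word-level timestamps
    print(hyps[0].timestep["segment"])  # segment-level timestamps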