
Commit

Apply isort and black reformatting
Signed-off-by: monica-sekoyan <monica-sekoyan@users.noreply.github.com>
monica-sekoyan committed May 29, 2024
1 parent 418e1f0 commit 5a1929a
Showing 2 changed files with 33 additions and 17 deletions.
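All of the hunks below are mechanical formatter output rather than behavioral changes. Assuming the repository's default tool configuration, an equivalent diff would typically be produced by running `isort` and then `black` over the two files.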
27 changes: 17 additions & 10 deletions nemo/collections/asr/models/label_models.py
@@ -341,7 +341,8 @@ def forward_for_export(self, processed_signal, processed_signal_len):
     @typecheck()
     def forward(self, input_signal, input_signal_length):
         processed_signal, processed_signal_len = self.preprocessor(
-            input_signal=input_signal, length=input_signal_length,
+            input_signal=input_signal,
+            length=input_signal_length,
         )
 
         if self.spec_augmentation is not None and self.training:
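The split seen here, and in most hunks below, is black's "magic trailing comma" rule: a call that ends with a trailing comma is exploded to one argument per line. A minimal, runnable illustration with a hypothetical function:

```python
def process(input_signal, length):
    return input_signal[:length]

# With a magic trailing comma in the call, black keeps one argument per line:
out = process(
    input_signal=[0.1, 0.2, 0.3],
    length=2,
)

# Without a trailing comma (and under the line-length limit), black collapses it:
out = process(input_signal=[0.1, 0.2, 0.3], length=2)
```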
@@ -591,7 +592,9 @@ def verify_speakers(self, path2audio_file1, path2audio_file2, threshold=0.7):
         return False
 
     @torch.no_grad()
-    def verify_speakers_batch(self, manifest_filepath1, manifest_filepath2, threshold=0.7, batch_size=32, sample_rate=16000, device='cuda'):
+    def verify_speakers_batch(
+        self, manifest_filepath1, manifest_filepath2, threshold=0.7, batch_size=32, sample_rate=16000, device='cuda'
+    ):
         """
         Verify if audio files from the first and second manifests are from the same speaker or not.
@@ -606,8 +609,12 @@ def verify_speakers_batch(self, manifest_filepath1, manifest_filepath2, threshold=0.7, batch_size=32, sample_rate=16000, device='cuda'):
         Returns:
             True if both audio pair is from same speaker, False otherwise
         """
-        embs1, _, _, _ = self.batch_inference(manifest_filepath1, batch_size=batch_size, sample_rate=sample_rate, device=device)
-        embs2, _, _, _ = self.batch_inference(manifest_filepath2, batch_size=batch_size, sample_rate=sample_rate, device=device)
+        embs1, _, _, _ = self.batch_inference(
+            manifest_filepath1, batch_size=batch_size, sample_rate=sample_rate, device=device
+        )
+        embs2, _, _, _ = self.batch_inference(
+            manifest_filepath2, batch_size=batch_size, sample_rate=sample_rate, device=device
+        )
 
         if embs1.shape != embs2.shape:
             raise ValueError(
@@ -624,10 +631,8 @@ def verify_speakers_batch(self, manifest_filepath1, manifest_filepath2, threshold=0.7, batch_size=32, sample_rate=16000, device='cuda'):
         Y = embs2.unsqueeze(dim=2)
         # Score
         similarity_scores = torch.matmul(X, Y).squeeze() / (
-            (
-                torch.matmul(X, X.permute(0, 2, 1)).squeeze() * torch.matmul(Y.permute(0, 2, 1), Y).squeeze()
-            ) ** 0.5
-        )
+            (torch.matmul(X, X.permute(0, 2, 1)).squeeze() * torch.matmul(Y.permute(0, 2, 1), Y).squeeze()) ** 0.5
+        )
         similarity_scores = (similarity_scores + 1) / 2
 
         # Decision
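For reference, the expression in this hunk is a batched cosine similarity mapped from [-1, 1] to [0, 1]. Assuming nonzero embeddings, the same scores come out of torch.nn.functional.cosine_similarity; a minimal sketch, with hypothetical batch and embedding sizes:

```python
import torch
import torch.nn.functional as F

embs1 = torch.randn(4, 192)  # hypothetical batch of speaker embeddings
embs2 = torch.randn(4, 192)

# Equivalent to dot(X, Y) / sqrt(dot(X, X) * dot(Y, Y)) per pair
scores = F.cosine_similarity(embs1, embs2, dim=-1)
scores = (scores + 1) / 2    # map from [-1, 1] to [0, 1], as in the diff
decisions = scores >= 0.7    # threshold matching verify_speakers_batch's default
```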
@@ -672,7 +677,9 @@ def batch_inference(self, manifest_filepath, batch_size=32, sample_rate=16000, device='cuda'):
         dataset = AudioToSpeechLabelDataset(manifest_filepath=manifest_filepath, labels=None, featurizer=featurizer)
 
         dataloader = torch.utils.data.DataLoader(
-            dataset=dataset, batch_size=batch_size, collate_fn=dataset.fixed_seq_collate_fn,
+            dataset=dataset,
+            batch_size=batch_size,
+            collate_fn=dataset.fixed_seq_collate_fn,
         )
 
         logits = []
@@ -694,7 +701,7 @@ def batch_inference(self, manifest_filepath, batch_size=32, sample_rate=16000, device='cuda'):
         self.train(mode=mode)
         if mode is True:
             self.unfreeze()
-
+
         logits, embs, gt_labels = np.asarray(logits), np.asarray(embs), np.asarray(gt_labels)
 
         return embs, logits, gt_labels, trained_labels
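A hedged usage sketch of the batched API touched here; the checkpoint name and manifest paths are placeholders, and loading via EncDecSpeakerLabelModel.from_pretrained assumes a standard NeMo install:

```python
from nemo.collections.asr.models import EncDecSpeakerLabelModel

model = EncDecSpeakerLabelModel.from_pretrained("titanet_large")  # placeholder checkpoint name
decisions = model.verify_speakers_batch(
    "enroll_manifest.json",  # placeholder manifest paths
    "trial_manifest.json",
    threshold=0.7,
    batch_size=32,
    sample_rate=16000,
    device="cuda",
)  # per-pair True/False decisions, per the docstring above
```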
23 changes: 16 additions & 7 deletions nemo/collections/asr/parts/preprocessing/segment.py
@@ -51,7 +51,7 @@
     from pydub import AudioSegment as Audio
     from pydub.exceptions import CouldntDecodeError
 
-    #FFMPEG for some formats needs explicitly defined coding-decoding strategy
+    # FFMPEG for some formats needs explicitly defined coding-decoding strategy
     ffmpeg_codecs = {'opus': 'opus'}
 
 except ModuleNotFoundError:
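The ffmpeg_codecs mapping is presumably consumed when decoding through pydub, whose AudioSegment.from_file accepts a codec argument; the actual call site is not part of this diff, so the following is an assumption-laden sketch with a placeholder file name:

```python
from pydub import AudioSegment as Audio

ffmpeg_codecs = {'opus': 'opus'}

fmt = 'opus'  # hypothetical format inferred from the file extension
audio = Audio.from_file('sample.opus', codec=ffmpeg_codecs.get(fmt))  # placeholder path
```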
@@ -374,7 +374,13 @@ def from_file_list(
         sample_rate = target_sr
 
         return cls(
-            samples, sample_rate, target_sr=target_sr, trim=trim, channel_selector=channel_selector, *args, **kwargs,
+            samples,
+            sample_rate,
+            target_sr=target_sr,
+            trim=trim,
+            channel_selector=channel_selector,
+            *args,
+            **kwargs,
         )
 
     @classmethod
@@ -472,9 +478,8 @@ def duration(self):
 
     @property
     def rms_db(self):
-        """Return per-channel RMS value.
-        """
-        mean_square = np.mean(self._samples ** 2, axis=0)
+        """Return per-channel RMS value."""
+        mean_square = np.mean(self._samples**2, axis=0)
         return 10 * np.log10(mean_square)
 
     @property
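The change here is only spacing (`self._samples ** 2` becomes `self._samples**2`, black's style for simple power operands); the math is unchanged: per-channel RMS power in dB is 10·log10(mean(x²)). A small worked example:

```python
import numpy as np

samples = np.array([[0.5, -0.25], [-0.5, 0.25]])  # shape (time, channels)
mean_square = np.mean(samples**2, axis=0)         # [0.25, 0.0625]
rms_db = 10 * np.log10(mean_square)               # approx [-6.02, -12.04] dB
```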
@@ -485,7 +490,7 @@ def gain_db(self, gain):
         self._samples *= 10.0 ** (gain / 20.0)
 
     def normalize_db(self, target_db=-20, ref_channel=None):
-        """Normalize the signal to a target RMS value in decibels.
+        """Normalize the signal to a target RMS value in decibels.
         For multi-channel audio, the RMS value is determined by the reference channel (if not None),
         otherwise it will be the maximum RMS across all channels.
         """
@@ -513,7 +518,11 @@ def pad(self, pad_size, symmetric=False):
                 f"Padding not implemented for signals with more that 2 dimensions. Current samples dimension: {samples_ndim}."
             )
         # apply padding
-        self._samples = np.pad(self._samples, pad_width, mode='constant',)
+        self._samples = np.pad(
+            self._samples,
+            pad_width,
+            mode='constant',
+        )
 
     def subsegment(self, start_time=None, end_time=None):
         """Cut the AudioSegment between given boundaries.