Fixing CI issues
Signed-off-by: Boris Fomitchev <bfomitchev@nvidia.com>
borisfom committed Jun 19, 2024
1 parent a88f1c2 commit 21a5882
Showing 4 changed files with 40 additions and 43 deletions.
1 change: 1 addition & 0 deletions Dockerfile.ci
@@ -48,6 +48,7 @@ pip install --no-cache-dir --no-build-isolation --extra-index-url https://pypi.n
"nvidia-modelopt[torch]~=${MODELOPT_VERSION}" \
"apex @ git+https://github.com/NVIDIA/apex.git@${APEX_TAG}" \
"llama-index==0.10.43" \
"onnxscript @ git+https://github.com/microsoft/onnxscript" \
-r tools/ctc_segmentation/requirements.txt \
".[all]"

70 changes: 37 additions & 33 deletions nemo/collections/asr/models/msdd_models.py
@@ -163,8 +163,7 @@ def add_speaker_model_config(self, cfg):
del cfg.speaker_model_cfg.validation_ds

def _init_segmentation_info(self):
"""Initialize segmentation settings: window, shift and multiscale weights.
"""
"""Initialize segmentation settings: window, shift and multiscale weights."""
self._diarizer_params = self.cfg_msdd_model.diarizer
self.multiscale_args_dict = parse_scale_configs(
self._diarizer_params.speaker_embeddings.parameters.window_length_in_sec,
@@ -275,10 +274,14 @@ def __setup_dataloader_from_config_infer(
)

def setup_training_data(self, train_data_config: Optional[Union[DictConfig, Dict]]):
- self._train_dl = self.__setup_dataloader_from_config(config=train_data_config,)
+ self._train_dl = self.__setup_dataloader_from_config(
+     config=train_data_config,
+ )

def setup_validation_data(self, val_data_layer_config: Optional[Union[DictConfig, Dict]]):
- self._validation_dl = self.__setup_dataloader_from_config(config=val_data_layer_config,)
+ self._validation_dl = self.__setup_dataloader_from_config(
+     config=val_data_layer_config,
+ )

def setup_test_data(self, test_data_config: Optional[Union[DictConfig, Dict]]):
if self.pairwise_infer:
@@ -338,32 +341,32 @@ def get_ms_emb_seq(
Merged embeddings without zero-padding in the batch. See `ms_seg_counts` for details.
Shape: (Total number of segments in the batch, emb_dim)
scale_mapping (Tensor):
- The element at the m-th row and the n-th column of the scale mapping matrix indicates the (m+1)-th scale
- segment index which has the closest center distance with (n+1)-th segment in the base scale.
- Example:
- scale_mapping_argmat[2][101] = 85
- In the above example, it means that 86-th segment in the 3rd scale (python index is 2) is mapped with
- 102-th segment in the base scale. Thus, the longer segments bound to have more repeating numbers since
- multiple base scale segments (since the base scale has the shortest length) fall into the range of the
- longer segments. At the same time, each row contains N numbers of indices where N is number of
- segments in the base-scale (i.e., the finest scale).
+ The element at the m-th row and the n-th column of the scale mapping matrix indicates the (m+1)-th scale
+ segment index which has the closest center distance with (n+1)-th segment in the base scale.
+ Example:
+ scale_mapping_argmat[2][101] = 85
+ In the above example, it means that 86-th segment in the 3rd scale (python index is 2) is mapped with
+ 102-th segment in the base scale. Thus, the longer segments bound to have more repeating numbers since
+ multiple base scale segments (since the base scale has the shortest length) fall into the range of the
+ longer segments. At the same time, each row contains N numbers of indices where N is number of
+ segments in the base-scale (i.e., the finest scale).
Shape: (batch_size, scale_n, self.diar_window_length)
ms_seg_counts (Tensor):
Cumulative sum of the number of segments in each scale. This information is needed to reconstruct
the multi-scale input matrix during forward propagating.
- Example: `batch_size=3, scale_n=6, emb_dim=192`
- ms_seg_counts =
- [[8, 9, 12, 16, 25, 51],
- [11, 13, 14, 17, 25, 51],
- [ 9, 9, 11, 16, 23, 50]]
+ Example: `batch_size=3, scale_n=6, emb_dim=192`
+ ms_seg_counts =
+ [[8, 9, 12, 16, 25, 51],
+ [11, 13, 14, 17, 25, 51],
+ [ 9, 9, 11, 16, 23, 50]]
- In this function, `ms_seg_counts` is used to get the actual length of each embedding sequence without
- zero-padding.
+ In this function, `ms_seg_counts` is used to get the actual length of each embedding sequence without
+ zero-padding.
Returns:
ms_emb_seq (Tensor):
- Multi-scale embedding sequence that is mapped, matched and repeated. The longer scales are less repeated,
+ Multi-scale embedding sequence that is mapped, matched and repeated. The longer scales are less repeated,
while shorter scales are more frequently repeated following the scale mapping tensor.
"""
scale_n, batch_size = scale_mapping[0].shape[0], scale_mapping.shape[0]
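
For illustration only, a minimal sketch (not the NeMo implementation; `build_ms_emb_seq` is a hypothetical helper) of how `embs`, `scale_mapping`, and `ms_seg_counts` as described in the docstring above could be combined, assuming the stated shapes:

import torch

def build_ms_emb_seq(embs, scale_mapping, ms_seg_counts):
    # embs: (total_segments, emb_dim), concatenated over batch and scales, no zero-padding
    # scale_mapping: (batch_size, scale_n, base_len) long indices into each scale's segments
    # ms_seg_counts: (batch_size, scale_n) number of segments per scale
    batch_size, scale_n, base_len = scale_mapping.shape
    emb_dim = embs.shape[-1]
    # undo the concatenation: one chunk per (sample, scale) pair
    chunks = torch.split(embs, ms_seg_counts.flatten().tolist(), dim=0)
    ms_emb_seq = torch.zeros(batch_size, base_len, scale_n, emb_dim)
    for b in range(batch_size):
        for s in range(scale_n):
            seg_embs = chunks[b * scale_n + s]                      # (ms_seg_counts[b, s], emb_dim)
            ms_emb_seq[b, :, s, :] = seg_embs[scale_mapping[b, s]]  # repeat to the base-scale length
    return ms_emb_seq
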
@@ -409,9 +412,9 @@ def get_cluster_avg_embs_model(
[ 9, 9, 11, 16, 23, 50]
]
- Counts of merged segments: (121, 131, 118)
- embs has shape of (370, 192)
- clus_label_index has shape of (3, 131)
+ Counts of merged segments: (121, 131, 118)
+ embs has shape of (370, 192)
+ clus_label_index has shape of (3, 131)
Shape: (batch_size, scale_n)
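
The numbers in this docstring example are consistent with each other; a quick plain-Python check (not part of the diff):

ms_seg_counts = [[8, 9, 12, 16, 25, 51],
                 [11, 13, 14, 17, 25, 51],
                 [9, 9, 11, 16, 23, 50]]
row_sums = [sum(row) for row in ms_seg_counts]  # [121, 131, 118] -> "Counts of merged segments"
assert sum(row_sums) == 370                     # matches embs shape (370, 192)
assert max(row_sums) == 131                     # matches clus_label_index shape (3, 131)
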
@@ -553,7 +556,7 @@ def forward(
with torch.no_grad():
self.msdd._speaker_model.eval()
logits, embs_d = self.msdd._speaker_model.forward_for_export(
- processed_signal=audio_signal[detach_ids[1]], processed_signal_len=audio_signal_len[detach_ids[1]]
+ audio_signal=audio_signal[detach_ids[1]], length=audio_signal_len[detach_ids[1]]
)
embs = torch.zeros(audio_signal.shape[0], embs_d.shape[1]).to(embs_d.device)
embs[detach_ids[1], :] = embs_d.detach()
@@ -854,9 +857,9 @@ def run_clustering_diarizer(self, manifest_filepath: str, emb_dir: str):
os.makedirs(self.out_rttm_dir, exist_ok=True)

self.clus_diar_model._cluster_params = self.cfg_diar_infer.diarizer.clustering.parameters
- self.clus_diar_model.multiscale_args_dict[
-     "multiscale_weights"
- ] = self.cfg_diar_infer.diarizer.speaker_embeddings.parameters.multiscale_weights
+ self.clus_diar_model.multiscale_args_dict["multiscale_weights"] = (
+     self.cfg_diar_infer.diarizer.speaker_embeddings.parameters.multiscale_weights
+ )
self.clus_diar_model._diarizer_params.speaker_embeddings.parameters = (
self.cfg_diar_infer.diarizer.speaker_embeddings.parameters
)
@@ -1076,7 +1079,6 @@ def extract_standalone_speaker_model(self, prefix: str = 'msdd._speaker_model.')
return _speaker_model

def _init_msdd_model(self, cfg: Union[DictConfig, NeuralDiarizerInferenceConfig]):

"""
Initialized MSDD model with the provided config. Load either from `.nemo` file or `.ckpt` checkpoint files.
"""
@@ -1128,7 +1130,7 @@ def get_pred_mat(self, data_list: List[Union[Tuple[int], List[torch.Tensor]]]) -
digit_map = dict(zip(sorted(set(all_tups)), range(n_est_spks)))
total_len = max([sess[1].shape[1] for sess in data_list])
sum_pred = torch.zeros(total_len, n_est_spks)
- for (_dim_tup, pred_mat) in data_list:
+ for _dim_tup, pred_mat in data_list:
dim_tup = [digit_map[x] for x in _dim_tup]
if len(pred_mat.shape) == 3:
pred_mat = pred_mat.squeeze(0)
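
A small illustration of the index remapping used above (toy values, not from the diff): speaker-index tuples from all sessions are compressed into a contiguous 0..n_est_spks-1 range before predictions are summed.

all_tups = (0, 2, 5, 2)
n_est_spks = len(set(all_tups))                                   # 3
digit_map = dict(zip(sorted(set(all_tups)), range(n_est_spks)))   # {0: 0, 2: 1, 5: 2}
dim_tup = [digit_map[x] for x in (2, 5)]                          # [1, 2]
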
@@ -1167,8 +1169,7 @@ def get_integrated_preds_list(
return output_list

def get_emb_clus_infer(self, cluster_embeddings):
"""Assign dictionaries containing the clustering results from the class instance `cluster_embeddings`.
"""
"""Assign dictionaries containing the clustering results from the class instance `cluster_embeddings`."""
self.msdd_model.emb_sess_test_dict = cluster_embeddings.emb_sess_test_dict
self.msdd_model.clus_test_label_dict = cluster_embeddings.clus_test_label_dict
self.msdd_model.emb_seq_test = cluster_embeddings.emb_seq_test
@@ -1456,7 +1457,10 @@ def from_pretrained(
"""
logging.setLevel(logging.INFO if verbose else logging.WARNING)
cfg = NeuralDiarizerInferenceConfig.init_config(
- diar_model_path=model_name, vad_model_path=vad_model_name, map_location=map_location, verbose=verbose,
+ diar_model_path=model_name,
+ vad_model_path=vad_model_name,
+ map_location=map_location,
+ verbose=verbose,
)
return cls(cfg)

7 changes: 2 additions & 5 deletions nemo/core/classes/common.py
@@ -1016,16 +1016,13 @@ def __init__(
self.ignore_collections = ignore_collections

def __call__(self, wrapped):
- return self.wrapped_call(wrapped) if is_typecheck_enabled() else self.unwrapped_call(wrapped)
+ return self.wrapped_call(wrapped)

def unwrapped_call(self, wrapped):
return wrapped

- def wrapped_call(self, wrapped):
-     return self.decorated_call(wrapped)
-
@wrapt.decorator(enabled=is_typecheck_enabled)
- def decorated_call(self, wrapped, instance: Typing, args, kwargs):
+ def wrapped_call(self, wrapped, instance: Typing, args, kwargs):
"""
Wrapper method that can be used on any function of a class that implements :class:`~nemo.core.Typing`.
By default, it will utilize the `input_types` and `output_types` properties of the class inheriting Typing.
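
For context on this hunk, a standalone sketch of the wrapt pattern involved (toy names, not NeMo code): when `enabled` is given a callable, wrapt re-evaluates it on every call, so the enabled/disabled branch that previously lived in `__call__` is no longer needed.

import wrapt

TYPECHECK_ON = True

def is_enabled():
    return TYPECHECK_ON

@wrapt.decorator(enabled=is_enabled)
def checked(wrapped, instance, args, kwargs):
    # wrapper body runs only while is_enabled() returns True
    print(f"type-checking {wrapped.__name__}")
    return wrapped(*args, **kwargs)

@checked
def add(a, b):
    return a + b

add(1, 2)  # prints "type-checking add" and returns 3
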
5 changes: 0 additions & 5 deletions nemo/core/classes/exportable.py
@@ -225,11 +225,6 @@ def _export(
if dynamic_axes is None:
dynamic_axes = self.dynamic_shapes_for_export(use_dynamo)
if use_dynamo:
- import onnxscript
-
- # https://github.com/microsoft/onnxscript/issues/1544
- onnxscript.optimizer.constant_folding._DEFAULT_CONSTANT_FOLD_SIZE_LIMIT = 1024 * 1024 * 64
-
# https://github.com/pytorch/pytorch/issues/126339
with monkeypatched(torch.nn.RNNBase, "flatten_parameters", lambda *args: None):
logging.info(f"Running export.export, dynamic shapes:{dynamic_axes}\n")
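
For reference, a hedged sketch of what a `monkeypatched` helper like the one used above typically does (an assumption; the real NeMo helper may differ in details):

from contextlib import contextmanager

@contextmanager
def monkeypatched(obj, name, patch):
    # temporarily replace obj.<name> with patch, restoring the original on exit
    original = getattr(obj, name)
    setattr(obj, name, patch)
    try:
        yield
    finally:
        setattr(obj, name, original)
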
