From c5381e27ce71faee8f4c84e0bef49e160e97cfbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?oliver=20k=C3=B6nig?=
Date: Thu, 26 Sep 2024 10:33:50 +0200
Subject: [PATCH] Add missing import guards for causal_conv1d and mamba_ssm
 dependencies (#10429) (#10506)

* Add causal_conv1d import guard

* Add mamba_ssm import guard

* Apply isort and black reformatting

---------

Signed-off-by: Jan Lasek
Signed-off-by: janekl
Co-authored-by: Jan Lasek
Co-authored-by: janekl
Co-authored-by: Pablo Garay
---
 .../megatron/griffin/recurrent_module.py      | 10 ++++++-
 .../language_modeling/megatron_mamba_model.py | 27 ++++++++++++++++---
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/nemo/collections/nlp/models/language_modeling/megatron/griffin/recurrent_module.py b/nemo/collections/nlp/models/language_modeling/megatron/griffin/recurrent_module.py
index 033d3abec732..9ac57532d5ca 100755
--- a/nemo/collections/nlp/models/language_modeling/megatron/griffin/recurrent_module.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron/griffin/recurrent_module.py
@@ -19,7 +19,6 @@
 import torch
 import torch._dynamo
 from accelerated_scan.triton import scan
-from causal_conv1d import causal_conv1d_fn
 from einops import rearrange
 from torch import nn
 
@@ -40,6 +39,13 @@
     TransformerConfig = ApexGuardDefaults
     HAVE_MEGATRON_CORE = False
 
+try:
+    from causal_conv1d import causal_conv1d_fn
+
+    HAVE_CAUSAL_CONV1D = True
+except (ImportError, ModuleNotFoundError):
+    HAVE_CAUSAL_CONV1D = False
+
 torch._dynamo.config.suppress_errors = True
 
 
@@ -277,6 +283,8 @@ def __call__(
 
 class Conv1D(MegatronModule):
     def __init__(self, config, width, temporal_width):
+        if not HAVE_CAUSAL_CONV1D:
+            raise ImportError("Package causal_conv1d is required to use Conv1D")
         super().__init__(config=config)
         self.config = config
         self.width = width
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_mamba_model.py b/nemo/collections/nlp/models/language_modeling/megatron_mamba_model.py
index afbe85e0edbb..4f0000dafaa2 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_mamba_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_mamba_model.py
@@ -13,14 +13,31 @@
 #     limitations under the License.
 
 import torch
-from megatron.core.models.mamba import MambaModel
-from megatron.core.models.mamba.mamba_layer_specs import mamba_stack_spec
 from omegaconf.dictconfig import DictConfig
 from pytorch_lightning.trainer.trainer import Trainer
 
 from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
 from nemo.utils import logging
 
+try:
+    import mamba_ssm
+
+    HAVE_MAMBA_SSM = True
+
+except ModuleNotFoundError:
+
+    HAVE_MAMBA_SSM = False
+
+try:
+    from megatron.core.models.mamba import MambaModel
+    from megatron.core.models.mamba.mamba_layer_specs import mamba_stack_spec
+
+    HAVE_MEGATRON_CORE = True
+
+except (ImportError, ModuleNotFoundError):
+
+    HAVE_MEGATRON_CORE = False
+
 
 class MegatronMambaModel(MegatronGPTModel):
     """
@@ -28,7 +45,8 @@ class MegatronMambaModel(MegatronGPTModel):
     """
 
     def __init__(self, cfg: DictConfig, trainer: Trainer):
-
+        if not HAVE_MEGATRON_CORE or not HAVE_MAMBA_SSM:
+            raise ImportError("Both megatron.core and mamba_ssm packages are required to use MegatronMambaModel")
         self.vocab_size = cfg.get('vocab_size', 65536)
         self.cfg = cfg
         super().__init__(cfg=cfg, trainer=trainer)
@@ -36,7 +54,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer):
         self.mcore_gpt = True
 
     def model_provider_func(self, pre_process, post_process):
-
+        if not HAVE_MEGATRON_CORE or not HAVE_MAMBA_SSM:
+            raise ImportError("Both megatron.core and mamba_ssm packages are required to use MegatronMambaModel")
         self.hybrid_override_pattern = self.cfg.get(
             'hybrid_override_pattern', "M" * self.transformer_config.num_layers
         )
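For reviewers who want the guard pattern applied by this patch in isolation: the optional import is attempted once at module load, the outcome is recorded in a HAVE_* flag, and the ImportError is deferred until the guarded class is actually constructed, so importing the module never fails in environments that lack the optional dependency. The following is a minimal sketch under placeholder names; `some_optional_dep`, `HAVE_SOME_OPTIONAL_DEP`, and `NeedsOptionalDep` are hypothetical and not part of NeMo.

# Minimal sketch of the import-guard pattern (placeholder names, not NeMo code).
try:
    import some_optional_dep  # optional dependency; may be missing in minimal installs

    HAVE_SOME_OPTIONAL_DEP = True
except (ImportError, ModuleNotFoundError):
    HAVE_SOME_OPTIONAL_DEP = False


class NeedsOptionalDep:
    def __init__(self):
        # Defer the failure to construction time so that merely importing this
        # module works even when the optional package is not installed.
        if not HAVE_SOME_OPTIONAL_DEP:
            raise ImportError("Package some_optional_dep is required to use NeedsOptionalDep")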