diff --git a/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py
index 970181851906..35d3a2e5d97b 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron/bert_model.py
@@ -49,6 +49,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py
index 1a4503a49163..1b9ef415c64b 100755
--- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py
@@ -45,6 +45,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
index 976c4166d8a2..52695761f027 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
@@ -34,6 +34,7 @@
     clip_grad_norm_fp32,
 )
 from nemo.collections.nlp.modules.common.megatron.megatron_init import initialize_model_parallel_for_nemo
+from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults
 from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
 from nemo.collections.nlp.parts.nlp_overrides import NEMO_MEGATRON_MODEL_PARALLEL_APPSTATE_OVERRIDE, GradScaler
 from nemo.core.optim import MainParamsOptimizerWrapper, prepare_lr_scheduler
@@ -57,6 +58,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 __all__ = ["MegatronBaseModel"]
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py
index 00e749b70ca2..5b3bd0a7eac9 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_base_prompt_learning_model.py
@@ -33,6 +33,7 @@
     VirtualPromptSource,
     VirtualPromptStyle,
 )
+from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults
 from nemo.collections.nlp.modules.common.transformer.text_generation import TextGeneration
 from nemo.collections.nlp.parts.nlp_overrides import GradScaler
 from nemo.utils import AppState, logging
@@ -52,6 +53,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
index bd766f0ef6a6..d799cb6fb044 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -35,6 +35,7 @@
 from nemo.collections.nlp.modules.common.megatron.build_model import build_model
 from nemo.collections.nlp.modules.common.megatron.module import Float16Module
 from nemo.collections.nlp.modules.common.megatron.utils import (
+    ApexGuardDefaults,
     average_losses_across_data_parallel_group,
     get_all_params_for_weight_decay_optimization,
     get_ltor_masks_and_position_ids,
@@ -84,6 +85,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    TransformerConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 try:
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
index c455660cb21f..6164239635a0 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
@@ -31,6 +31,7 @@
 from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
 from nemo.collections.nlp.modules.common import VirtualPromptPlaceholderToken, VirtualPromptSource, VirtualPromptStyle
 from nemo.collections.nlp.modules.common.megatron.utils import (
+    ApexGuardDefaults,
     average_losses_across_data_parallel_group,
     get_iterator_k_split,
 )
@@ -61,6 +62,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py
index edc414c906c0..6d8ef1941360 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py
@@ -34,6 +34,7 @@
     MegatronTokenLevelEncoderDecoderModule,
 )
 from nemo.collections.nlp.modules.common.megatron.utils import (
+    ApexGuardDefaults,
     average_losses_across_data_parallel_group,
     get_params_for_weight_decay_optimization,
 )
@@ -59,7 +60,7 @@
 
 HAVE_APEX = False
 
 try:
-    from megatron.core import ModelParallelConfig, parallel_state, tensor_parallel
+    from megatron.core import parallel_state, tensor_parallel
     from megatron.core.enums import ModelType
     from megatron.core.pipeline_parallel.schedules import get_forward_backward_func
diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py
index 751f3eb03c0e..d4a75aa18fb1 100644
--- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py
+++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py
@@ -25,7 +25,7 @@
 from nemo.collections.common.parts.adapter_modules import AdapterModuleUtil
 from nemo.collections.common.parts.utils import activation_registry
 from nemo.collections.nlp.modules.common.megatron.fused_bias_gelu import fused_bias_gelu
-from nemo.collections.nlp.modules.common.megatron.utils import init_method_const, init_method_normal
+from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, init_method_const, init_method_normal
 from nemo.collections.nlp.modules.common.prompt_encoder import InferenceTable
 from nemo.core.classes.mixins import adapter_mixin_strategies
 
@@ -46,6 +46,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/modules/common/megatron/attention.py b/nemo/collections/nlp/modules/common/megatron/attention.py
index 43ab070b53fd..a5a8b86b85bf 100644
--- a/nemo/collections/nlp/modules/common/megatron/attention.py
+++ b/nemo/collections/nlp/modules/common/megatron/attention.py
@@ -60,6 +60,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 try:
diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py
index c7122d51928b..83d16c775296 100755
--- a/nemo/collections/nlp/modules/common/megatron/language_model.py
+++ b/nemo/collections/nlp/modules/common/megatron/language_model.py
@@ -59,6 +59,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py
index eea51fe1b3b3..2f1fdc59f1ac 100644
--- a/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py
+++ b/nemo/collections/nlp/modules/common/megatron/megatron_decoders.py
@@ -41,6 +41,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 __all__ = []
diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_encoder_decoder.py
index 572c27098fa7..4ab85d8b8754 100644
--- a/nemo/collections/nlp/modules/common/megatron/megatron_encoder_decoder.py
+++ b/nemo/collections/nlp/modules/common/megatron/megatron_encoder_decoder.py
@@ -37,6 +37,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 __all__ = ["MegatronTransformerEncoderDecoderModule"]
diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py
index 8c6efa45da14..4bd99f7120f0 100644
--- a/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py
+++ b/nemo/collections/nlp/modules/common/megatron/megatron_encoders.py
@@ -42,6 +42,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 __all__ = []
diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_perceiver_encoders.py b/nemo/collections/nlp/modules/common/megatron/megatron_perceiver_encoders.py
index ec387920e55d..8ee4beac6db8 100644
--- a/nemo/collections/nlp/modules/common/megatron/megatron_perceiver_encoders.py
+++ b/nemo/collections/nlp/modules/common/megatron/megatron_perceiver_encoders.py
@@ -43,6 +43,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py
index 642bc2d07df8..36ea0814fab7 100644
--- a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py
+++ b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_decoder.py
@@ -43,6 +43,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py
index e2b6a7177cf8..5847aa311afd 100644
--- a/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py
+++ b/nemo/collections/nlp/modules/common/megatron/megatron_transformer_encoder.py
@@ -42,6 +42,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 __all__ = ["MegatronTransformerEncoderModule"]
diff --git a/nemo/collections/nlp/modules/common/megatron/mlp.py b/nemo/collections/nlp/modules/common/megatron/mlp.py
index 1c6a27b67796..35308028817b 100644
--- a/nemo/collections/nlp/modules/common/megatron/mlp.py
+++ b/nemo/collections/nlp/modules/common/megatron/mlp.py
@@ -51,6 +51,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/modules/common/megatron/module.py b/nemo/collections/nlp/modules/common/megatron/module.py
index 42bda16df221..5b94d738466b 100644
--- a/nemo/collections/nlp/modules/common/megatron/module.py
+++ b/nemo/collections/nlp/modules/common/megatron/module.py
@@ -17,6 +17,7 @@
 import torch
 from torch.autograd import Variable
 from torch.nn.parameter import Parameter
 
+from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults
 from nemo.utils import logging
 
@@ -27,6 +28,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/modules/common/megatron/retrieval_token_level_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/retrieval_token_level_encoder_decoder.py
index d8d3f17b93e8..1da13f2d8181 100644
--- a/nemo/collections/nlp/modules/common/megatron/retrieval_token_level_encoder_decoder.py
+++ b/nemo/collections/nlp/modules/common/megatron/retrieval_token_level_encoder_decoder.py
@@ -42,8 +42,11 @@
     from megatron.core import ModelParallelConfig, tensor_parallel
 
     HAVE_MEGATRON_CORE = True
+
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py b/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py
index 94426e8454f2..a47b85db3374 100644
--- a/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py
+++ b/nemo/collections/nlp/modules/common/megatron/retrieval_transformer.py
@@ -40,6 +40,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 MIN_DIM_HEAD = 32
diff --git a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py
index 7bb30b0ba375..e5c09f4849cb 100644
--- a/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py
+++ b/nemo/collections/nlp/modules/common/megatron/token_level_encoder_decoder.py
@@ -59,6 +59,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 __all__ = ["MegatronTokenLevelHead", "MegatronTokenLevelEncoderDecoderModule"]
diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py
index 5b21dc8bb000..9cdcccf6e685 100644
--- a/nemo/collections/nlp/modules/common/megatron/transformer.py
+++ b/nemo/collections/nlp/modules/common/megatron/transformer.py
@@ -63,6 +63,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 try:
diff --git a/nemo/collections/nlp/modules/common/megatron/utils.py b/nemo/collections/nlp/modules/common/megatron/utils.py
index 3ecf915afe46..ca7fcbfef905 100644
--- a/nemo/collections/nlp/modules/common/megatron/utils.py
+++ b/nemo/collections/nlp/modules/common/megatron/utils.py
@@ -33,7 +33,7 @@
 HAVE_APEX = False
 
 try:
-    from megatron.core import ModelParallelConfig, parallel_state, tensor_parallel
+    from megatron.core import parallel_state, tensor_parallel
     from megatron.core.tensor_parallel.layers import linear_with_grad_accumulation_and_async_allreduce
 
     HAVE_MEGATRON_CORE = True
diff --git a/nemo/collections/nlp/modules/common/prompt_encoder.py b/nemo/collections/nlp/modules/common/prompt_encoder.py
index 43745ce7a946..64f628582bc4 100644
--- a/nemo/collections/nlp/modules/common/prompt_encoder.py
+++ b/nemo/collections/nlp/modules/common/prompt_encoder.py
@@ -20,7 +20,7 @@
 from torch import nn
 
 from nemo.collections.nlp.modules.common.megatron.fused_bias_gelu import fused_bias_gelu
-from nemo.collections.nlp.modules.common.megatron.utils import init_method_normal
+from nemo.collections.nlp.modules.common.megatron.utils import ApexGuardDefaults, init_method_normal
 from nemo.core.classes import Exportable, NeuralModule
 from nemo.core.classes.common import typecheck
 
@@ -31,6 +31,8 @@
 
 except (ImportError, ModuleNotFoundError):
 
+    ModelParallelConfig = ApexGuardDefaults
+
     HAVE_MEGATRON_CORE = False
 
 
diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py
index 6d7e9abd6a99..36b30aae47b9 100644
--- a/nemo/collections/nlp/modules/common/text_generation_utils.py
+++ b/nemo/collections/nlp/modules/common/text_generation_utils.py
@@ -39,7 +39,7 @@
 
 HAVE_APEX = False
 
 try:
-    from megatron.core import ModelParallelConfig, parallel_state, tensor_parallel
+    from megatron.core import parallel_state, tensor_parallel
 
     HAVE_MEGATRON_CORE = True
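Note: every hunk above applies the same import-guard idiom, so a module that references ModelParallelConfig (or TransformerConfig) at module scope still imports cleanly when megatron-core is absent. The following is a minimal, self-contained sketch of that idiom; the _GuardDefaults class below is an illustrative stand-in for NeMo's ApexGuardDefaults helper (defined in nemo/collections/nlp/modules/common/megatron/utils.py), not its exact implementation.

# Sketch of the import-guard pattern used in each file of this diff.
# _GuardDefaults is a hypothetical placeholder standing in for NeMo's
# ApexGuardDefaults; the try/except structure mirrors the hunks above.

class _GuardDefaults:
    """Stand-in for a class provided by an optional dependency."""

    def __getattr__(self, item):
        # Attribute lookups on an instance return None instead of raising,
        # so code that probes the placeholder degrades gracefully.
        return None


try:
    # Real dependency; only importable when megatron-core is installed.
    from megatron.core import ModelParallelConfig

    HAVE_MEGATRON_CORE = True

except (ImportError, ModuleNotFoundError):
    # Bind the missing name to the placeholder so module-level references
    # (e.g. type annotations like `config: ModelParallelConfig`) resolve
    # at import time. The flag must be False in this branch so callers can
    # detect that the real dependency is unavailable.
    ModelParallelConfig = _GuardDefaults

    HAVE_MEGATRON_CORE = False

Whenever the except path is taken, downstream code is expected to check HAVE_MEGATRON_CORE before using any real megatron-core functionality, typically raising a descriptive error if it is False.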