diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index bec3cfbc2e898..e66708d2d2dd6 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1585,7 +1585,7 @@ def build_transformer_config(self) -> TransformerConfig: 'recompute_method': recompute_method, 'recompute_num_layers': recompute_num_layers, 'distribute_saved_activations': False, # not currently used in NeMo - 'ub_tp_comm_overlap': ub_tp_comm_overlap, + 'tp_comm_overlap': ub_tp_comm_overlap, 'fp8': fp8, }