# Patch summary: a single hunk (-40,7 +40,7) against
# nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py.
#
# In `forward(self, x)`, the call to `_fast_layer_norm` gains the keyword
# argument `memory_efficient=False`; the positional arguments
# (x, self.weight + 1, self.bias, self.epsilon) are unchanged.
#
# NOTE(review): `_fast_layer_norm` is not defined in this hunk. Presumably the
# new call needs a version of that helper that accepts `memory_efficient` -
# confirm against the dependency version the project pins.
# NOTE(review): the hunk below is stored on one physical line; its line breaks
# would need restoring before it can be applied as a patch.
diff --git a/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py b/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py index 4a94b37aae7b..3d14c355eee1 100644 --- a/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py +++ b/nemo/collections/nlp/modules/common/megatron/layer_norm_1p.py @@ -40,7 +40,7 @@ def reset_parameters(self): torch.nn.init.zeros_(self.bias) def forward(self, x): - return _fast_layer_norm(x, self.weight + 1, self.bias, self.epsilon) + return _fast_layer_norm(x, self.weight + 1, self.bias, self.epsilon, memory_efficient=False) else: