diff --git a/examples/nlp/language_modeling/megatron_bert_pretraining.py b/examples/nlp/language_modeling/megatron_bert_pretraining.py
index b6cb84481fc7..e6abee295a1a 100644
--- a/examples/nlp/language_modeling/megatron_bert_pretraining.py
+++ b/examples/nlp/language_modeling/megatron_bert_pretraining.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import torch.multiprocessing as mp
 from omegaconf.omegaconf import OmegaConf, open_dict
 from pytorch_lightning import Trainer
 from pytorch_lightning.plugins.environments import TorchElasticEnvironment
@@ -28,6 +29,8 @@
 from nemo.utils import logging
 from nemo.utils.exp_manager import exp_manager
 
+mp.set_start_method("spawn", force=True)
+
 
 @hydra_runner(config_path="conf", config_name="megatron_bert_config")
 def main(cfg) -> None:
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py
index 12d121ca2bcd..90053f3052c8 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py
@@ -643,7 +643,11 @@ def build_pretraining_data_loader(self, dataset, consumed_samples):
 
         # Torch dataloader.
         return torch.utils.data.DataLoader(
-            dataset, batch_sampler=batch_sampler, num_workers=self.cfg.data.num_workers, pin_memory=True,
+            dataset,
+            batch_sampler=batch_sampler,
+            num_workers=self.cfg.data.num_workers,
+            pin_memory=True,
+            persistent_workers=True if self.cfg.data.num_workers > 0 else False,
         )
 
     def setup_training_data(self, cfg):
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
index 52a640a9efd1..d4132c4c7e80 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -795,7 +795,7 @@ def build_pretraining_data_loader(
             batch_sampler=batch_sampler,
             num_workers=self.cfg.data.num_workers,
             pin_memory=True,
-            persistent_workers=True,
+            persistent_workers=True if self.cfg.data.num_workers > 0 else False,
         )
 
     def setup(self, stage=None):
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
index acf96688b33a..331136c64a46 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_prompt_learning_model.py
@@ -604,7 +604,9 @@ def build_virtual_prompt_dataset(
             drop_last=drop_last,
             num_workers=num_workers,
             pin_memory=pin_memory,
-            persistent_workers=True,  # (@adithyare and @eharper) We need this to make spawn=True to work.
+            persistent_workers=True
+            if num_workers > 0
+            else False,  # (@adithyare and @eharper) We need this to make spawn=True to work.
         )
 
         return dataset, dataloader
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
index bfcc2c43631d..b3c08dff7ae8 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_t5_prompt_learning_model.py
@@ -440,7 +440,9 @@ def build_virtual_prompt_dataset(
             drop_last=drop_last,
             num_workers=num_workers,
             pin_memory=pin_memory,
-            persistent_workers=True,  # (@adithyare and @eharper) We need to set this to True to get around issues with spawn=True
+            persistent_workers=True
+            if num_workers > 0
+            else False,  # (@adithyare and @eharper) We need to set this to True to get around issues with spawn=True
         )
         print('build success', len(dataloader), dataset_paths)
         return dataset, dataloader
diff --git a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py
index fbf4b029fcdc..efa059419eda 100644
--- a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py
+++ b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py
@@ -430,7 +430,7 @@ def _setup_eval_dataloader_from_config(self, cfg: DictConfig, dataset):
                 pin_memory=cfg.get("pin_memory", False),
                 drop_last=cfg.get("drop_last", False),
                 shuffle=False,
-                persistent_workers=True,
+                persistent_workers=True if cfg.get("num_workers", 0) > 0 else False,
             )
         )
 
@@ -592,7 +592,7 @@ def _setup_megatron_dataloader_from_config(self, cfg, dataset, consumed_samples)
             collate_fn=collate_fn,
             num_workers=cfg.num_workers,
             pin_memory=cfg.pin_memory,
-            persistent_workers=True,
+            persistent_workers=True if cfg.num_workers > 0 else False,
        )
 
     def process_global_batch_for_text_translation_datasets(self, batch):
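
For context on the `persistent_workers` guard, here is a minimal standalone sketch (not NeMo code; the dataset and helper below are illustrative only). PyTorch's `DataLoader` raises `ValueError: persistent_workers option needs num_workers > 0` when `persistent_workers=True` is passed with `num_workers=0`, which is exactly what the conditionals above avoid, while still keeping workers persistent when the `spawn` start method is in effect.

```python
# Minimal sketch (standalone, not NeMo code) of the pattern the patch applies.
import torch
import torch.multiprocessing as mp
from torch.utils.data import DataLoader, TensorDataset


def build_loader(num_workers: int) -> DataLoader:
    dataset = TensorDataset(torch.arange(8, dtype=torch.float32))
    return DataLoader(
        dataset,
        batch_size=2,
        num_workers=num_workers,
        pin_memory=True,
        # Gate the flag on the worker count: DataLoader raises
        # "ValueError: persistent_workers option needs num_workers > 0"
        # if persistent_workers=True is combined with num_workers=0.
        persistent_workers=num_workers > 0,
    )


if __name__ == "__main__":
    # Mirrors the change in megatron_bert_pretraining.py: with the spawn
    # start method, persistent workers avoid re-spawning (and re-importing
    # the training module in) worker processes on every epoch.
    mp.set_start_method("spawn", force=True)
    for batch in build_loader(num_workers=2):
        print(batch[0].shape)
```

Note that `num_workers > 0` already evaluates to a `bool`, so it is equivalent to the `True if num_workers > 0 else False` spelling in the diff; the multi-line form there looks like an auto-formatter artifact rather than a semantic choice.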