From 5990e6241eeba8805bf4476e20aa06d71a2e8582 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Tue, 8 Sep 2020 10:32:11 +0000
Subject: [PATCH] Fixes #1258 - Replaced mp.spawn by mp.start_processes for native comp model

---
 ignite/distributed/comp_models/native.py            | 2 +-
 ignite/distributed/utils.py                         | 6 +++---
 tests/ignite/distributed/comp_models/test_native.py | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/ignite/distributed/comp_models/native.py b/ignite/distributed/comp_models/native.py
index 74c1b2abae1c..75d6a946d9a1 100644
--- a/ignite/distributed/comp_models/native.py
+++ b/ignite/distributed/comp_models/native.py
@@ -279,7 +279,7 @@ def spawn(
         if LooseVersion(torch.__version__) >= LooseVersion("1.5.0"):
             spawn_kwargs["start_method"] = kwargs.get("start_method", "spawn")
 
-        mp.spawn(
+        mp.start_processes(
             _NativeDistModel._dist_worker_task_fn,
             nprocs=nproc_per_node,
             args=(
diff --git a/ignite/distributed/utils.py b/ignite/distributed/utils.py
index 56d07afa036a..bde3fdbc658e 100644
--- a/ignite/distributed/utils.py
+++ b/ignite/distributed/utils.py
@@ -285,7 +285,7 @@ def train_fn(local_rank, a, b, c, d=12):
 
             - | "nccl" or "gloo" : `nnodes` (default, 1), `node_rank` (default, 0), `master_addr`
               | (default, "127.0.0.1"), `master_port` (default, 2222), `timeout` to `dist.init_process_group`_ function
-              | and kwargs for `mp.spawn`_ function.
+              | and kwargs for `mp.start_processes`_ function.
 
             - | "xla-tpu" : `nnodes` (default, 1), `node_rank` (default, 0) and kwargs to `xmp.spawn`_ function.
 
@@ -293,8 +293,8 @@ def train_fn(local_rank, a, b, c, d=12):
              | and `node_rank=0` are tolerated and ignored, otherwise an exception is raised.
 
     .. _dist.init_process_group: https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group
-    .. _mp.spawn: https://pytorch.org/docs/stable/multiprocessing.html#torch.multiprocessing.spawn
-    .. _xmp.spawn: http://pytorch.org/xla/release/1.5/index.html#torch_xla.distributed.xla_multiprocessing.spawn
+    .. _mp.start_processes: https://pytorch.org/docs/stable/_modules/torch/multiprocessing/spawn.html#spawn
+    .. _xmp.spawn: http://pytorch.org/xla/release/1.6/index.html#torch_xla.distributed.xla_multiprocessing.spawn
     .. _hvd_run: https://horovod.readthedocs.io/en/latest/api.html#module-horovod.run
 
     """
diff --git a/tests/ignite/distributed/comp_models/test_native.py b/tests/ignite/distributed/comp_models/test_native.py
index 550969a47972..b509df82b182 100644
--- a/tests/ignite/distributed/comp_models/test_native.py
+++ b/tests/ignite/distributed/comp_models/test_native.py
@@ -276,13 +276,14 @@ def _test_dist_spawn_fn(local_rank, backend, world_size, device):
     assert _model.device() == torch.device(device)
 
 
-def _test__native_dist_model_spawn(backend, num_workers_per_machine, device):
+def _test__native_dist_model_spawn(backend, num_workers_per_machine, device, **spawn_kwargs):
     _NativeDistModel.spawn(
         _test_dist_spawn_fn,
         args=(backend, num_workers_per_machine, device),
         kwargs_dict={},
         backend=backend,
         nproc_per_node=num_workers_per_machine,
+        **spawn_kwargs,
     )
 
 
@@ -290,6 +291,7 @@ def _test__native_dist_model_spawn(backend, num_workers_per_machine, device):
 @pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc")
 def test__native_dist_model_spawn_gloo():
     _test__native_dist_model_spawn("gloo", num_workers_per_machine=4, device="cpu")
+    _test__native_dist_model_spawn("gloo", num_workers_per_machine=4, device="cpu", start_method="fork")
 
 
 @pytest.mark.distributed
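
Note (illustration, not part of the patch): the motivation for the change is that torch.multiprocessing.spawn is hard-wired to the "spawn" start method, while torch.multiprocessing.start_processes takes a start_method argument (e.g. "fork"), which the test above exercises via start_method="fork". A minimal standalone sketch of that difference, assuming a PyTorch version (>= 1.5) where start_processes is available; the worker function and message are hypothetical:

    # Minimal sketch of mp.start_processes with a non-default start method.
    import torch.multiprocessing as mp


    def _worker(local_rank, message):
        # Each child receives its process index as the first positional argument,
        # the same calling convention used by _NativeDistModel._dist_worker_task_fn.
        print(f"worker {local_rank}: {message}")


    if __name__ == "__main__":
        # mp.spawn() would ignore/warn on a non-"spawn" start method;
        # start_processes() honours start_method="fork".
        mp.start_processes(_worker, args=("hello",), nprocs=2, start_method="fork")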