pytorch · vfdev-5 · Jun 11, 2020 · Jun 9, 2020 · Jun 9, 2020 · Jun 10, 2020
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -47,7 +47,7 @@ run_pytorch_container: &run_pytorch_container
       environment:
         wd: << pipeline.parameters.workingdir >>
       command: |
-        docker run --gpus=all --rm -itd -v ${wd}:/ignite -w /ignite --name pthd << pipeline.parameters.pytorch_stable_image >>
+        docker run --gpus=all --rm -itd --shm-size 16G -v ${wd}:/ignite -w /ignite --name pthd << pipeline.parameters.pytorch_stable_image >>
         docker exec -it pthd nvidia-smi
         docker exec -it pthd ls
 
@@ -80,7 +80,7 @@ jobs:
 
             # pytest on cuda
             export test_cmd='sh tests/run_gpu_tests.sh'
-            docker exec -it pthd /bin/bash -c "$test_cmd"
+            docker exec -it pthd /bin/bash -c "${test_cmd}"
 
             # MNIST tests
 
@@ -118,7 +118,7 @@ jobs:
       - run:
           name: Codecov upload
           command: |
-            codecov -F gpu || echo 'Codecov upload failed'
+            bash <(curl -s https://codecov.io/bash) -Z -F gpu
 
 
   two_gpus_tests:
@@ -135,7 +135,64 @@ jobs:
           name: Run 1 Node 2 GPUs Unit Tests
           command: |
             export test_cmd='sh tests/run_gpu_tests.sh 2'
-            docker exec -it pthd /bin/bash -c "$test_cmd"
+            docker exec -it pthd /bin/bash -c "${test_cmd}"
+
+      - run:
+          name: Codecov upload
+          command: |
+            bash <(curl -s https://codecov.io/bash) -Z -F gpu-2
+
+
+  two_gpus_check_dist_cifar10_example:
+    <<: *two_gpus
+
+    working_directory: << pipeline.parameters.workingdir >>
+
+    steps:
+      - checkout
+      - <<: *pull_pytorch_stable_image
+      - <<: *run_pytorch_container
+      - <<: *install_dependencies
+      - run:
+          name: "Install additional example dependencies"
+          command: |
+            docker exec -it pthd pip install fire
+      - run:
+          name: "Run without backend"
+          command: |
+            export example_path="examples/contrib/cifar10"
+            # initial run
+            export stop_cmd="--stop_iteration=500"
+            export test_cmd="CI=1 python ${example_path}/main.py run"
+            docker exec -it pthd /bin/bash -c "${test_cmd} ${stop_cmd}"
+            # resume
+            export resume_opt="--resume-from=/tmp/output-cifar10/resnet18_backend-None-1_stop-on-500/training_checkpoint_400.pt"
+            docker exec -it pthd /bin/bash -c "${test_cmd} --num_epochs=7 ${resume_opt}"
+
+      - run:
+          name: "Run with NCCL backend using torch dist launch"
+          command: |
+            export example_path="examples/contrib/cifar10"
+            # initial run
+            export stop_cmd="--stop_iteration=500"
+            export test_cmd="CI=1 python -u -m torch.distributed.launch --nproc_per_node=2 --use_env ${example_path}/main.py run --backend=nccl"
+            docker exec -it pthd /bin/bash -c "${test_cmd} ${stop_cmd}"
+            # resume
+            export resume_opt="--resume-from=/tmp/output-cifar10/resnet18_backend-nccl-2_stop-on-500/training_checkpoint_400.pt"
+            docker exec -it pthd /bin/bash -c "${test_cmd} --num_epochs=7 ${resume_opt}"
+
+      - run:
+          name: "Run with NCCL backend using spawn"
+          command: |
+            export example_path="examples/contrib/cifar10"
+            # initial run
+            export stop_cmd="--stop_iteration=500"
+            export test_cmd="CI=1 python -u ${example_path}/main.py run --backend=nccl --num_procs_per_node=2"
+            docker exec -it pthd /bin/bash -c "${test_cmd} ${stop_cmd}"
+            # resume
+            export resume_opt="--resume-from=/tmp/output-cifar10/resnet18_backend-nccl-2_stop-on-500/training_checkpoint_400.pt"
+            docker exec -it pthd /bin/bash -c "${test_cmd} --num_epochs=7 ${resume_opt}"
+
 
 # -------------------------------------------------------------------------------------
 # Workflows
@@ -146,3 +203,4 @@ workflows:
     jobs:
       - one_gpu_tests
       - two_gpus_tests
+      - two_gpus_check_dist_cifar10_example
diff --git a/docs/source/distributed.rst b/docs/source/distributed.rst
@@ -7,20 +7,87 @@ Helper module to use distributed settings for multiple backends:
 
 - XLA on TPUs via `pytorch/xla <https://github.com/pytorch/xla>`_
 
-This module wraps common methods to fetch information about distributed configuration, initialize/finalize process
-group or spawn multiple processes.
+Distributed launcher and `auto` helpers
+---------------------------------------
+
+We provide a context manager to simplify distributed configuration setup for all of the supported backends above.
+In addition, methods like :meth:`~ignite.distributed.auto.auto_model`, :meth:`~ignite.distributed.auto.auto_optim` and
+:meth:`~ignite.distributed.auto.auto_dataloader` help to transparently adapt the provided model, optimizer and data
+loaders to the existing configuration:
+
+.. code-block:: python
+
+    # main.py
+
+    import ignite.distributed as idist
+
+    def training(local_rank, config, **kwargs):
+
+        print(idist.get_rank(), ": run with config:", config, "- backend=", idist.backend())
+
+        train_loader = idist.auto_dataloader(dataset, batch_size=32, num_workers=12, shuffle=True, **kwargs)
+        # batch size, num_workers and sampler are automatically adapted to existing configuration
+        # ...
+        model = resnet50()
+        model = idist.auto_model(model)
+        # model is DDP or DP or just itself according to existing configuration
+        # ...
+        optimizer = optim.SGD(model.parameters(), lr=0.01)
+        optimizer = idist.auto_optim(optimizer)
+        # optimizer is returned as is, except in an XLA configuration, where its `step()` method is overridden.
+        # User can safely call `optimizer.step()` (behind the scenes `xm.optimizer_step(optimizer)` is performed)
+
+
+    backend = "nccl"  # torch native distributed configuration on multiple GPUs
+    # backend = "xla-tpu"  # XLA TPUs distributed configuration
+    # backend = None  # no distributed configuration
+    with idist.Parallel(backend=backend, **dist_configs) as parallel:
+        parallel.run(training, config, a=1, b=2)
+
+The above code may be executed with the `torch.distributed.launch`_ tool, or directly with python by specifying the
+distributed configuration in the code. For more details, please see :class:`~ignite.distributed.launcher.Parallel`,
+:meth:`~ignite.distributed.auto.auto_model`, :meth:`~ignite.distributed.auto.auto_optim` and
+:meth:`~ignite.distributed.auto.auto_dataloader`.
+
+A complete example of CIFAR10 training can be found
+`here <https://github.com/pytorch/ignite/tree/master/examples/contrib/cifar10>`_.
+
 
+.. _torch.distributed.launch: https://pytorch.org/docs/stable/distributed.html#launch-utility
 
-Examples:
 
-    - Example to spawn `nprocs` processes that run `fn` with `args`: :meth:`~ignite.distributed.spawn`
+ignite.distributed.auto
+-----------------------
 
+.. currentmodule:: ignite.distributed.auto
+
+.. automodule:: ignite.distributed.auto
+    :members:
+
+
+ignite.distributed.launcher
+---------------------------
+
+.. currentmodule:: ignite.distributed.launcher
+
+.. automodule:: ignite.distributed.launcher
+    :members:
 
-.. currentmodule:: ignite.distributed
 
-.. automodule:: ignite.distributed
+ignite.distributed.utils
+------------------------
+
+This module wraps common methods to fetch information about distributed configuration, initialize/finalize process
+group or spawn multiple processes.
+
+.. currentmodule:: ignite.distributed.utils
+
+.. automodule:: ignite.distributed.utils
     :members:
-    :imported-members:
+
+    .. attribute:: has_native_dist_support
+
+        True if `torch.distributed` is available
 
     .. attribute:: has_xla_support
 

diff --git a/examples/contrib/cifar10/.gitignore b/examples/contrib/cifar10/.gitignore
@@ -1,5 +1,3 @@
 output
 cifar10
-.polyaxonignore
-.polyaxon
-plx_configs/*.yaml
+raw_pytorch