18 changes: 9 additions & 9 deletions ignite/distributed/auto.py
@@ -50,8 +50,8 @@ def auto_dataloader(dataset: Dataset, **kwargs: Any) -> Union[DataLoader, "_MpDe
)

Args:
- dataset (Dataset): input torch dataset
- **kwargs: keyword arguments for `torch DataLoader`_.
+ dataset: input torch dataset
+ kwargs: keyword arguments for `torch DataLoader`_.

Returns:
`torch DataLoader`_ or `XLA MpDeviceLoader`_ for XLA devices
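For orientation, a minimal sketch of ``auto_dataloader`` in use; the toy ``TensorDataset`` and the chosen kwargs are illustrative, not part of the PR:

.. code-block:: python

    import torch
    import ignite.distributed as idist
    from torch.utils.data import TensorDataset

    # Toy dataset; any torch Dataset works here.
    dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))

    # kwargs are plain DataLoader kwargs; in a distributed context,
    # auto_dataloader adapts them (e.g. injects a distributed sampler).
    loader = idist.auto_dataloader(dataset, batch_size=32, num_workers=2, shuffle=True)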
@@ -154,11 +154,11 @@ def auto_model(model: nn.Module, sync_bn: bool = False, **kwargs: Any) -> nn.Mod
model = idist.auto_model(model)

Args:
- model (torch.nn.Module): model to adapt.
- sync_bn (bool): if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
+ model: model to adapt.
+ sync_bn: if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
distributed only. Default, False. Note, if using Nvidia/Apex, batchnorm conversion should be
applied before calling ``amp.initialize``.
- **kwargs: kwargs to model's wrapping class: `torch DistributedDataParallel`_ or `torch DataParallel`_
+ kwargs: kwargs to model's wrapping class: `torch DistributedDataParallel`_ or `torch DataParallel`_
if applicable. Please make sure to use acceptable kwargs for the given backend.

Returns:
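As a hedged sketch of the wrapping behaviour described above (the toy model is illustrative):

.. code-block:: python

    import torch.nn as nn
    import ignite.distributed as idist

    model = nn.Sequential(nn.Linear(3, 16), nn.ReLU(), nn.Linear(16, 2))

    # Moves the model to the current device and wraps it in
    # DistributedDataParallel / DataParallel when applicable;
    # sync_bn is honoured for native torch distributed only.
    model = idist.auto_model(model, sync_bn=False)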
@@ -241,7 +241,7 @@ def auto_optim(optimizer: Optimizer) -> Optimizer:
optimizer = idist.auto_optim(optimizer)

Args:
- optimizer (Optimizer): input torch optimizer
+ optimizer: input torch optimizer

Returns:
Optimizer
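A minimal sketch, reusing the toy ``model`` from the previous example:

.. code-block:: python

    import torch.optim as optim
    import ignite.distributed as idist

    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Pass-through on most backends; on XLA it returns a wrapper whose
    # step() performs the XLA-specific optimizer step.
    optimizer = idist.auto_optim(optimizer)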
@@ -276,9 +276,9 @@ class DistributedProxySampler(DistributedSampler):
Input sampler is assumed to have a constant size.

Args:
- sampler (Sampler): Input torch data sampler.
- num_replicas (int, optional): Number of processes participating in distributed training.
- rank (int, optional): Rank of the current process within ``num_replicas``.
+ sampler: Input torch data sampler.
+ num_replicas: Number of processes participating in distributed training.
+ rank: Rank of the current process within ``num_replicas``.

"""

20 changes: 10 additions & 10 deletions ignite/distributed/launcher.py
@@ -152,25 +152,25 @@ def training(local_rank, config, **kwargs):
.. _horovodrun: https://horovod.readthedocs.io/en/latest/api.html#module-horovod.run

Args:
- backend (str, optional): backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
+ backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
configuration.
- nproc_per_node (int, optional): optional argument, number of processes per
+ nproc_per_node: optional argument, number of processes per
node to specify. If not None, :meth:`~ignite.distributed.Parallel.run` will spawn ``nproc_per_node``
processes that run input function with its arguments.
- nnodes (int, optional): optional argument, number of nodes participating in distributed configuration.
+ nnodes: optional argument, number of nodes participating in distributed configuration.
If not None, :meth:`~ignite.distributed.Parallel.run` will spawn ``nproc_per_node``
processes that run input function with its arguments. Total world size is `nproc_per_node * nnodes`.
This option is only supported by native torch distributed module. For other modules, please setup
``spawn_kwargs`` with backend specific arguments.
- node_rank (int, optional): optional argument, current machine index. Mandatory argument if ``nnodes`` is
+ node_rank: optional argument, current machine index. Mandatory argument if ``nnodes`` is
specified and larger than one.
This option is only supported by native torch distributed module. For other modules, please setup
``spawn_kwargs`` with backend specific arguments.
- master_addr (str, optional): optional argument, master node TCP/IP address for torch native backends
+ master_addr: optional argument, master node TCP/IP address for torch native backends
(`nccl`, `gloo`). Mandatory argument if ``nnodes`` is specified and larger than one.
- master_port (int, optional): optional argument, master node port for torch native backends
+ master_port: optional argument, master node port for torch native backends
(`nccl`, `gloo`). Mandatory argument if ``master_addr`` is specified.
- **spawn_kwargs: kwargs to ``idist.spawn`` function.
+ spawn_kwargs: kwargs to ``idist.spawn`` function.

.. versionchanged:: 0.4.2
``backend`` now accepts `horovod` distributed framework.
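For context, a minimal single-node sketch of the launcher described above (backend and process count are illustrative):

.. code-block:: python

    import ignite.distributed as idist

    def training(local_rank, config):
        print(idist.get_rank(), ": run with config:", config)

    # 4 processes on this node over the gloo backend; the context
    # manager sets up and tears down the distributed configuration.
    with idist.Parallel(backend="gloo", nproc_per_node=4) as parallel:
        parallel.run(training, {"lr": 0.01})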
@@ -264,10 +264,10 @@ def training(local_rank, config, **kwargs):
parallel.run(training, config, a=1, b=2)

Args:
- func (Callable): function to execute. First argument of the function should be `local_rank` - local process
+ func: function to execute. First argument of the function should be `local_rank` - local process
index.
- *args: positional arguments of ``func`` (without `local_rank`).
- **kwargs: keyword arguments of ``func``.
+ args: positional arguments of ``func`` (without `local_rank`).
+ kwargs: keyword arguments of ``func``.

"""
if self._spawn_params is not None and self.backend is not None:
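Worth noting, as a hedged sketch: with ``backend=None`` the same code runs without any distributed setup, which is convenient for single-process debugging:

.. code-block:: python

    import ignite.distributed as idist

    def training(local_rank, config):
        ...  # identical training code

    # No distributed configuration; func still receives local_rank (0).
    with idist.Parallel(backend=None) as parallel:
        parallel.run(training, {"lr": 0.01})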
34 changes: 17 additions & 17 deletions ignite/distributed/utils.py
@@ -52,7 +52,7 @@ def sync(temporary: bool = False) -> None:
This method should be used when distributed context is manually created or destroyed.

Args:
- temporary (bool): If True, distributed model synchronization is done every call of ``idist.get_*`` methods.
+ temporary: If True, distributed model synchronization is done every call of ``idist.get_*`` methods.
This may have a negative performance impact.
"""
global _model
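A minimal sketch of the intended use, assuming a process group created by hand (single-process gloo here purely for illustration):

.. code-block:: python

    import os
    import torch.distributed as dist
    import ignite.distributed as idist

    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")

    # ignite did not create this group, so it must be told to re-sync.
    dist.init_process_group("gloo", world_size=1, rank=0)
    idist.sync()
    assert idist.backend() == "gloo"

    dist.destroy_process_group()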
@@ -285,15 +285,15 @@ def train_fn(local_rank, a, b, c, d=12):
idist.spawn("xla-tpu", train_fn, args=(a, b, c), kwargs_dict={"d": 23}, nproc_per_node=8)

Args:
- backend (str): backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`
- fn (function): function to called as the entrypoint of the spawned process.
+ backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`
+ fn: function called as the entrypoint of the spawned process.
This function must be defined at the top level of a module so it can be pickled and spawned.
This is a requirement imposed by multiprocessing. The function is called as ``fn(i, *args, **kwargs_dict)``,
where `i` is the process index and args is the passed through tuple of arguments.
- args (tuple): arguments passed to `fn`.
- kwargs_dict (Mapping): kwargs passed to `fn`.
- nproc_per_node (int): number of processes to spawn on a single node. Default, 1.
- **kwargs: acceptable kwargs according to provided backend:
+ args: arguments passed to `fn`.
+ kwargs_dict: kwargs passed to `fn`.
+ nproc_per_node: number of processes to spawn on a single node. Default, 1.
+ kwargs: acceptable kwargs according to provided backend:

- | "nccl" or "gloo" : `nnodes` (default, 1), `node_rank` (default, 0), `master_addr`
| (default, "127.0.0.1"), `master_port` (default, 2222), `timeout` to `dist.init_process_group`_ function
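To complement the XLA example above, a hedged sketch with a native backend (values are illustrative):

.. code-block:: python

    import ignite.distributed as idist

    # Must be defined at module top level so it can be pickled.
    def train_fn(i, a, b):
        print(i, a, b)

    # Single node, 2 processes; nnodes/node_rank/master_addr/master_port
    # are the gloo/nccl-specific kwargs listed above.
    idist.spawn("gloo", train_fn, args=(1, 2), nproc_per_node=2, nnodes=1, node_rank=0)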
@@ -329,8 +329,8 @@ def all_reduce(tensor: Union[torch.Tensor, float], op: str = "SUM") -> Union[tor
"""Helper method to perform all reduce operation.

Args:
- tensor (torch.Tensor or number): tensor or number to collect across participating processes.
- op (str): reduction operation, "SUM" by default. Possible values: "SUM", "PRODUCT", "MIN", "MAX", "AND", "OR".
+ tensor: tensor or number to collect across participating processes.
+ op: reduction operation, "SUM" by default. Possible values: "SUM", "PRODUCT", "MIN", "MAX", "AND", "OR".
Note that several values are not supported by some backends, such as "horovod".

Returns:
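For context, a minimal sketch of a call site inside a distributed run (the tensor value is illustrative):

.. code-block:: python

    import torch
    import ignite.distributed as idist

    # Every process contributes its tensor and gets the elementwise
    # sum back; op="MIN"/"MAX"/etc. select other reductions.
    t = torch.tensor([idist.get_rank() + 1.0])
    t = idist.all_reduce(t, op="SUM")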
@@ -347,7 +347,7 @@ def all_gather(tensor: Union[torch.Tensor, float, str]) -> Union[torch.Tensor, f
"""Helper method to perform all gather operation.

Args:
- tensor (torch.Tensor or number or str): tensor or number or str to collect across participating processes.
+ tensor: tensor or number or str to collect across participating processes.

Returns:
torch.Tensor of shape ``(world_size * tensor.shape[0], tensor.shape[1], ...)`` if input is a tensor or
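A hedged sketch matching the shape contract above:

.. code-block:: python

    import torch
    import ignite.distributed as idist

    # Each process contributes one row; rows are stacked along dim 0
    # in rank order, giving shape (world_size, 2).
    local = torch.full((1, 2), float(idist.get_rank()))
    gathered = idist.all_gather(local)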
@@ -365,9 +365,9 @@ def broadcast(tensor: Union[torch.Tensor, float, str], src: int = 0) -> Union[to
"""Helper method to perform broadcast operation.

Args:
- tensor (torch.Tensor or number or str): tensor or number or str to broadcast to participating processes.
+ tensor: tensor or number or str to broadcast to participating processes.
Make sure to respect dtype of torch tensor input for all processes, otherwise execution will crash.
- src (int): source rank. Default, 0.
+ src: source rank. Default, 0.

Returns:
torch.Tensor or string or number
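A minimal sketch of the dtype caveat above (values are illustrative):

.. code-block:: python

    import torch
    import ignite.distributed as idist

    # Rank 0's value wins; non-src processes still pass a tensor of
    # the same dtype and shape, as the docstring requires.
    t = torch.tensor([1.0, 2.0]) if idist.get_rank() == 0 else torch.zeros(2)
    t = idist.broadcast(t, src=0)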
@@ -434,7 +434,7 @@ def run(local_rank, *args, **kwargs):
# ...

Args:
- index (int): local rank or current process index
+ index: local rank or current process index

"""
from ignite.distributed.comp_models.base import ComputationModel
@@ -487,8 +487,8 @@ def train_fn(local_rank, a, b, c):


Args:
- backend (str, optional): backend: `nccl`, `gloo`, `xla-tpu`, `horovod`.
- **kwargs: acceptable kwargs according to provided backend:
+ backend: backend: `nccl`, `gloo`, `xla-tpu`, `horovod`.
+ kwargs: acceptable kwargs according to provided backend:

- "nccl" or "gloo" : timeout(=timedelta(minutes=30)).

@@ -543,8 +543,8 @@ def one_rank_only(rank: int = 0, with_barrier: bool = False) -> Callable:
"""Decorator to filter handlers wrt a rank number

Args:
- rank (int): rank number of the handler (default: 0).
- with_barrier (bool): synchronisation with a barrier (default: False).
+ rank: rank number of the handler (default: 0).
+ with_barrier: synchronisation with a barrier (default: False).

.. code-block:: python
