diff --git a/ignite/distributed/auto.py b/ignite/distributed/auto.py
index 9764e38db6f7..1d6a585b0e63 100644
--- a/ignite/distributed/auto.py
+++ b/ignite/distributed/auto.py
@@ -50,8 +50,8 @@ def auto_dataloader(dataset: Dataset, **kwargs: Any) -> Union[DataLoader, "_MpDe
         )
 
     Args:
-        dataset (Dataset): input torch dataset
-        **kwargs: keyword arguments for `torch DataLoader`_.
+        dataset: input torch dataset
+        kwargs: keyword arguments for `torch DataLoader`_.
 
     Returns:
         `torch DataLoader`_ or `XLA MpDeviceLoader`_ for XLA devices
@@ -154,11 +154,11 @@ def auto_model(model: nn.Module, sync_bn: bool = False, **kwargs: Any) -> nn.Mod
         model = idist.auto_model(model)
 
     Args:
-        model (torch.nn.Module): model to adapt.
-        sync_bn (bool): if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
+        model: model to adapt.
+        sync_bn: if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
            distributed only. Default, False. Note, if using Nvidia/Apex, batchnorm conversion should be
            applied before calling ``amp.initialize``.
-        **kwargs: kwargs to model's wrapping class: `torch DistributedDataParallel`_ or `torch DataParallel`_
+        kwargs: kwargs to model's wrapping class: `torch DistributedDataParallel`_ or `torch DataParallel`_
            if applicable. Please, make sure to use acceptable kwargs for given backend.
 
     Returns:
@@ -241,7 +241,7 @@ def auto_optim(optimizer: Optimizer) -> Optimizer:
         optimizer = idist.auto_optim(optimizer)
 
     Args:
-        optimizer (Optimizer): input torch optimizer
+        optimizer: input torch optimizer
 
     Returns:
         Optimizer
@@ -276,9 +276,9 @@ class DistributedProxySampler(DistributedSampler):
     Input sampler is assumed to have a constant size.
 
     Args:
-        sampler (Sampler): Input torch data sampler.
-        num_replicas (int, optional): Number of processes participating in distributed training.
-        rank (int, optional): Rank of the current process within ``num_replicas``.
+        sampler: Input torch data sampler.
+        num_replicas: Number of processes participating in distributed training.
+        rank: Rank of the current process within ``num_replicas``.
 
     """
 
diff --git a/ignite/distributed/launcher.py b/ignite/distributed/launcher.py
index 28f1a036c842..4d24979fee47 100644
--- a/ignite/distributed/launcher.py
+++ b/ignite/distributed/launcher.py
@@ -152,25 +152,25 @@ def training(local_rank, config, **kwargs):
     .. _horovodrun: https://horovod.readthedocs.io/en/latest/api.html#module-horovod.run
 
     Args:
-        backend (str, optional): backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
+        backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
            configuration.
-        nproc_per_node (int, optional): optional argument, number of processes per
+        nproc_per_node: optional argument, number of processes per
            node to specify. If not None, :meth:`~ignite.distributed.Parallel.run` will spawn ``nproc_per_node``
            processes that run input function with its arguments.
-        nnodes (int, optional): optional argument, number of nodes participating in distributed configuration.
+        nnodes: optional argument, number of nodes participating in distributed configuration.
            If not None, :meth:`~ignite.distributed.Parallel.run` will spawn ``nproc_per_node``
            processes that run input function with its arguments. Total world size is `nproc_per_node * nnodes`.
            This option is only supported by native torch distributed module. For other modules, please setup
            ``spawn_kwargs`` with backend specific arguments.
-        node_rank (int, optional): optional argument, current machine index. Mandatory argument if ``nnodes`` is
+        node_rank: optional argument, current machine index. Mandatory argument if ``nnodes`` is
           specified and larger than one.
           This option is only supported by native torch distributed module. For other modules, please setup
           ``spawn_kwargs`` with backend specific arguments.
-        master_addr (str, optional): optional argument, master node TCP/IP address for torch native backends
+        master_addr: optional argument, master node TCP/IP address for torch native backends
           (`nccl`, `gloo`). Mandatory argument if ``nnodes`` is specified and larger than one.
-        master_port (int, optional): optional argument, master node port for torch native backends
+        master_port: optional argument, master node port for torch native backends
           (`nccl`, `gloo`). Mandatory argument if ``master_addr`` is specified.
-        **spawn_kwargs: kwargs to ``idist.spawn`` function.
+        spawn_kwargs: kwargs to ``idist.spawn`` function.
 
     .. versionchanged:: 0.4.2
        ``backend`` now accepts `horovod` distributed framework.
@@ -264,10 +264,10 @@ def training(local_rank, config, **kwargs):
            parallel.run(training, config, a=1, b=2)
 
        Args:
-            func (Callable): function to execute. First argument of the function should be `local_rank` - local process
+            func: function to execute. First argument of the function should be `local_rank` - local process
               index.
-            *args: positional arguments of ``func`` (without `local_rank`).
-            **kwargs: keyword arguments of ``func``.
+            args: positional arguments of ``func`` (without `local_rank`).
+            kwargs: keyword arguments of ``func``.
 
        """
        if self._spawn_params is not None and self.backend is not None:
diff --git a/ignite/distributed/utils.py b/ignite/distributed/utils.py
index 793d89e13793..1376ce102930 100644
--- a/ignite/distributed/utils.py
+++ b/ignite/distributed/utils.py
@@ -52,7 +52,7 @@ def sync(temporary: bool = False) -> None:
     This method should be used when distributed context is manually created or destroyed.
 
     Args:
-        temporary (bool): If True, distributed model synchronization is done every call of ``idist.get_*`` methods.
+        temporary: If True, distributed model synchronization is done every call of ``idist.get_*`` methods.
            This may have a negative performance impact.
     """
     global _model
@@ -285,15 +285,15 @@ def train_fn(local_rank, a, b, c, d=12):
         idist.spawn("xla-tpu", train_fn, args=(a, b, c), kwargs_dict={"d": 23}, nproc_per_node=8)
 
     Args:
-        backend (str): backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`
-        fn (function): function to called as the entrypoint of the spawned process.
+        backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`
+        fn: function to be called as the entrypoint of the spawned process.
           This function must be defined at the top level of a module so it can be pickled and spawned.
           This is a requirement imposed by multiprocessing. The function is called as ``fn(i, *args, **kwargs_dict)``,
           where `i` is the process index and args is the passed through tuple of arguments.
-        args (tuple): arguments passed to `fn`.
-        kwargs_dict (Mapping): kwargs passed to `fn`.
-        nproc_per_node (int): number of processes to spawn on a single node. Default, 1.
-        **kwargs: acceptable kwargs according to provided backend:
+        args: arguments passed to `fn`.
+        kwargs_dict: kwargs passed to `fn`.
+        nproc_per_node: number of processes to spawn on a single node. Default, 1.
+        kwargs: acceptable kwargs according to provided backend:
 
           - | "nccl" or "gloo" : `nnodes` (default, 1), `node_rank` (default, 0), `master_addr`
             | (default, "127.0.0.1"), `master_port` (default, 2222), `timeout` to `dist.init_process_group`_ function
@@ -329,8 +329,8 @@ def all_reduce(tensor: Union[torch.Tensor, float], op: str = "SUM") -> Union[tor
     """Helper method to perform all reduce operation.
 
     Args:
-        tensor (torch.Tensor or number): tensor or number to collect across participating processes.
-        op (str): reduction operation, "SUM" by default. Possible values: "SUM", "PRODUCT", "MIN", "MAX", "AND", "OR".
+        tensor: tensor or number to collect across participating processes.
+        op: reduction operation, "SUM" by default. Possible values: "SUM", "PRODUCT", "MIN", "MAX", "AND", "OR".
           Please, several values are not supported for the backend like "horovod".
 
     Returns:
@@ -347,7 +347,7 @@ def all_gather(tensor: Union[torch.Tensor, float, str]) -> Union[torch.Tensor, f
     """Helper method to perform all gather operation.
 
     Args:
-        tensor (torch.Tensor or number or str): tensor or number or str to collect across participating processes.
+        tensor: tensor or number or str to collect across participating processes.
 
     Returns:
         torch.Tensor of shape ``(world_size * tensor.shape[0], tensor.shape[1], ...)`` if input is a tensor or
@@ -365,9 +365,9 @@ def broadcast(tensor: Union[torch.Tensor, float, str], src: int = 0) -> Union[to
     """Helper method to perform broadcast operation.
 
     Args:
-        tensor (torch.Tensor or number or str): tensor or number or str to broadcast to participating processes.
+        tensor: tensor or number or str to broadcast to participating processes.
           Make sure to respect dtype of torch tensor input for all processes, otherwise execution will crash.
-        src (int): source rank. Default, 0.
+        src: source rank. Default, 0.
 
     Returns:
         torch.Tensor or string or number
@@ -434,7 +434,7 @@ def run(local_rank, *args, **kwargs):
            # ...
 
     Args:
-        index (int): local rank or current process index
+        index: local rank or current process index
 
     """
     from ignite.distributed.comp_models.base import ComputationModel
@@ -487,8 +487,8 @@ def train_fn(local_rank, a, b, c):
 
 
     Args:
-        backend (str, optional): backend: `nccl`, `gloo`, `xla-tpu`, `horovod`.
-        **kwargs: acceptable kwargs according to provided backend:
+        backend: backend: `nccl`, `gloo`, `xla-tpu`, `horovod`.
+        kwargs: acceptable kwargs according to provided backend:
 
        - "nccl" or "gloo" : timeout(=timedelta(minutes=30)).
 
@@ -543,8 +543,8 @@ def one_rank_only(rank: int = 0, with_barrier: bool = False) -> Callable:
     """Decorator to filter handlers wrt a rank number
 
    Args:
-        rank (int): rank number of the handler (default: 0).
-        with_barrier (bool): synchronisation with a barrier (default: False).
+        rank: rank number of the handler (default: 0).
+        with_barrier: synchronisation with a barrier (default: False).
 
    .. code-block:: python
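
For context beyond the patch itself, below is a minimal usage sketch of the helpers whose docstrings are edited above (``auto_dataloader``, ``auto_model``, ``auto_optim``, ``Parallel.run``); the toy dataset, model, and hyper-parameters are assumptions for illustration, not taken from the repository.

# Not part of the patch: a hypothetical end-to-end sketch of the documented idist helpers.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset

import ignite.distributed as idist


def training(local_rank, config):
    # auto_dataloader adds a distributed sampler / XLA device loader when a backend is active
    dataset = TensorDataset(torch.rand(64, 10), torch.rand(64, 1))
    loader = idist.auto_dataloader(dataset, batch_size=config["batch_size"], shuffle=True)

    # auto_model wraps the model (DistributedDataParallel, DataParallel, ...) if applicable
    model = idist.auto_model(nn.Linear(10, 1))
    # auto_optim adapts the optimizer for the backend (e.g. XLA) or returns it unchanged
    optimizer = idist.auto_optim(torch.optim.SGD(model.parameters(), lr=config["lr"]))

    device = idist.device()
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        F.mse_loss(model(x), y).backward()
        optimizer.step()


if __name__ == "__main__":
    config = {"batch_size": 16, "lr": 0.01}
    # backend=None runs the same code without any distributed configuration;
    # passing e.g. backend="nccl", nproc_per_node=2 spawns worker processes instead.
    with idist.Parallel(backend=None) as parallel:
        parallel.run(training, config)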