diff --git a/ignite/distributed/auto.py b/ignite/distributed/auto.py
index 9764e38db6f7..1d6a585b0e63 100644
--- a/ignite/distributed/auto.py
+++ b/ignite/distributed/auto.py
@@ -50,8 +50,8 @@ def auto_dataloader(dataset: Dataset, **kwargs: Any) -> Union[DataLoader, "_MpDe
         )
 
     Args:
-        dataset (Dataset): input torch dataset
-        **kwargs: keyword arguments for `torch DataLoader`_.
+        dataset: input torch dataset
+        kwargs: keyword arguments for `torch DataLoader`_.
 
     Returns:
         `torch DataLoader`_ or `XLA MpDeviceLoader`_ for XLA devices
@@ -154,11 +154,11 @@ def auto_model(model: nn.Module, sync_bn: bool = False, **kwargs: Any) -> nn.Mod
         model = idist.auto_model(model)
 
     Args:
-        model (torch.nn.Module): model to adapt.
-        sync_bn (bool): if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
+        model: model to adapt.
+        sync_bn: if True, applies `torch convert_sync_batchnorm`_ to the model for native torch
            distributed only. Default, False. Note, if using Nvidia/Apex, batchnorm conversion should be
            applied before calling ``amp.initialize``.
-        **kwargs: kwargs to model's wrapping class: `torch DistributedDataParallel`_ or `torch DataParallel`_
+        kwargs: kwargs to model's wrapping class: `torch DistributedDataParallel`_ or `torch DataParallel`_
            if applicable. Please, make sure to use acceptable kwargs for given backend.
 
     Returns:
@@ -241,7 +241,7 @@ def auto_optim(optimizer: Optimizer) -> Optimizer:
         optimizer = idist.auto_optim(optimizer)
 
     Args:
-        optimizer (Optimizer): input torch optimizer
+        optimizer: input torch optimizer
 
     Returns:
         Optimizer
@@ -276,9 +276,9 @@ class DistributedProxySampler(DistributedSampler):
     Input sampler is assumed to have a constant size.
 
     Args:
-        sampler (Sampler): Input torch data sampler.
-        num_replicas (int, optional): Number of processes participating in distributed training.
-        rank (int, optional): Rank of the current process within ``num_replicas``.
+        sampler: Input torch data sampler.
+        num_replicas: Number of processes participating in distributed training.
+        rank: Rank of the current process within ``num_replicas``.
 
     """
 
diff --git a/ignite/distributed/launcher.py b/ignite/distributed/launcher.py
index 28f1a036c842..4d24979fee47 100644
--- a/ignite/distributed/launcher.py
+++ b/ignite/distributed/launcher.py
@@ -152,25 +152,25 @@ def training(local_rank, config, **kwargs):
     .. _horovodrun: https://horovod.readthedocs.io/en/latest/api.html#module-horovod.run
 
     Args:
-        backend (str, optional): backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
+        backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`. If None, no distributed
            configuration.
-        nproc_per_node (int, optional): optional argument, number of processes per
+        nproc_per_node: optional argument, number of processes per
            node to specify. If not None, :meth:`~ignite.distributed.Parallel.run` will spawn ``nproc_per_node``
            processes that run input function with its arguments.
-        nnodes (int, optional): optional argument, number of nodes participating in distributed configuration.
+        nnodes: optional argument, number of nodes participating in distributed configuration.
            If not None, :meth:`~ignite.distributed.Parallel.run` will spawn ``nproc_per_node``
            processes that run input function with its arguments. Total world size is `nproc_per_node * nnodes`.
            This option is only supported by native torch distributed module. For other modules, please setup
            ``spawn_kwargs`` with backend specific arguments.
-        node_rank (int, optional): optional argument, current machine index. Mandatory argument if ``nnodes`` is
+        node_rank: optional argument, current machine index. Mandatory argument if ``nnodes`` is
           specified and larger than one.
           This option is only supported by native torch distributed module. For other modules, please setup
           ``spawn_kwargs`` with backend specific arguments.
-        master_addr (str, optional): optional argument, master node TCP/IP address for torch native backends
+        master_addr: optional argument, master node TCP/IP address for torch native backends
           (`nccl`, `gloo`). Mandatory argument if ``nnodes`` is specified and larger than one.
-        master_port (int, optional): optional argument, master node port for torch native backends
+        master_port: optional argument, master node port for torch native backends
           (`nccl`, `gloo`). Mandatory argument if ``master_addr`` is specified.
-        **spawn_kwargs: kwargs to ``idist.spawn`` function.
+        spawn_kwargs: kwargs to ``idist.spawn`` function.
 
     .. versionchanged:: 0.4.2
        ``backend`` now accepts `horovod` distributed framework.
@@ -264,10 +264,10 @@ def training(local_rank, config, **kwargs):
            parallel.run(training, config, a=1, b=2)
 
        Args:
-            func (Callable): function to execute. First argument of the function should be `local_rank` - local process
+            func: function to execute. First argument of the function should be `local_rank` - local process
               index.
-            *args: positional arguments of ``func`` (without `local_rank`).
-            **kwargs: keyword arguments of ``func``.
+            args: positional arguments of ``func`` (without `local_rank`).
+            kwargs: keyword arguments of ``func``.
 
        """
        if self._spawn_params is not None and self.backend is not None:
diff --git a/ignite/distributed/utils.py b/ignite/distributed/utils.py
index 793d89e13793..1376ce102930 100644
--- a/ignite/distributed/utils.py
+++ b/ignite/distributed/utils.py
@@ -52,7 +52,7 @@ def sync(temporary: bool = False) -> None:
     This method should be used when distributed context is manually created or destroyed.
 
     Args:
-        temporary (bool): If True, distributed model synchronization is done every call of ``idist.get_*`` methods.
+        temporary: If True, distributed model synchronization is done every call of ``idist.get_*`` methods.
            This may have a negative performance impact.
     """
     global _model
@@ -285,15 +285,15 @@ def train_fn(local_rank, a, b, c, d=12):
         idist.spawn("xla-tpu", train_fn, args=(a, b, c), kwargs_dict={"d": 23}, nproc_per_node=8)
 
     Args:
-        backend (str): backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`
-        fn (function): function to called as the entrypoint of the spawned process.
+        backend: backend to use: `nccl`, `gloo`, `xla-tpu`, `horovod`
+        fn: function to be called as the entrypoint of the spawned process.
           This function must be defined at the top level of a module so it can be pickled and spawned.
           This is a requirement imposed by multiprocessing. The function is called as ``fn(i, *args, **kwargs_dict)``,
           where `i` is the process index and args is the passed through tuple of arguments.
-        args (tuple): arguments passed to `fn`.
-        kwargs_dict (Mapping): kwargs passed to `fn`.
-        nproc_per_node (int): number of processes to spawn on a single node. Default, 1.
-        **kwargs: acceptable kwargs according to provided backend:
+        args: arguments passed to `fn`.
+        kwargs_dict: kwargs passed to `fn`.
+        nproc_per_node: number of processes to spawn on a single node. Default, 1.
+        kwargs: acceptable kwargs according to provided backend:
 
           - | "nccl" or "gloo" : `nnodes` (default, 1), `node_rank` (default, 0), `master_addr`
             | (default, "127.0.0.1"), `master_port` (default, 2222), `timeout` to `dist.init_process_group`_ function
@@ -329,8 +329,8 @@ def all_reduce(tensor: Union[torch.Tensor, float], op: str = "SUM") -> Union[tor
     """Helper method to perform all reduce operation.
 
     Args:
-        tensor (torch.Tensor or number): tensor or number to collect across participating processes.
-        op (str): reduction operation, "SUM" by default. Possible values: "SUM", "PRODUCT", "MIN", "MAX", "AND", "OR".
+        tensor: tensor or number to collect across participating processes.
+        op: reduction operation, "SUM" by default. Possible values: "SUM", "PRODUCT", "MIN", "MAX", "AND", "OR".
           Please, several values are not supported for the backend like "horovod".
 
     Returns:
@@ -347,7 +347,7 @@ def all_gather(tensor: Union[torch.Tensor, float, str]) -> Union[torch.Tensor, f
     """Helper method to perform all gather operation.
 
     Args:
-        tensor (torch.Tensor or number or str): tensor or number or str to collect across participating processes.
+        tensor: tensor or number or str to collect across participating processes.
 
     Returns:
         torch.Tensor of shape ``(world_size * tensor.shape[0], tensor.shape[1], ...)`` if input is a tensor or
@@ -365,9 +365,9 @@ def broadcast(tensor: Union[torch.Tensor, float, str], src: int = 0) -> Union[to
     """Helper method to perform broadcast operation.
 
     Args:
-        tensor (torch.Tensor or number or str): tensor or number or str to broadcast to participating processes.
+        tensor: tensor or number or str to broadcast to participating processes.
           Make sure to respect dtype of torch tensor input for all processes, otherwise execution will crash.
-        src (int): source rank. Default, 0.
+        src: source rank. Default, 0.
 
     Returns:
         torch.Tensor or string or number
@@ -434,7 +434,7 @@ def run(local_rank, *args, **kwargs):
            # ...
 
     Args:
-        index (int): local rank or current process index
+        index: local rank or current process index
 
     """
     from ignite.distributed.comp_models.base import ComputationModel
@@ -487,8 +487,8 @@ def train_fn(local_rank, a, b, c):
 
 
     Args:
-        backend (str, optional): backend: `nccl`, `gloo`, `xla-tpu`, `horovod`.
-        **kwargs: acceptable kwargs according to provided backend:
+        backend: backend: `nccl`, `gloo`, `xla-tpu`, `horovod`.
+        kwargs: acceptable kwargs according to provided backend:
 
        - "nccl" or "gloo" : timeout(=timedelta(minutes=30)).
 
@@ -543,8 +543,8 @@ def one_rank_only(rank: int = 0, with_barrier: bool = False) -> Callable:
     """Decorator to filter handlers wrt a rank number
 
    Args:
-        rank (int): rank number of the handler (default: 0).
-        with_barrier (bool): synchronisation with a barrier (default: False).
+        rank: rank number of the handler (default: 0).
+        with_barrier: synchronisation with a barrier (default: False).
 
    .. code-block:: python
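
For context beyond the patch itself, below is a minimal usage sketch of the helpers whose docstrings are edited above (``auto_dataloader``, ``auto_model``, ``auto_optim``, ``Parallel.run``); the toy dataset, model, and hyper-parameters are assumptions for illustration, not taken from the repository.

# Not part of the patch: a hypothetical end-to-end sketch of the documented idist helpers.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset

import ignite.distributed as idist


def training(local_rank, config):
    # auto_dataloader adds a distributed sampler / XLA device loader when a backend is active
    dataset = TensorDataset(torch.rand(64, 10), torch.rand(64, 1))
    loader = idist.auto_dataloader(dataset, batch_size=config["batch_size"], shuffle=True)

    # auto_model wraps the model (DistributedDataParallel, DataParallel, ...) if applicable
    model = idist.auto_model(nn.Linear(10, 1))
    # auto_optim adapts the optimizer for the backend (e.g. XLA) or returns it unchanged
    optimizer = idist.auto_optim(torch.optim.SGD(model.parameters(), lr=config["lr"]))

    device = idist.device()
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        F.mse_loss(model(x), y).backward()
        optimizer.step()


if __name__ == "__main__":
    config = {"batch_size": 16, "lr": 0.01}
    # backend=None runs the same code without any distributed configuration;
    # passing e.g. backend="nccl", nproc_per_node=2 spawns worker processes instead.
    with idist.Parallel(backend=None) as parallel:
        parallel.run(training, config)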