# DanLing
# Copyright (C) 2022-Present DanLing

# This program is free software: you can redistribute it and/or modify
# it under the terms of the following licenses:
# - The Unlicense
# - GNU Affero General Public License v3.0 or later
# - GNU General Public License v2.0 or later
# - BSD 4-Clause "Original" or "Old" License
# - MIT License
# - Apache License 2.0

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the LICENSE file for more details.

from __future__ import annotations

import os
import shutil

import torch
from chanfig import NestedDict
from lazy_imports import try_import
from torch import distributed as dist
from torch import nn
from torch.nn.utils import clip_grad_value_

from danling.runner.config import Config
from danling.utils import catch

from .torch_runner import TorchRunner

with try_import() as ds:
    import deepspeed


class DeepSpeedRunner(TorchRunner):
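    r"""
    Runner powered by DeepSpeed.

    Compared with `TorchRunner`, this runner wraps the model, optimizer, and scheduler
    into a single `deepspeed.DeepSpeedEngine` and delegates backward, stepping, and
    checkpointing to it.
    """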

    def __init__(self, config: Config) -> None:
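        # Verify that DeepSpeed was imported successfully before building the runner.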
        ds.check()
        super().__init__(config)

    def init_distributed(self) -> None:
        r"""
        Set up distributed training.

        Initialise process group and set up DDP variables.
        """

        backend = self.config.get("backend", os.getenv("BACKEND"))
        init_method = self.config.get("init_method", os.getenv("INIT_METHOD"))
        world_size = int(self.config.get("world_size", os.getenv("WORLD_SIZE", "1")))
        rank = int(self.config.get("rank", os.getenv("RANK", "0")))
        if world_size > 1:
            if torch.cuda.is_available():
                torch.cuda.set_device(self.get_local_rank())
            deepspeed.init_distributed(dist_backend=backend, init_method=init_method, world_size=world_size, rank=rank)
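            # Broadcast the run id and timestamp from rank 0 so that every process
            # agrees on the experiment identifier and output paths.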
            object_list = [self.id, self.timestamp]
            dist.broadcast_object_list(object_list)
            self.id, self.timestamp = object_list

    def __post_init__(self):
        super().__post_init__()
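        # Resolve the DeepSpeed config from the runner config, then let DeepSpeed wrap
        # model, optimizer, and scheduler into a single engine.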
        self.config.deepspeed = self.get_deepspeed_config()
        self.model, self.optimizer, _, self.scheduler = deepspeed.initialize(
            model=self.model,
            optimizer=self.optimizer,
            lr_scheduler=self.scheduler,
            config=self.config.deepspeed,
        )

    def advance(self, loss) -> None:
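        r"""
        Backward the loss and step the DeepSpeed engine.

        `DeepSpeedEngine.step()` performs the optimizer and scheduler steps and zeroes
        the gradients, so no explicit `optimizer.step()` is needed here.
        """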
        self.backward(loss)
        if self.config.get("max_grad_value") is not None:
            clip_grad_value_(self.model.parameters(), self.config["max_grad_value"])
        self.model.step()
        if self.ema is not None:
            self.ema.update()
        self.config.steps = self.model.global_steps

    def backward(self, loss: torch.Tensor) -> None:
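        r"""
        Backward the loss through the DeepSpeed engine, which handles loss scaling
        and gradient accumulation internally.
        """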
        return self.model.backward(loss)

    def get_local_rank(self) -> int:
        local_rank = self.config.get("local_rank", os.getenv("LOCAL_RANK"))
        if local_rank is not None:
            return int(local_rank)
        rank = self.config.get("rank", os.getenv("RANK"))
        world_size = self.config.get("world_size", os.getenv("WORLD_SIZE"))
        if world_size is None or rank is None:
            raise ValueError("Please provide either `local_rank` or `world_size` and `rank`")
        # Fall back to deriving the local rank from the global rank and world size.
        return int(rank) % int(world_size)

    def unwrap(self, model: nn.Module) -> nn.Module:
        while isinstance(model, (deepspeed.DeepSpeedEngine, nn.parallel.DistributedDataParallel)):
            model = model.module
        return model

    @property
    def deepspeed(self) -> NestedDict | None:
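        r"""
        The active DeepSpeed config if the model has been wrapped into a
        `deepspeed.DeepSpeedEngine`, otherwise `None`.
        """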
        if isinstance(self.model, deepspeed.DeepSpeedEngine):
            return self.model.config
        return None

    @catch
    def save_checkpoint(self, name: str = "latest", epoch: int | None = None, save_best: bool = True) -> None:
        r"""
        Save checkpoint to `self.checkpoint_dir`.

        Args:
            name: Name of the checkpoint. Defaults to `"latest"`.
            epoch: Epoch to save. Defaults to `self.epochs`.
            save_best: If `True`, when `self.is_best` is `True`, the checkpoint will also be copied to
                `self.checkpoint_dir/best`.

        If `self.config.save_interval` is positive and `epoch + 1` is a multiple of `save_interval`,
        the checkpoint will also be copied to `self.checkpoint_dir/epoch-{epoch}`.
        """

        epoch = epoch if epoch is not None else self.epochs
        save_interval = self.config.get("save_interval", -1)
        latest_path = os.path.join(self.checkpoint_dir, name)
        os.makedirs(latest_path, exist_ok=True)
        self.yaml(os.path.join(latest_path, "runner.yaml"))
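        # DeepSpeed writes the (possibly sharded) engine state under `checkpoint_dir/name`;
        # the runner config is stored alongside it in `runner.yaml`.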
        self.model.save_checkpoint(
            self.checkpoint_dir, tag=name, client_state={"runner": self.config.dict()}, save_latest=False
        )
        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            save_path = os.path.join(self.checkpoint_dir, f"epoch-{epoch}")
            shutil.copytree(latest_path, save_path, dirs_exist_ok=True)
        if save_best and self.is_best:
            best_path = os.path.join(self.checkpoint_dir, "best")
            shutil.copytree(latest_path, best_path, dirs_exist_ok=True)

    def load_checkpoint(self, checkpoint: bytes | str | os.PathLike, *args, **kwargs) -> None:  # type: ignore[override]
        """
        Load model, optimizer, and scheduler from checkpoint.

        Args:
            checkpoint: Checkpoint (or its path) to load.
            *args: Additional arguments to pass to `self.load`.
            **kwargs: Additional keyword arguments to pass to `self.load`.

        Raises:
            ValueError: If `model` is not defined.
            ValueError: If `model` is not an instance of `deepspeed.DeepSpeedEngine`.

        See Also:
            [`from_checkpoint`][danling.BaseRunner.from_checkpoint]: Build runner from checkpoint.
            [`load_pretrained`][danling.BaseRunner.load_pretrained]: Load model parameters from pretrained checkpoint.
        """

        if self.model is None:
            raise ValueError("model is not defined")
        if not isinstance(self.model, deepspeed.DeepSpeedEngine):
            raise ValueError("model is not an instance of `deepspeed.DeepSpeedEngine`")

        self.model.load_checkpoint(checkpoint)
        self.config.checkpoint = checkpoint

    def load_pretrained(self, checkpoint: bytes | str | os.PathLike, *args, **kwargs) -> None:  # type: ignore[override]
        """
        Load model from pretrained checkpoint.

        This method only loads the model weights.

        Args:
            checkpoint: Pretrained checkpoint directory.
            *args: Additional arguments to pass to `self.load`.
            **kwargs: Additional keyword arguments to pass to `self.load`.

        Raises:
            ValueError: If `model` is not defined.

        See Also:
            [`load_checkpoint`][danling.BaseRunner.load_checkpoint]: Load model, optimizer, and scheduler from
                checkpoint.
        """

        if self.model is None:
            raise ValueError("model is not defined")

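        # `load_module_only=True` restores only the module weights and skips
        # optimizer and scheduler state.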
        self.model.load_checkpoint(checkpoint, load_module_only=True)
        self.config.pretrained = checkpoint

    def load_config(
        self, checkpoint: bytes | str | os.PathLike, overwrite: bool = False, *args, **kwargs  # type: ignore[override]
    ) -> None:
        r"""
        Load config from checkpoint.

        Args:
            checkpoint: Checkpoint (or its path) to load.
            overwrite: If `True`, overwrite the current config with the loaded config.
                Defaults to `False`.
            *args: Additional arguments to pass to `self.load`.
            **kwargs: Additional keyword arguments to pass to `self.load`.

        Raises:
            FileNotFoundError: If `checkpoint` does not exist.
        """

        if isinstance(checkpoint, bytes):
            checkpoint = checkpoint.decode()

        config = self.load(os.path.join(checkpoint, "runner.yaml"), *args, **kwargs)
        self.config.merge(config, overwrite=overwrite)
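        # Resume from the step/epoch after the one recorded in the checkpoint.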
        self.step_begin = config["steps"] + 1
        self.epoch_begin = config["epochs"] + 1