Fix/configs #158

Merged · 8 commits · Nov 24, 2023
4 changes: 3 additions & 1 deletion examples/observation_space.py
@@ -1,8 +1,9 @@
import gymnasium as gym
import hydra
from omegaconf import DictConfig
from omegaconf import DictConfig, OmegaConf

from sheeprl.utils.env import make_env
from sheeprl.utils.utils import dotdict


@hydra.main(version_base="1.3", config_path="../sheeprl/configs", config_name="env_config")
@@ -23,6 +24,7 @@ def main(cfg: DictConfig) -> None:
"droq",
"ppo_recurrent",
}:
cfg = dotdict(OmegaConf.to_container(cfg, resolve=True))
env: gym.Env = make_env(cfg, cfg.seed, 0)()
else:
raise ValueError(
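
For context, the added call converts the Hydra `DictConfig` into a plain dictionary with attribute-style access (resolving all `${...}` interpolations) before it is handed to `make_env`. A minimal sketch of the same idea, assuming `dotdict` behaves like a `dict` subclass exposing attribute access (as `sheeprl.utils.utils.dotdict` appears to):

```python
from omegaconf import OmegaConf


class dotdict(dict):
    """Toy stand-in for sheeprl.utils.utils.dotdict: a dict with attribute access."""

    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


cfg = OmegaConf.create({"seed": 42, "env": {"id": "CartPole-v1"}})
# resolve=True evaluates ${...} interpolations before the conversion to a plain dict
plain = dotdict(OmegaConf.to_container(cfg, resolve=True))
print(plain.seed, plain["env"]["id"])  # 42 CartPole-v1
```
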
47 changes: 29 additions & 18 deletions howto/configs.md
@@ -106,7 +106,6 @@ defaults:
- exp: ???

num_threads: 1
total_steps: ???

# Set it to True to run a single optimization step
dry_run: False
@@ -119,14 +118,6 @@ torch_deterministic: False
exp_name: "default"
run_name: ${now:%Y-%m-%d_%H-%M-%S}_${exp_name}_${seed}
root_dir: ${algo.name}/${env.id}

# Encoder and decoder keys
cnn_keys:
encoder: []
decoder: ${cnn_keys.encoder}
mlp_keys:
encoder: []
decoder: ${mlp_keys.encoder}
```

### Algorithms
@@ -148,10 +139,17 @@ lmbda: 0.95
horizon: 15

# Training recipe
train_every: 16
learning_starts: 65536
per_rank_pretrain_steps: 1
per_rank_gradient_steps: 1
train_every: 16
per_rank_sequence_length: ???

# Encoder and decoder keys
cnn_keys:
decoder: ${algo.cnn_keys.encoder}
mlp_keys:
decoder: ${algo.mlp_keys.encoder}

# Model related parameters
layer_norm: True
@@ -237,14 +235,17 @@ actor:
ent_coef: 3e-4
min_std: 0.1
init_std: 0.0
distribution: "auto"
objective_mix: 1.0
dense_act: ${algo.dense_act}
mlp_layers: ${algo.mlp_layers}
layer_norm: ${algo.layer_norm}
dense_units: ${algo.dense_units}
clip_gradients: 100.0

expl_amount: 0.0
expl_min: 0.0
expl_decay: False
max_step_expl_decay: 0

# Distributed percentile model (used to scale the values)
moments:
decay: 0.99
@@ -278,10 +279,6 @@ critic:

# Player agent (it interacts with the environment)
player:
expl_min: 0.0
expl_amount: 0.0
expl_decay: False
max_step_expl_decay: 0
discrete_size: ${algo.world_model.discrete_size}
```

@@ -301,9 +298,23 @@ is:
* the content of the `sheeprl/configs/algo/default.yaml` config will be inserted into the current config; whenever a naming collision happens, i.e. when the same field is defined in both configurations, it is resolved by keeping the value defined in the current config. This behaviour is obtained by letting the `_self_` keyword be the last entry in the `defaults` list (see the sketch after this list)
* `/optim@world_model.optimizer: adam` (and similar) means that the `adam` config, found in the `sheeprl/configs/optim` folder, will be inserted into this config under the `world_model.optimizer` field, so that one can access it at runtime as `cfg.algo.world_model.optimizer`. As in the previous point, the fields `lr`, `eps`, and `weight_decay` will be overwritten by the ones specified in this config
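
The collision-resolution behaviour described above comes down to merge order: entries later in the `defaults` list overwrite earlier ones, and placing `_self_` last lets the current file win. A rough sketch of that semantics with plain OmegaConf merges (illustrative values, not the actual Hydra machinery):

```python
from omegaconf import OmegaConf

# Conceptually, composing the defaults list is a left-to-right merge in which
# later configs overwrite earlier ones on colliding fields.
adam_default = OmegaConf.create({"lr": 1e-3, "eps": 1e-8, "weight_decay": 0.0})  # e.g. optim/adam.yaml
algo_overrides = OmegaConf.create({"lr": 3e-4, "eps": 1e-5})  # values set in the algo config (_self_ last)

world_model_optimizer = OmegaConf.merge(adam_default, algo_overrides)
print(world_model_optimizer.lr)            # 0.0003 -> the current config wins the collision
print(world_model_optimizer.weight_decay)  # 0.0    -> untouched fields keep the default
```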

The default configuration for all the algorithms is the following:

```yaml
name: ???
total_steps: ???
per_rank_batch_size: ???

# Encoder and decoder keys
cnn_keys:
encoder: []
mlp_keys:
encoder: []
```

> **Warning**
>
> Every algorithm config **must** contain the field `name`
> Every algorithm config **must** contain the field `name`, the total number of steps `total_steps` and the batch size `per_rank_batch_size`
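
To make the constraint above concrete, here is a small, purely hypothetical check (not part of sheeprl) that one could run on a composed algo config before training:

```python
from typing import Any, Dict

# Hypothetical helper: verify the mandatory algo fields named in the warning above.
REQUIRED_ALGO_FIELDS = ("name", "total_steps", "per_rank_batch_size")


def check_algo_config(algo_cfg: Dict[str, Any]) -> None:
    missing = [f for f in REQUIRED_ALGO_FIELDS if algo_cfg.get(f) in (None, "???")]
    if missing:
        raise ValueError(f"Algorithm config is missing required fields: {missing}")


check_algo_config({"name": "dreamer_v3", "total_steps": 100000, "per_rank_batch_size": 16})  # passes
```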

### Environment

@@ -379,7 +390,6 @@ defaults:

# Experiment
seed: 5
total_steps: 100000

# Environment
env:
@@ -399,6 +409,7 @@ buffer:
# Algorithm
algo:
learning_starts: 1024
total_steps: 100000
train_every: 1
dense_units: 512
mlp_layers: 2
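
The hunk above reflects the central change of this PR: experiment-level fields such as `total_steps` (like `cnn_keys` and `mlp_keys`) now live under the `algo` namespace. A minimal sketch of the resulting access pattern, with illustrative values:

```python
from omegaconf import OmegaConf

# After this change, total_steps sits under algo, so it is read as
# cfg.algo.total_steps instead of cfg.total_steps (same for the encoder keys).
cfg = OmegaConf.create(
    {
        "seed": 5,
        "algo": {"learning_starts": 1024, "total_steps": 100000, "cnn_keys": {"encoder": ["rgb"]}},
    }
)
print(cfg.algo.total_steps)       # 100000
print(cfg.algo.cnn_keys.encoder)  # ['rgb']
```
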
2 changes: 1 addition & 1 deletion howto/learn_in_atari.md
@@ -24,5 +24,5 @@ The list of selectable algorithms is given below:
Once you have chosen the algorithm you want to train, you can start training; for instance, to train the PPO agent, run:

```bash
python sheeprl.py exp=ppo env=atari env.id=PongNoFrameskip-v4 cnn_keys.encoder=[rgb] fabric.accelerator=cpu fabric.strategy=ddp fabric.devices=2
python sheeprl.py exp=ppo env=atari env.id=PongNoFrameskip-v4 algo.cnn_keys.encoder=[rgb] fabric.accelerator=cpu fabric.strategy=ddp fabric.devices=2
```
6 changes: 3 additions & 3 deletions howto/learn_in_diambra.md
@@ -54,14 +54,14 @@ The observation space is slightly modified to be compatible with our algorithms,
## Multi-environments / Distributed training
In order to train your agent with multiple environments or to perform distributed training, you have to tell the `diambra run` command how many environments to instantiate (through the `-s` CLI argument). This number is the number of environments per single process multiplied by the number of processes you want to launch (the number of *player* processes for decoupled algorithms). Thus, in the case of coupled algorithms (e.g., `dreamer_v2`), if you want to distribute your training among $2$ processes, each containing $4$ environments, the total number of environments will be $2 \cdot 4 = 8$. The command will be:
```bash
diambra run -s=8 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.num_envs=4 env.sync_env=True cnn_keys.encoder=[frame] fabric.devices=2
diambra run -s=8 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.num_envs=4 env.sync_env=True algo.cnn_keys.encoder=[frame] fabric.devices=2
```

## Args
The IDs of the DIAMBRA environments are specified [here](https://docs.diambra.ai/envs/games/). To train your agent on a DIAMBRA environment you have to select the DIAMBRA configs with the argument `env=diambra`, then set the `env.id` argument to the environment ID, e.g., to train your agent on the *Dead Or Alive ++* game, you have to set the `env.id` argument to `doapp` (i.e., `env.id=doapp`).

```bash
diambra run -s=4 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.num_envs=4 cnn_keys.encoder=[frame]
diambra run -s=4 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.num_envs=4 algo.cnn_keys.encoder=[frame]
```

Another possibility is to create a new config file in the `sheeprl/configs/exp` folder, where you specify all the configs you want to use in your experiment. An example of a custom configuration file is available [here](../sheeprl/configs/exp/dreamer_v3_L_doapp.yaml).
@@ -120,5 +120,5 @@ diambra run -s=4 python sheeprl.py exp=custom_exp env.num_envs=4
## Headless machines

If you work on a headless machine, you need a software renderer. We recommend adopting one of the following solutions:
1. Install the `xvfb` software with the `sudo apt install xvfb` command and prefix the training command with `xvfb-run`. For instance, to train DreamerV3 on the doapp task on a headless machine, you need to run the following command: `xvfb-run diambra run python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.sync_env=True env.num_envs=1 cnn_keys.encoder=[frame] fabric.devices=1`
1. Install the `xvfb` software with the `sudo apt install xvfb` command and prefix the training command with `xvfb-run`. For instance, to train DreamerV3 on the doapp task on a headless machine, you need to run the following command: `xvfb-run diambra run python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.sync_env=True env.num_envs=1 algo.cnn_keys.encoder=[frame] fabric.devices=1`
2. Exploit the [PyVirtualDisplay](https://github.com/ponty/PyVirtualDisplay) package.
4 changes: 2 additions & 2 deletions howto/learn_in_dmc.md
@@ -23,12 +23,12 @@ For more information: [https://github.com/deepmind/dm_control](https://github.co
In order to train your agents on the [MuJoCo environments](https://gymnasium.farama.org/environments/mujoco/) provided by Gymnasium, it is sufficient to select the *GYM* environment (`env=gym`) and set `env.id` to the name of the environment you want to use, for instance `"Walker2d-v4"` if you want to train your agent on the *Walker2d* environment.

```bash
python sheeprl.py exp=dreamer_v3 env=gym env.id=Walker2d-v4 cnn_keys.encoder=[rgb]
python sheeprl.py exp=dreamer_v3 env=gym env.id=Walker2d-v4 algo.cnn_keys.encoder=[rgb]
```

## DeepMind Control
In order to train your agents on the [DeepMind control suite](https://github.com/deepmind/dm_control/blob/main/dm_control/suite/README.md), you have to select the *DMC* environment (`env=dmc`) and to set the id of the environment you want to use. A list of the available environments can be found [here](https://arxiv.org/abs/1801.00690). For instance, if you want to train your agent on the *walker walk* environment, you need to set the `env.id` to `"walker_walk"`.

```bash
python sheeprl.py exp=dreamer_v3 env=dmc env.id=walker_walk cnn_keys.encoder=[rgb]
python sheeprl.py exp=dreamer_v3 env=dmc env.id=walker_walk algo.cnn_keys.encoder=[rgb]
```
4 changes: 2 additions & 2 deletions howto/learn_in_minedojo.md
@@ -34,7 +34,7 @@ It is possible to train your agents on all the tasks provided by MineDojo. You n
For instance, you can use the following command to select the MineDojo open-ended environment.

```bash
python sheeprl.py exp=p2e_dv2 env=minedojo env.id=open-ended algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor cnn_keys.encoder=[rgb]
python sheeprl.py exp=p2e_dv2 env=minedojo env.id=open-ended algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor algo.cnn_keys.encoder=[rgb]
```

### Observation Space
@@ -73,5 +73,5 @@ For more information about the MineDojo action space, check [here](https://docs.

If you work on a headless machine, you need a software renderer. We recommend adopting one of the following solutions:

1. Install the `xvfb` software with the `sudo apt install xvfb` command and prefix the training command with `xvfb-run`. For instance, to train the `p2e_dv2` agent on the open-ended environment on a headless machine, you need to run the following command: `xvfb-run python sheeprl.py exp=p2e_dv2 fabric.devices=1 env=minedojo env.id=open-ended cnn_keys.encoder=[rgb] algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor`, or `MINEDOJO_HEADLESS=1 python sheeprl.py exp=p2e_dv2 fabric.devices=1 env=minedojo env.id=open-ended cnn_keys.encoder=[rgb] algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor`.
1. Install the `xvfb` software with the `sudo apt install xvfb` command and prefix the training command with `xvfb-run`. For instance, to train the `p2e_dv2` agent on the open-ended environment on a headless machine, you need to run the following command: `xvfb-run python sheeprl.py exp=p2e_dv2 fabric.devices=1 env=minedojo env.id=open-ended algo.cnn_keys.encoder=[rgb] algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor`, or `MINEDOJO_HEADLESS=1 python sheeprl.py exp=p2e_dv2 fabric.devices=1 env=minedojo env.id=open-ended algo.cnn_keys.encoder=[rgb] algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor`.
2. Exploit the [PyVirtualDisplay](https://github.com/ponty/PyVirtualDisplay) package.
2 changes: 1 addition & 1 deletion howto/learn_in_minerl.md
@@ -54,5 +54,5 @@ Finally, we added sticky actions for the `jump` and `attack` actions. You can se
## Headless machines

If you work on a headless machine, you need a software renderer. We recommend adopting one of the following solutions:
1. Install the `xvfb` software with the `sudo apt install xvfb` command and prefix the training command with `xvfb-run`. For instance, to train DreamerV3 on the custom navigate task on a headless machine, you need to run the following command: `xvfb-run python sheeprl.py exp=dreamer_v3 fabric.devices=1 env=minerl env.id=custom_navigate cnn_keys.encoder=[rgb]`.
1. Install the `xvfb` software with the `sudo apt install xvfb` command and prefix the training command with `xvfb-run`. For instance, to train DreamerV3 on the custom navigate task on a headless machine, you need to run the following command: `xvfb-run python sheeprl.py exp=dreamer_v3 fabric.devices=1 env=minerl env.id=custom_navigate algo.cnn_keys.encoder=[rgb]`.
2. Exploit the [PyVirtualDisplay](https://github.com/ponty/PyVirtualDisplay) package.
16 changes: 9 additions & 7 deletions howto/register_new_algorithm.md
@@ -146,7 +146,7 @@ def sota_main(fabric: Fabric, cfg: Dict[str, Any]):
policy_step = 0
last_checkpoint = 0
policy_steps_per_update = int(cfg.env.num_envs * cfg.algo.rollout_steps * world_size)
num_updates = cfg.total_steps // policy_steps_per_update if not cfg.dry_run else 1
num_updates = cfg.algo.total_steps // policy_steps_per_update if not cfg.dry_run else 1

# Warning for log and checkpoint every
if cfg.metric.log_every % policy_steps_per_update != 0:
@@ -170,9 +170,9 @@ def sota_main(fabric: Fabric, cfg: Dict[str, Any]):
for k in o.keys():
if k in obs_keys:
torch_obs = torch.from_numpy(o[k]).to(fabric.device)
if k in cfg.cnn_keys.encoder:
if k in cfg.algo.cnn_keys.encoder:
torch_obs = torch_obs.view(cfg.env.num_envs, -1, *torch_obs.shape[-2:])
if k in cfg.mlp_keys.encoder:
if k in cfg.algo.mlp_keys.encoder:
torch_obs = torch_obs.float()
step_data[k] = torch_obs
next_obs[k] = torch_obs
@@ -212,9 +212,9 @@ def sota_main(fabric: Fabric, cfg: Dict[str, Any]):
for k in o.keys():
if k in obs_keys:
torch_obs = torch.from_numpy(o[k]).to(fabric.device)
if k in cfg.cnn_keys.encoder:
if k in cfg.algo.cnn_keys.encoder:
torch_obs = torch_obs.view(cfg.env.num_envs, -1, *torch_obs.shape[-2:])
if k in cfg.mlp_keys.encoder:
if k in cfg.algo.mlp_keys.encoder:
torch_obs = torch_obs.float()
step_data[k] = torch_obs
obs[k] = torch_obs
@@ -382,8 +382,10 @@ defaults:
- override /env: atari
- _self_

total_steps: 65536
per_rank_batch_size: 64
algo:
total_steps: 65536
per_rank_batch_size: 64

buffer:
share_data: False

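
Putting the edits in this file together, the number of updates is now derived from `cfg.algo.total_steps`. A worked sketch of that computation, using the `total_steps` value from the config above and assumed values for `num_envs`, `rollout_steps`, and `world_size`:

```python
# total_steps comes from the exp config above; the other numbers are assumed for the sketch.
total_steps = 65536
num_envs, rollout_steps, world_size = 4, 128, 2

policy_steps_per_update = int(num_envs * rollout_steps * world_size)  # 1024
num_updates = total_steps // policy_steps_per_update                  # 64
print(policy_steps_per_update, num_updates)
```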