From a1736f8e2e9caf95fbb1b72b62e899abbdcf5172 Mon Sep 17 00:00:00 2001
From: michele-milesi <74559684+michele-milesi@users.noreply.github.com>
Date: Wed, 4 Oct 2023 17:53:02 +0200
Subject: [PATCH] Docs/update (#115)

* docs: update

* fix: dependencies

* fix: version

* fix: added swig in pyproject.toml
---
 README.md                       |  2 +-
 howto/learn_in_diambra.md       | 10 +++++-----
 howto/learn_in_dmc.md           |  8 ++++++--
 howto/learn_in_minedojo.md      |  2 +-
 howto/register_new_algorithm.md |  2 +-
 howto/select_observations.md    |  6 +++---
 howto/work_with_steps.md        |  4 ++--
 pyproject.toml                  |  4 ++--
 sheeprl/__init__.py             |  2 +-
 9 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 11bbabf4..f1f40be0 100644
--- a/README.md
+++ b/README.md
@@ -147,7 +147,7 @@ pip install "sheeprl[atari,mujoco,dev,test] @ git+https://github.com/Eclectic-Sh
 > 
 > If you want to install the *minedojo* or *minerl* environment support, Java JDK 8 is required: you can install it by following the instructions at this [link](https://docs.minedojo.org/sections/getting_started/install.html#on-ubuntu-20-04).
 > 
-> **MineRL**, **MineDojo**, and **DIAMBRA** environments have **conflicting requirements**, so **DO NOT install them together** with the `pip install -e .[minerl,minedojo,diambra]` command, but instead **install them individually** with either the command `pip install -e .[minerl]` or `pip install -e .[minedojo]` or `pip install -e .[diambra]` before running an experiment with the MineRL or MineDojo or DIAMBRA environment, respectively.
+> **MineRL** and **MineDojo** environments have **conflicting requirements**, so **DO NOT install them together** with the `pip install -e .[minerl,minedojo]` command, but instead **install them individually** with either the command `pip install -e .[minerl]` or `pip install -e .[minedojo]` before running an experiment with the MineRL or MineDojo environment, respectively.
diff --git a/howto/learn_in_diambra.md b/howto/learn_in_diambra.md
index 746eab44..4cb13d3f 100644
--- a/howto/learn_in_diambra.md
+++ b/howto/learn_in_diambra.md
@@ -61,7 +61,7 @@ diambra run -s=8 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.n
 The IDs of the DIAMBRA environments are specified [here](https://docs.diambra.ai/envs/games/). To train your agent on a DIAMBRA environment you have to select the diambra configs with the argument `env=diambra`, then set the `env.id` argument to the environment ID, e.g., to train your agent on the *Dead Or Alive ++* game, you have to set the `env.id` argument to `doapp` (i.e., `env.id=doapp`).
 
 ```bash
-diambra run -s=4 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.num_envs=4
+diambra run -s=4 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.num_envs=4 cnn_keys.encoder=[frame]
 ```
 
 Another possibility is to create a new config file in the `sheeprl/configs/exp` folder, where you specify all the configs you want to use in your experiment. An example of a custom configuration file is available [here](../sheeprl/configs/exp/dreamer_v3_L_doapp.yaml).
@@ -72,7 +72,7 @@ To modify the default settings or add other wrappers, you have to add the settin
 For instance, in the following example, we create the `custom_exp.yaml` file in the `sheeprl/configs/exp` folder, where we select the diambra environment; in addition, player one is selected and a step ratio of $5$ is chosen. Moreover, the rewards are normalized by a factor of $0.3$.
 
-```diff
+```yaml
 # @package _global_
 
 defaults:
@@ -81,15 +81,15 @@ defaults:
   - _self_
 
 env:
-  env:
   id: doapp
+  wrapper:
     diambra_settings:
       characters: Kasumi
       step_ratio: 5
       role: diambra.arena.Roles.P1
     diambra_wrappers:
-      reward_normalization: True
-      reward_normalization_factor: 0.3
+      normalize_reward: True
+      normalization_factor: 0.3
 ```
 
 Now, to run your experiment, you have to execute the following command:
diff --git a/howto/learn_in_dmc.md b/howto/learn_in_dmc.md
index a417116c..7759120a 100644
--- a/howto/learn_in_dmc.md
+++ b/howto/learn_in_dmc.md
@@ -8,11 +8,15 @@ First you should install the proper environments:
 
 MuJoCo/DMC supports three different OpenGL rendering backends: EGL (headless), GLFW (windowed), OSMesa (headless).
 For each of them, you need to install some packages:
-- GLFW: `sudo apt-get install libglfw3 libglew2.0`
-- EGL: `sudo apt-get install libglew2.0`
+- GLFW: `sudo apt-get install libglfw3 libglew2.2`
+- EGL: `sudo apt-get install libglew2.2`
 - OSMesa: `sudo apt-get install libgl1-mesa-glx libosmesa6`
 In order to use one of these rendering backends, you need to set the `MUJOCO_GL` environment variable to `"glfw"`, `"egl"`, `"osmesa"`, respectively.
 
+> **Note**
+>
+> The `libglew2.2` package could have a different name, depending on your OS (e.g., `libglew2.2` is the one for Ubuntu 22.04.2 LTS).
+
 For more information: [https://github.com/deepmind/dm_control](https://github.com/deepmind/dm_control) and [https://mujoco.readthedocs.io/en/stable/programming/index.html#using-opengl](https://mujoco.readthedocs.io/en/stable/programming/index.html#using-opengl)
 
 ## MuJoCo Gymnasium
diff --git a/howto/learn_in_minedojo.md b/howto/learn_in_minedojo.md
index 8d3dca54..03f7dc77 100644
--- a/howto/learn_in_minedojo.md
+++ b/howto/learn_in_minedojo.md
@@ -29,7 +29,7 @@ It is possible to train your agents on all the tasks provided by MineDojo. You n
 For instance, you can use the following command to select the MineDojo open-ended environment.
 
 ```bash
-python sheeprl.py exp=p2e_dv2 env=minedojo env.id=open-ened algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor cnn_keys.encoder=[rgb]
+python sheeprl.py exp=p2e_dv2 env=minedojo env.id=open-ended algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor cnn_keys.encoder=[rgb]
 ```
 
 ### Observation Space
diff --git a/howto/register_new_algorithm.md b/howto/register_new_algorithm.md
index 47ae06c1..3cac48ed 100644
--- a/howto/register_new_algorithm.md
+++ b/howto/register_new_algorithm.md
@@ -431,7 +431,7 @@ np.float = np.float32
 np.int = np.int64
 np.bool = bool
 
-__version__ = "0.3.2"
+__version__ = "0.4.3"
 ```
 
 Then if you run `python sheeprl/available_agents.py` you should see that `sota` appears in the list of all the available agents:
diff --git a/howto/select_observations.md b/howto/select_observations.md
index 6220dd45..a1a7dd81 100644
--- a/howto/select_observations.md
+++ b/howto/select_observations.md
@@ -27,9 +27,9 @@ You just need to pass the `mlp_keys` and `cnn_keys` of the encoder and the decod
 > 
 > We recommend reading [this](./work_with_multi-encoder_multi-decoder.md) to know how the encoder and decoder work with more observations.
 
-For instance, to train the ppo algorithm on the *doapp* task provided by *DIAMBRA* using image observations and only the `P1_oppHealth` and `P1_ownHealth` as vector observation, you have to run the following command:
+For instance, to train the ppo algorithm on the *doapp* task provided by *DIAMBRA* using image observations and only the `opp_health` and `own_health` as vector observation, you have to run the following command:
 ```bash
-python sheeprl.py exp=ppo env=diambra env.id=doapp cnn_keys.encoder=[frame] mlp_keys.encoder=[P1_oppHealth,P1_ownHealth]
+diambra run python sheeprl.py exp=ppo env=diambra env.id=doapp env.num_envs=1 cnn_keys.encoder=[frame] mlp_keys.encoder=[opp_health,own_health]
 ```
 
 > **Note**
 > 
It is important to know the observations the environment provides, for instance,
 > **Note**
 >
 > For some environments provided by gymnasium, e.g. `LunarLander-v2` or `CartPole-v1`, only vector observations are returned, but it is possible to extract the image observation from the render. To do this, it is sufficient to specify the `rgb` key to the `cnn_keys` args:
-> `python sheeprl.py cnn_keys.encoder=[rgb]`
+> `python sheeprl.py exp=... cnn_keys.encoder=[rgb]`
 
 #### Frame Stack
 For image observations it is possible to stack the last $n$ observations with the argument `frame_stack`. All the observations specified in the `cnn_keys` argument are stacked.
diff --git a/howto/work_with_steps.md b/howto/work_with_steps.md
index cad4f885..f56cf2e8 100644
--- a/howto/work_with_steps.md
+++ b/howto/work_with_steps.md
@@ -20,12 +20,12 @@ The hyper-parameters which refer to the *policy steps* are:
 * `total_steps`: the total number of policy steps to perform in an experiment. Effectively, this number will be divided in each process by $n \cdot m$ to obtain the number of training steps to be performed by each of them.
 * `exploration_steps`: the number of policy steps in which the agent explores the environment in the P2E algorithms.
-* `max_episode_steps`: the maximum number of policy steps an episode can last ($\text{max\_steps}$); when this number is reached a `terminated=True` is returned by the environment. This means that if you decide to have an action repeat greater than one ($\text{action\_repeat} > 1$), then the environment performs a maximum number of steps equal to: $\text{env\_steps} = \text{max\_steps} \cdot \text{action\_repeat}$.
+* `max_episode_steps`: the maximum number of policy steps an episode can last (`max_steps`); when this number is reached a `terminated=True` is returned by the environment. This means that if you decide to have an action repeat greater than one (`action_repeat > 1`), then the environment performs a maximum number of steps equal to: `env_steps = max_steps * action_repeat`.
 * `learning_starts`: how many policy steps the agent has to perform before starting the training.
 * `train_every`: how many policy steps the agent has to perform between one training and the next.
 
 ## Gradient steps
-A *gradient step* consists of an update of the parameters of the agent, i.e., a call of the *train* function. The gradient step is proportional to the number of parallel processes, indeed, if there are $n$ parallel processes, $n \cdot \text{gradient\_steps}$ calls to the *train* method will be executed.
+A *gradient step* consists of an update of the parameters of the agent, i.e., a call of the *train* function. The number of gradient steps is proportional to the number of parallel processes: if there are $n$ parallel processes, `n * gradient_steps` calls to the *train* method will be executed.
 
 The hyper-parameters which refer to the *gradient steps* are:
 
 * `algo.per_rank_gradient_steps`: the number of gradient steps per rank to perform in a single iteration.
diff --git a/pyproject.toml b/pyproject.toml
index 8afcd9e1..461a68ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@ create = true
 in-project = true
 
 [build-system]
-requires = ["setuptools >= 61.0.0"]
+requires = ["setuptools >= 61.0.0", "swig==4.*"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -65,7 +65,7 @@ atari = [
   "gymnasium[other]==0.29.*",
 ]
 minedojo = ["minedojo==0.1", "importlib_resources==5.12.0"]
-minerl = ["minerl==0.4.4"]
+minerl = ["setuptools==66.0.0", "minerl==0.4.4"]
 diambra = ["diambra==0.0.16", "diambra-arena==2.2.1"]
 crafter = ["crafter==1.8.1"]
diff --git a/sheeprl/__init__.py b/sheeprl/__init__.py
index 21baeba6..23f37403 100644
--- a/sheeprl/__init__.py
+++ b/sheeprl/__init__.py
@@ -31,4 +31,4 @@ np.int = np.int64
 np.bool = bool
 
-__version__ = "0.4.2"
+__version__ = "0.4.3"
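
As a companion to the `howto/learn_in_diambra.md` changes above, the snippet below sketches what the updated `custom_exp.yaml` could look like once written out in full, together with a launch command. Only the `env` keys come from the diff; the `defaults` entries, the `exp=custom_exp` argument, and the `-s=4`/`env.num_envs` values are illustrative assumptions, not something the patch prescribes.

```bash
# Sketch only: write the custom experiment config described in howto/learn_in_diambra.md
# (the defaults list and the launch arguments below are assumptions, not part of the patch).
mkdir -p sheeprl/configs/exp
cat > sheeprl/configs/exp/custom_exp.yaml << 'EOF'
# @package _global_

defaults:
  - dreamer_v3            # assumed base experiment config
  - override /env: diambra
  - _self_

env:
  id: doapp
  wrapper:
    diambra_settings:
      characters: Kasumi
      step_ratio: 5
      role: diambra.arena.Roles.P1
    diambra_wrappers:
      normalize_reward: True
      normalization_factor: 0.3
EOF

# Launch the experiment, spawning one DIAMBRA instance per environment (assumed values).
diambra run -s=4 python sheeprl.py exp=custom_exp env.num_envs=4 cnn_keys.encoder=[frame]
```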
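
The step bookkeeping in `howto/work_with_steps.md` can be sanity-checked with a couple of one-liners; the numbers below (`max_episode_steps=100`, `action_repeat=4`, 2 processes, 8 gradient steps per rank) are made up for illustration only.

```bash
# Illustrative values only: relate policy steps to environment steps and train() calls.
max_episode_steps=100    # maximum policy steps per episode (max_steps)
action_repeat=4
num_processes=2          # n parallel processes
per_rank_gradient_steps=8

# env_steps = max_steps * action_repeat
echo "max environment steps per episode: $(( max_episode_steps * action_repeat ))"   # 400

# n * gradient_steps calls to train() per iteration
echo "train() calls per iteration: $(( num_processes * per_rank_gradient_steps ))"   # 16
```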
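
Finally, since the patch adds `swig` to the build requirements and bumps the package version, a quick way to confirm that a fresh editable install picked up the new metadata is sketched below; the extras chosen for the install are just an example.

```bash
# Example check after installing from source (the extras here are arbitrary).
pip install -e ".[atari,mujoco]"
python -c "import sheeprl; print(sheeprl.__version__)"   # expected to print 0.4.3
```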