From a1736f8e2e9caf95fbb1b72b62e899abbdcf5172 Mon Sep 17 00:00:00 2001
From: michele-milesi <74559684+michele-milesi@users.noreply.github.com>
Date: Wed, 4 Oct 2023 17:53:02 +0200
Subject: [PATCH] Docs/update (#115)

* docs: update

* fix: dependencies

* fix: version

* fix: added swig in pyproject.toml
---
 README.md                       |  2 +-
 howto/learn_in_diambra.md       | 10 +++++-----
 howto/learn_in_dmc.md           |  8 ++++++--
 howto/learn_in_minedojo.md      |  2 +-
 howto/register_new_algorithm.md |  2 +-
 howto/select_observations.md    |  6 +++---
 howto/work_with_steps.md        |  4 ++--
 pyproject.toml                  |  4 ++--
 sheeprl/__init__.py             |  2 +-
 9 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 11bbabf4..f1f40be0 100644
--- a/README.md
+++ b/README.md
@@ -147,7 +147,7 @@ pip install "sheeprl[atari,mujoco,dev,test] @ git+https://github.com/Eclectic-Sh
 > 
 > If you want to install the *minedojo* or *minerl* environment support, Java JDK 8 is required: you can install it by following the instructions at this [link](https://docs.minedojo.org/sections/getting_started/install.html#on-ubuntu-20-04).
 > 
-> **MineRL**, **MineDojo**, and **DIAMBRA** environments have **conflicting requirements**, so **DO NOT install them together** with the `pip install -e .[minerl,minedojo,diambra]` command, but instead **install them individually** with either the command `pip install -e .[minerl]` or `pip install -e .[minedojo]` or `pip install -e .[diambra]` before running an experiment with the MineRL or MineDojo or DIAMBRA environment, respectively.
+> **MineRL** and **MineDojo** environments have **conflicting requirements**, so **DO NOT install them together** with the `pip install -e .[minerl,minedojo]` command, but instead **install them individually** with either the command `pip install -e .[minerl]` or `pip install -e .[minedojo]` before running an experiment with the MineRL or MineDojo environment, respectively.
diff --git a/howto/learn_in_diambra.md b/howto/learn_in_diambra.md
index 746eab44..4cb13d3f 100644
--- a/howto/learn_in_diambra.md
+++ b/howto/learn_in_diambra.md
@@ -61,7 +61,7 @@ diambra run -s=8 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.n
 The IDs of the DIAMBRA environments are specified [here](https://docs.diambra.ai/envs/games/). To train your agent on a DIAMBRA environment you have to select the diambra configs with the argument `env=diambra`, then set the `env.id` argument to the environment ID, e.g., to train your agent on the *Dead Or Alive ++* game, you have to set the `env.id` argument to `doapp` (i.e., `env.id=doapp`).
 
 ```bash
-diambra run -s=4 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.num_envs=4
+diambra run -s=4 python sheeprl.py exp=dreamer_v3 env=diambra env.id=doapp env.num_envs=4 cnn_keys.encoder=[frame]
 ```
 
 Another possibility is to create a new config file in the `sheeprl/configs/exp` folder, where you specify all the configs you want to use in your experiment. An example of a custom configuration file is available [here](../sheeprl/configs/exp/dreamer_v3_L_doapp.yaml).
@@ -72,7 +72,7 @@ To modify the default settings or add other wrappers, you have to add the settin
 For instance, in the following example, we create the `custom_exp.yaml` file in the `sheeprl/configs/exp` folder, where we select the diambra environment; in addition, player one is selected and a step ratio of $5$ is chosen. Moreover, the rewards are normalized by a factor of $0.3$.
 
-```diff
+```yaml
 # @package _global_
 
 defaults:
@@ -81,15 +81,15 @@ defaults:
   - _self_
 
 env:
-  env:
   id: doapp
+  wrapper:
     diambra_settings:
       characters: Kasumi
       step_ratio: 5
       role: diambra.arena.Roles.P1
     diambra_wrappers:
-      reward_normalization: True
-      reward_normalization_factor: 0.3
+      normalize_reward: True
+      normalization_factor: 0.3
 ```
 
 Now, to run your experiment, you have to execute the following command:
diff --git a/howto/learn_in_dmc.md b/howto/learn_in_dmc.md
index a417116c..7759120a 100644
--- a/howto/learn_in_dmc.md
+++ b/howto/learn_in_dmc.md
@@ -8,11 +8,15 @@ First you should install the proper environments:
 
 MuJoCo/DMC supports three different OpenGL rendering backends: EGL (headless), GLFW (windowed), OSMesa (headless).
 For each of them, you need to install some packages:
-- GLFW: `sudo apt-get install libglfw3 libglew2.0`
-- EGL: `sudo apt-get install libglew2.0`
+- GLFW: `sudo apt-get install libglfw3 libglew2.2`
+- EGL: `sudo apt-get install libglew2.2`
 - OSMesa: `sudo apt-get install libgl1-mesa-glx libosmesa6`
 In order to use one of these rendering backends, you need to set the `MUJOCO_GL` environment variable to `"glfw"`, `"egl"`, `"osmesa"`, respectively.
 
+> **Note**
+>
+> The `libglew2.2` package could have a different name, depending on your OS (e.g., `libglew2.2` is the one for Ubuntu 22.04.2 LTS).
+
 For more information: [https://github.com/deepmind/dm_control](https://github.com/deepmind/dm_control) and [https://mujoco.readthedocs.io/en/stable/programming/index.html#using-opengl](https://mujoco.readthedocs.io/en/stable/programming/index.html#using-opengl)
 
 ## MuJoCo Gymnasium
diff --git a/howto/learn_in_minedojo.md b/howto/learn_in_minedojo.md
index 8d3dca54..03f7dc77 100644
--- a/howto/learn_in_minedojo.md
+++ b/howto/learn_in_minedojo.md
@@ -29,7 +29,7 @@ It is possible to train your agents on all the tasks provided by MineDojo. You n
 For instance, you can use the following command to select the MineDojo open-ended environment.
 
 ```bash
-python sheeprl.py exp=p2e_dv2 env=minedojo env.id=open-ened algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor cnn_keys.encoder=[rgb]
+python sheeprl.py exp=p2e_dv2 env=minedojo env.id=open-ended algo.actor.cls=sheeprl.algos.p2e_dv2.agent.MinedojoActor cnn_keys.encoder=[rgb]
 ```
 
 ### Observation Space
diff --git a/howto/register_new_algorithm.md b/howto/register_new_algorithm.md
index 47ae06c1..3cac48ed 100644
--- a/howto/register_new_algorithm.md
+++ b/howto/register_new_algorithm.md
@@ -431,7 +431,7 @@ np.float = np.float32
 np.int = np.int64
 np.bool = bool
 
-__version__ = "0.3.2"
+__version__ = "0.4.3"
 ```
 
 Then if you run `python sheeprl/available_agents.py` you should see that `sota` appears in the list of all the available agents:
diff --git a/howto/select_observations.md b/howto/select_observations.md
index 6220dd45..a1a7dd81 100644
--- a/howto/select_observations.md
+++ b/howto/select_observations.md
@@ -27,9 +27,9 @@ You just need to pass the `mlp_keys` and `cnn_keys` of the encoder and the decod
 > 
 > We recommend reading [this](./work_with_multi-encoder_multi-decoder.md) to know how the encoder and decoder work with more observations.
 
-For instance, to train the ppo algorithm on the *doapp* task provided by *DIAMBRA* using image observations and only the `P1_oppHealth` and `P1_ownHealth` as vector observation, you have to run the following command:
+For instance, to train the ppo algorithm on the *doapp* task provided by *DIAMBRA* using image observations and only the `opp_health` and `own_health` as vector observation, you have to run the following command:
 ```bash
-python sheeprl.py exp=ppo env=diambra env.id=doapp cnn_keys.encoder=[frame] mlp_keys.encoder=[P1_oppHealth,P1_ownHealth]
+diambra run python sheeprl.py exp=ppo env=diambra env.id=doapp env.num_envs=1 cnn_keys.encoder=[frame] mlp_keys.encoder=[opp_health,own_health]
 ```
 
 > **Note**
 > 
It is important to know the observations the environment provides, for instance,
 > **Note**
 >
 > For some environments provided by gymnasium, e.g. `LunarLander-v2` or `CartPole-v1`, only vector observations are returned, but it is possible to extract the image observation from the render. To do this, it is sufficient to specify the `rgb` key to the `cnn_keys` args:
-> `python sheeprl.py cnn_keys.encoder=[rgb]`
+> `python sheeprl.py exp=... cnn_keys.encoder=[rgb]`
 
 #### Frame Stack
 For image observations it is possible to stack the last $n$ observations with the argument `frame_stack`. All the observations specified in the `cnn_keys` argument are stacked.
diff --git a/howto/work_with_steps.md b/howto/work_with_steps.md
index cad4f885..f56cf2e8 100644
--- a/howto/work_with_steps.md
+++ b/howto/work_with_steps.md
@@ -20,12 +20,12 @@ The hyper-parameters which refer to the *policy steps* are:
 * `total_steps`: the total number of policy steps to perform in an experiment. Effectively, this number will be divided in each process by $n \cdot m$ to obtain the number of training steps to be performed by each of them.
 * `exploration_steps`: the number of policy steps in which the agent explores the environment in the P2E algorithms.
-* `max_episode_steps`: the maximum number of policy steps an episode can last ($\text{max\_steps}$); when this number is reached a `terminated=True` is returned by the environment. This means that if you decide to have an action repeat greater than one ($\text{action\_repeat} > 1$), then the environment performs a maximum number of steps equal to: $\text{env\_steps} = \text{max\_steps} \cdot \text{action\_repeat}$.
+* `max_episode_steps`: the maximum number of policy steps an episode can last (`max_steps`); when this number is reached a `terminated=True` is returned by the environment. This means that if you decide to have an action repeat greater than one (`action_repeat > 1`), then the environment performs a maximum number of steps equal to: `env_steps = max_steps * action_repeat`.
 * `learning_starts`: how many policy steps the agent has to perform before starting the training.
 * `train_every`: how many policy steps the agent has to perform between one training and the next.
 
 ## Gradient steps
-A *gradient step* consists of an update of the parameters of the agent, i.e., a call of the *train* function. The gradient step is proportional to the number of parallel processes, indeed, if there are $n$ parallel processes, $n \cdot \text{gradient\_steps}$ calls to the *train* method will be executed.
+A *gradient step* consists of an update of the parameters of the agent, i.e., a call of the *train* function. The number of gradient steps is proportional to the number of parallel processes: if there are $n$ parallel processes, `n * gradient_steps` calls to the *train* method will be executed.
 
 The hyper-parameters which refer to the *gradient steps* are:
 
 * `algo.per_rank_gradient_steps`: the number of gradient steps per rank to perform in a single iteration.
diff --git a/pyproject.toml b/pyproject.toml
index 8afcd9e1..461a68ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@ create = true
 in-project = true
 
 [build-system]
-requires = ["setuptools >= 61.0.0"]
+requires = ["setuptools >= 61.0.0", "swig==4.*"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -65,7 +65,7 @@ atari = [
   "gymnasium[other]==0.29.*",
 ]
 minedojo = ["minedojo==0.1", "importlib_resources==5.12.0"]
-minerl = ["minerl==0.4.4"]
+minerl = ["setuptools==66.0.0", "minerl==0.4.4"]
 diambra = ["diambra==0.0.16", "diambra-arena==2.2.1"]
 crafter = ["crafter==1.8.1"]
diff --git a/sheeprl/__init__.py b/sheeprl/__init__.py
index 21baeba6..23f37403 100644
--- a/sheeprl/__init__.py
+++ b/sheeprl/__init__.py
@@ -31,4 +31,4 @@ np.int = np.int64
 np.bool = bool
 
-__version__ = "0.4.2"
+__version__ = "0.4.3"
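
As a companion to the `howto/learn_in_diambra.md` changes above, the snippet below sketches what the updated `custom_exp.yaml` could look like once written out in full, together with a launch command. Only the `env` keys come from the diff; the `defaults` entries, the `exp=custom_exp` argument, and the `-s=4`/`env.num_envs` values are illustrative assumptions, not something the patch prescribes.

```bash
# Sketch only: write the custom experiment config described in howto/learn_in_diambra.md
# (the defaults list and the launch arguments below are assumptions, not part of the patch).
mkdir -p sheeprl/configs/exp
cat > sheeprl/configs/exp/custom_exp.yaml << 'EOF'
# @package _global_

defaults:
  - dreamer_v3            # assumed base experiment config
  - override /env: diambra
  - _self_

env:
  id: doapp
  wrapper:
    diambra_settings:
      characters: Kasumi
      step_ratio: 5
      role: diambra.arena.Roles.P1
    diambra_wrappers:
      normalize_reward: True
      normalization_factor: 0.3
EOF

# Launch the experiment, spawning one DIAMBRA instance per environment (assumed values).
diambra run -s=4 python sheeprl.py exp=custom_exp env.num_envs=4 cnn_keys.encoder=[frame]
```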
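
The step bookkeeping in `howto/work_with_steps.md` can be sanity-checked with a couple of one-liners; the numbers below (`max_episode_steps=100`, `action_repeat=4`, 2 processes, 8 gradient steps per rank) are made up for illustration only.

```bash
# Illustrative values only: relate policy steps to environment steps and train() calls.
max_episode_steps=100    # maximum policy steps per episode (max_steps)
action_repeat=4
num_processes=2          # n parallel processes
per_rank_gradient_steps=8

# env_steps = max_steps * action_repeat
echo "max environment steps per episode: $(( max_episode_steps * action_repeat ))"   # 400

# n * gradient_steps calls to train() per iteration
echo "train() calls per iteration: $(( num_processes * per_rank_gradient_steps ))"   # 16
```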
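
Finally, since the patch adds `swig` to the build requirements and bumps the package version, a quick way to confirm that a fresh editable install picked up the new metadata is sketched below; the extras chosen for the install are just an example.

```bash
# Example check after installing from source (the extras here are arbitrary).
pip install -e ".[atari,mujoco]"
python -c "import sheeprl; print(sheeprl.__version__)"   # expected to print 0.4.3
```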