From 1715591c0f531489362a2aae0a98e441ea0481e1 Mon Sep 17 00:00:00 2001 From: Michele Milesi Date: Mon, 2 Oct 2023 11:43:52 +0200 Subject: [PATCH 1/3] update diambra-arena (v2.2.1) --- pyproject.toml | 2 +- sheeprl/configs/env/diambra.yaml | 23 +++-- ..._v3_L_doapp_128px_gray_combo_discrete.yaml | 45 +++++----- sheeprl/envs/diambra.py | 90 ++++++++++--------- 4 files changed, 90 insertions(+), 70 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a6b76b25..8afcd9e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ atari = [ ] minedojo = ["minedojo==0.1", "importlib_resources==5.12.0"] minerl = ["minerl==0.4.4"] -diambra = ["wheel==0.38.4", "setuptools<=66.0.0", "gym==0.21.0", "diambra==0.0.16", "diambra-arena==2.1.2"] +diambra = ["diambra==0.0.16", "diambra-arena==2.2.1"] crafter = ["crafter==1.8.1"] [tool.ruff] diff --git a/sheeprl/configs/env/diambra.yaml b/sheeprl/configs/env/diambra.yaml index 385d2059..17cf3357 100644 --- a/sheeprl/configs/env/diambra.yaml +++ b/sheeprl/configs/env/diambra.yaml @@ -12,15 +12,24 @@ action_repeat: 1 wrapper: _target_: sheeprl.envs.diambra.DiambraWrapper id: ${env.id} - action_space: discrete + action_space: diambra.arena.SpaceTypes.DISCRETE # or diambra.arena.SpaceTypes.MULTI_DISCRETE screen_size: ${env.screen_size} grayscale: ${env.grayscale} - attack_but_combination: False - sticky_actions: ${env.action_repeat} - seed: null + repeat_action: ${env.action_repeat} rank: null diambra_settings: - player: P1 + role: diambra.arena.Roles.P1 + step_ratio: 6 + difficulty: 4 + continue_game: 0.0 + show_final: False + outfits: 1 diambra_wrappers: - actions_stack: 12 - noop_max: 0 + stack_actions: 1 + no_op_max: 0 + no_attack_buttons_combinations: False + add_last_action: True + scale: False + exclude_image_scaling: False + process_discrete_binary: False + role_relative: False diff --git a/sheeprl/configs/exp/dreamer_v3_L_doapp_128px_gray_combo_discrete.yaml 
b/sheeprl/configs/exp/dreamer_v3_L_doapp_128px_gray_combo_discrete.yaml index 6f912a9c..fe6f10f9 100644 --- a/sheeprl/configs/exp/dreamer_v3_L_doapp_128px_gray_combo_discrete.yaml +++ b/sheeprl/configs/exp/dreamer_v3_L_doapp_128px_gray_combo_discrete.yaml @@ -19,10 +19,11 @@ env: screen_size: 128 reward_as_observation: True wrapper: - attack_but_combination: True diambra_settings: characters: Kasumi difficulty: 4 + diambra_wrappers: + no_attack_buttons_combinations: False # Checkpoint checkpoint: @@ -38,30 +39,30 @@ cnn_keys: - frame mlp_keys: encoder: - - reward - - P1_actions_attack - - P1_actions_move - - P1_oppChar - - P1_oppHealth - - P1_oppSide - - P1_oppWins - - P1_ownChar - - P1_ownHealth - - P1_ownSide - - P1_ownWins + - P1_character + - P1_health + - P1_side + - P1_wins + - P2_character + - P2_health + - P2_side + - P2_wins - stage + - timer + - action + - reward decoder: - - P1_actions_attack - - P1_actions_move - - P1_oppChar - - P1_oppHealth - - P1_oppSide - - P1_oppWins - - P1_ownChar - - P1_ownHealth - - P1_ownSide - - P1_ownWins + - P1_character + - P1_health + - P1_side + - P1_wins + - P2_character + - P2_health + - P2_side + - P2_wins - stage + - timer + - action # Algorithm algo: diff --git a/sheeprl/envs/diambra.py b/sheeprl/envs/diambra.py index f9c2f2ce..4966947a 100644 --- a/sheeprl/envs/diambra.py +++ b/sheeprl/envs/diambra.py @@ -11,9 +11,9 @@ import diambra import diambra.arena -import gym -import gymnasium +import gymnasium as gym import numpy as np +from diambra.arena import EnvironmentSettings, WrappersSettings from gymnasium import core from gymnasium.core import RenderFrame @@ -25,12 +25,12 @@ def __init__( action_space: str = "discrete", screen_size: Union[int, Tuple[int, int]] = 64, grayscale: bool = False, - attack_but_combination: bool = True, - sticky_actions: int = 1, - seed: Optional[int] = None, + repeat_action: int = 1, rank: int = 0, diambra_settings: Dict[str, Any] = {}, diambra_wrappers: Dict[str, Any] = {}, + render_mode: 
str = "rgb_array", + log_level: int = 0, ) -> None: super().__init__() @@ -39,45 +39,49 @@ def __init__( if diambra_settings.pop("frame_shape", None) is not None: warnings.warn("The DIAMBRA frame_shape setting is disabled") - settings = { + if diambra_settings.pop("n_players", None) is not None: + warnings.warn("The DIAMBRA n_players setting is disabled") + + role = diambra_settings.pop("role", None) + settings = EnvironmentSettings( **diambra_settings, - "action_space": action_space.lower(), - "attack_but_combination": attack_but_combination, - } - if sticky_actions > 1: + **{ + "game_id": id, + "action_space": eval(action_space), + "n_players": 1, + "role": eval(role), + "render_mode": render_mode, + }, + ) + if repeat_action > 1: if "step_ratio" not in settings or settings["step_ratio"] > 1: warnings.warn( - f"step_ratio parameter modified to 1 because the sticky action is active ({sticky_actions})" + f"step_ratio parameter modified to 1 because the sticky action is active ({repeat_action})" ) settings["step_ratio"] = 1 - if diambra_wrappers.pop("hwc_obs_resize", None) is not None: - warnings.warn("The DIAMBRA hwc_obs_resize wrapper is disabled") - if diambra_wrappers.pop("frame_stack", None) is not None: - warnings.warn("The DIAMBRA frame_stack wrapper is disabled") + if diambra_wrappers.pop("frame_shape", None) is not None: + warnings.warn("The DIAMBRA frame_shape wrapper is disabled") + if diambra_wrappers.pop("stack_frames", None) is not None: + warnings.warn("The DIAMBRA stack_frames wrapper is disabled") if diambra_wrappers.pop("dilation", None) is not None: warnings.warn("The DIAMBRA dilation wrapper is disabled") - wrappers = { + if diambra_wrappers.pop("flatten", None) is not None: + warnings.warn("The DIAMBRA flatten wrapper is disabled") + wrappers = WrappersSettings( **diambra_wrappers, - "flatten": True, - "sticky_actions": sticky_actions, - "hwc_obs_resize": screen_size + (1 if grayscale else 3,), - } - self._env = diambra.arena.make(id, settings, 
wrappers, seed=seed, rank=rank) + **{ + "flatten": True, + "repeat_action": repeat_action, + "frame_shape": screen_size + (int(grayscale),), + }, + ) + self._env = diambra.arena.make(id, settings, wrappers, rank=rank, render_mode=render_mode, log_level=log_level) # Observation and action space - self.action_space = ( - gymnasium.spaces.Discrete(self._env.action_space.n) - if action_space.lower() == "discrete" - else gymnasium.spaces.MultiDiscrete(self._env.action_space.nvec) - ) + self.action_space = self._env.action_space obs = {} for k in self._env.observation_space.spaces.keys(): - if isinstance(self._env.observation_space[k], gym.spaces.Box): - low = self._env.observation_space[k].low - high = self._env.observation_space[k].high - shape = self._env.observation_space[k].shape - dtype = self._env.observation_space[k].dtype - elif isinstance(self._env.observation_space[k], gym.spaces.Discrete): + if isinstance(self._env.observation_space[k], gym.spaces.Discrete): low = 0 high = self._env.observation_space[k].n - 1 shape = (1,) @@ -87,11 +91,15 @@ def __init__( high = self._env.observation_space[k].nvec - 1 shape = (len(high),) dtype = np.int32 - else: + elif not isinstance(self._env.observation_space[k], gym.spaces.Box): raise RuntimeError(f"Invalid observation space, got: {type(self._env.observation_space[k])}") - obs[k] = gymnasium.spaces.Box(low, high, shape, dtype) - self.observation_space = gymnasium.spaces.Dict(obs) - self.render_mode = "rgb_array" + obs[k] = ( + self._env.observation_space[k] + if isinstance(self._env.observation_space[k], gym.spaces.Box) + else gym.spaces.Box(low, high, shape, dtype) + ) + self.observation_space = gym.spaces.Dict(obs) + self.render_mode = render_mode def __getattr__(self, name): return getattr(self._env, name) @@ -103,17 +111,19 @@ def _convert_obs(self, obs: Dict[str, Union[int, np.ndarray]]) -> Dict[str, np.n } def step(self, action: Any) -> Tuple[Any, SupportsFloat, bool, bool, Dict[str, Any]]: - obs, reward, done, 
infos = self._env.step(action) + obs, reward, done, truncated, infos = self._env.step(action) infos["env_domain"] = "DIAMBRA" - return self._convert_obs(obs), reward, done or infos.get("env_done", False), False, infos + return self._convert_obs(obs), reward, done or infos.get("env_done", False), truncated, infos def render(self, mode: str = "rgb_array", **kwargs) -> Optional[Union[RenderFrame, List[RenderFrame]]]: - return self._env.render("rgb_array") + return self._env.render() def reset( self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None ) -> Tuple[Any, Dict[str, Any]]: - return self._convert_obs(self._env.reset()), {"env_domain": "DIAMBRA"} + obs, infos = self._env.reset(seed=seed, options=options) + infos["env_domain"] = "DIAMBRA" + return self._convert_obs(obs), infos def close(self) -> None: self._env.close() From 0aa704b1728bb13c10d517d816c862b530f69c3a Mon Sep 17 00:00:00 2001 From: Michele Milesi Date: Mon, 2 Oct 2023 12:13:30 +0200 Subject: [PATCH 2/3] fix: diambra wrapper --- sheeprl/configs/env/diambra.yaml | 2 +- ..._v3_L_doapp_128px_gray_combo_discrete.yaml | 35 ++++++++++--------- sheeprl/envs/diambra.py | 2 +- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/sheeprl/configs/env/diambra.yaml b/sheeprl/configs/env/diambra.yaml index 17cf3357..771b8e85 100644 --- a/sheeprl/configs/env/diambra.yaml +++ b/sheeprl/configs/env/diambra.yaml @@ -32,4 +32,4 @@ wrapper: scale: False exclude_image_scaling: False process_discrete_binary: False - role_relative: False + role_relative: True diff --git a/sheeprl/configs/exp/dreamer_v3_L_doapp_128px_gray_combo_discrete.yaml b/sheeprl/configs/exp/dreamer_v3_L_doapp_128px_gray_combo_discrete.yaml index fe6f10f9..53e6bf86 100644 --- a/sheeprl/configs/exp/dreamer_v3_L_doapp_128px_gray_combo_discrete.yaml +++ b/sheeprl/configs/exp/dreamer_v3_L_doapp_128px_gray_combo_discrete.yaml @@ -20,7 +20,8 @@ env: reward_as_observation: True wrapper: diambra_settings: - characters: Kasumi + 
role: null + characters: null difficulty: 4 diambra_wrappers: no_attack_buttons_combinations: False @@ -39,27 +40,27 @@ cnn_keys: - frame mlp_keys: encoder: - - P1_character - - P1_health - - P1_side - - P1_wins - - P2_character - - P2_health - - P2_side - - P2_wins + - own_character + - own_health + - own_side + - own_wins + - opp_character + - opp_health + - opp_side + - opp_wins - stage - timer - action - reward decoder: - - P1_character - - P1_health - - P1_side - - P1_wins - - P2_character - - P2_health - - P2_side - - P2_wins + - own_character + - own_health + - own_side + - own_wins + - opp_character + - opp_health + - opp_side + - opp_wins - stage - timer - action diff --git a/sheeprl/envs/diambra.py b/sheeprl/envs/diambra.py index 4966947a..1bffc783 100644 --- a/sheeprl/envs/diambra.py +++ b/sheeprl/envs/diambra.py @@ -49,7 +49,7 @@ def __init__( "game_id": id, "action_space": eval(action_space), "n_players": 1, - "role": eval(role), + "role": eval(role) if role is not None else None, "render_mode": render_mode, }, ) From 4762687c891e9074f09db612c226427db171cb56 Mon Sep 17 00:00:00 2001 From: Michele Milesi Date: Mon, 2 Oct 2023 12:31:40 +0200 Subject: [PATCH 3/3] docs: update diambra howto --- howto/learn_in_diambra.md | 14 +++++++------- sheeprl/configs/env/diambra.yaml | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/howto/learn_in_diambra.md b/howto/learn_in_diambra.md index 22ce97db..746eab44 100644 --- a/howto/learn_in_diambra.md +++ b/howto/learn_in_diambra.md @@ -45,7 +45,7 @@ Each environment has its own observation and action space, so it is reccomended > > You have to be [registered](https://diambra.ai/register/) and logged in to acces the [DIAMRA documentation](https://docs.diambra.ai/). 
-The observation space is slightly modified to be compatible with our algorithms, in particular, the `gym.spaces.Box` observations are converted in `gymnasium.spaces.Box` observations, mantaining the dimensions, the range and the type of the observations. Moreover, the `gym.spaces.Discrete` observations are converted into `gymnasium.spaces.Box` observations with dimension `(1,)`, of type `int` and range from `0` to `n - 1`, where `n` is the number of options of the Discrete space. Finally, the `gym.spaces.MultiDiscrete` observations are converted into `gymnasium.spaces.Box` observations with dimension `(k,)` where `k` is the length of the MultiDiscrete space, of type `int` and range from `0` to `n[i] - 1` where `n[i]` is the number of options of the *i-th* element of the MultiDiscrete. +The observation space is slightly modified to be compatible with our algorithms. In particular, the `gymnasium.spaces.Discrete` observations are converted into `gymnasium.spaces.Box` observations with dimension `(1,)`, of type `int` and range from `0` to `n - 1`, where `n` is the number of options of the Discrete space. Finally, the `gymnasium.spaces.MultiDiscrete` observations are converted into `gymnasium.spaces.Box` observations with dimension `(k,)` where `k` is the length of the MultiDiscrete space, of type `int` and range from `0` to `n[i] - 1` where `n[i]` is the number of options of the *i-th* element of the MultiDiscrete. > **Note** > @@ -86,7 +86,7 @@ env: diambra_settings: characters: Kasumi step_ratio: 5 - player: P1 + role: diambra.arena.Roles.P1 diambra_wrappers: reward_normalization: True reward_normalization_factor: 0.3 @@ -102,14 +102,14 @@ diambra run -s=4 python sheeprl.py exp=custom_exp env.num_envs=4 > Some settings and wrappers are included in the cli arguments when the command is launched. These settings/wrappers cannot be specified in the `diambra_settings` and `diambra_wrappers` parameters, respectively. 
> The settings/wrappers you cannot specify in the `diambra_settings` and `diambra_wrappers` parameters are the following: > * `action_space` (settings): you can set it with the `env.wrapper.action_space` argument. -> * `attack_but_combination` (settings): you can set it with the `env.wrapper.attack_but_combination` argument. -> * `frame_shape` (settings): you can set it with the `env.screen_size` argument. +> * `n_players` (settings): you cannot set it, since it is always `1`. +> * `frame_shape` (settings and wrappers): you can set it with the `env.screen_size` argument. > * `flatten` (wrappers): you cannot set it, since it is always `True`. -> * `sticky_actions` (wrappers): you can set it with the `env.action_repeat` argument. -> * `frame_stack` (wrappers): you can set it with the `env.frame_stack` argument. +> * `repeat_action` (wrappers): you can set it with the `env.action_repeat` argument. +> * `stack_frames` (wrappers): you can set it with the `env.frame_stack` argument. > * `dilation` (wrappers): you can set it with the `env.frame_stack_dilation` argument > -> When you set the `action_repeat` cli argument greater than one (i.e., the `sticky_actions` DIAMBRA wrapper), the `step_ratio` diambra setting is automatically modified to $1$ because it is a DIAMBRA requirement. +> When you set the `action_repeat` cli argument greater than one (i.e., the `repeat_action` DIAMBRA wrapper), the `step_ratio` diambra setting is automatically modified to $1$ because it is a DIAMBRA requirement. > > **Important** > diff --git a/sheeprl/configs/env/diambra.yaml b/sheeprl/configs/env/diambra.yaml index 771b8e85..8234f60b 100644 --- a/sheeprl/configs/env/diambra.yaml +++ b/sheeprl/configs/env/diambra.yaml @@ -17,6 +17,7 @@ wrapper: grayscale: ${env.grayscale} repeat_action: ${env.action_repeat} rank: null + log_level: 0 diambra_settings: role: diambra.arena.Roles.P1 step_ratio: 6