Merge pull request #133 from BDonnot/bd_dev

Addressing issues 196 and 185
Grid2op · Apr 26, 2021 · 2995859 · 2995859
2 parents f7c768a + e732ad8
commit 2995859
Show file tree

Hide file tree

Showing 18 changed files with 332 additions and 50 deletions.
diff --git a/.gitignore b/.gitignore
@@ -303,6 +303,8 @@ test_bug_discord1.py
 test_networkx.py
 test_issue185.py
 test_can_make_opponent.py
+enigma_nili.py
+test_issue196.py
 
 # profiling files
 **.prof
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -24,10 +24,22 @@ Change Log
 
 [1.5.2] - 2021-xx-yy
 -----------------------
-
+- [FIXED]: `Issue#196 <https://github.com/rte-france/Grid2Op/issues/196>`_ an issue related to the
+  low / high of the observation if using the gym_compat module. Some more protections
+  are enforced now.
+- [FIXED]: `Issue#196 <https://github.com/rte-france/Grid2Op/issues/196>`_ an issue related to the scaling when negative
+  numbers are used (in these cases low / high would be mixed up)
+- [IMPROVED]: on windows at least, grid2op does not work with gym < 0.17.2. Checks are performed in order to make sure
+  the installed open ai gym package meets this requirement (see issue
+  `Issue#185 <https://github.com/rte-france/Grid2Op/issues/185>`_ )
+- [IMPROVED] the seed of openAI gym for composed action space (see issue `https://github.com/openai/gym/issues/2166`):
+  waiting for an official fix, grid2op will use the solution proposed there
+  (https://github.com/openai/gym/issues/2166#issuecomment-803984619)
 
 [1.5.1] - 2021-04-15
 -----------------------
+- [FIXED]: `Issue#194 <https://github.com/rte-france/Grid2Op/issues/194>`_: (post release): change the name
+  of the file `platform.py` that could be mixed with the python "platform" module to `_glop_platform_info.py`
 - [FIXED]: `Issue #187 <https://github.com/rte-france/Grid2Op/issues/187>`_: improve the computation and the
   documentation of the `RedispReward`. This has an impact on the `env.reward_range` of all environments using this
   reward, because the old "reward_max" was not correct.

diff --git a/getting_started/11_IntegrationWithExistingRLFrameworks.ipynb b/getting_started/11_IntegrationWithExistingRLFrameworks.ipynb
@@ -505,7 +505,89 @@
     "            trainer.train()\n",
     "    finally:   \n",
     "        # shutdown ray\n",
-    "        ray.shutdown()"
+    "        ray.shutdown()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Because we are approximating a physical system with real equations, and limited computational power\n",
+    "regardless of the \"backend\" / \"powergrid simulator\" used internally by grid2op, it is sometimes possible\n",
+    "that an observation obs[\"gen_p\"] is not exactly in the range \n",
+    "env.observation_space[\"gen_p\"].low, env.observation_space[\"gen_p\"].high.\n",
+    "\n",
+    "In these \"pathological\" cases we recommend manually changing the low / high value of the `gen_p` part of the observation space, for example by adding, after the definition of self.observation_space, something like:\n",
+    "\n",
+    "```python\n",
+    "        # 4. specific to rllib\n",
+    "        self.action_space = self.env_gym.action_space\n",
+    "        self.observation_space = self.env_gym.observation_space\n",
+    "        self.observation_space[\"gen_p\"].low[:] = -np.inf\n",
+    "        self.observation_space[\"gen_p\"].high[:] = np.inf\n",
+    "```\n",
+    "\n",
+    "More information at https://github.com/rte-france/Grid2Op/issues/196\n",
+    "\n",
+    "**NB** these cases can be spotted with an error like:\n",
+    "\n",
+    "```\n",
+    "RayTaskError(ValueError): ray::RolloutWorker.par_iter_next() (pid=378, ip=172.28.0.2)\n",
+    "  File \"python/ray/_raylet.pyx\", line 480, in ray._raylet.execute_task\n",
+    "  File \"python/ray/_raylet.pyx\", line 432, in ray._raylet.execute_task.function_executor\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/util/iter.py\", line 1152, in par_iter_next\n",
+    "    return next(self.local_it)\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/evaluation/rollout_worker.py\", line 327, in gen_rollouts\n",
+    "    yield self.sample()\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/evaluation/rollout_worker.py\", line 662, in sample\n",
+    "    batches = [self.input_reader.next()]\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/evaluation/sampler.py\", line 95, in next\n",
+    "    batches = [self.get_data()]\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/evaluation/sampler.py\", line 224, in get_data\n",
+    "    item = next(self.rollout_provider)\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/evaluation/sampler.py\", line 620, in _env_runner\n",
+    "    sample_collector=sample_collector,\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/evaluation/sampler.py\", line 1056, in _process_observations_w_trajectory_view_api\n",
+    "    policy_id).transform(raw_obs)\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/models/preprocessors.py\", line 257, in transform\n",
+    "    self.check_shape(observation)\n",
+    "  File \"/usr/local/lib/python3.7/dist-packages/ray/rllib/models/preprocessors.py\", line 68, in check_shape\n",
+    "    observation, self._obs_space)\n",
+    "ValueError: ('Observation ({}) outside given space ({})!', OrderedDict([('actual_dispatch', array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
+    "       0., 0., 0., 0., 0.], dtype=float32)), ('gen_p', array([0.        , 0.14583334, 0.        , 0.5376    , 0.        ,\n",
+    "       0.13690476, 0.        , 0.        , 0.13988096, 0.        ,\n",
+    "       0.        , 0.        , 0.        , 0.        , 0.        ,\n",
+    "       0.        , 0.        , 0.10416667, 0.        , 0.9975    ,\n",
+    "       0.        , 0.0872582 ], dtype=float32)), ('load_p', array([-8.33333358e-02,  1.27543859e+01, -3.14843726e+00, -4.91228588e-02,\n",
+    "       -7.84314200e-02,  2.70270016e-02,  4.51001197e-01, -7.63358772e-02,\n",
+    "       -8.42104480e-02, -7.90961310e-02, -2.31212564e-02, -7.31706619e-02,\n",
+    "       -5.47945984e-02, -5.57769537e-02, -4.65115122e-02,  0.00000000e+00,\n",
+    "       -6.25000373e-02, -2.98508592e-02,  0.00000000e+00,  2.59741265e-02,\n",
+    "       -5.12821227e-02,  2.12766770e-02, -4.38757129e-02,  1.45455096e-02,\n",
+    "       -1.45278079e-02, -3.63636017e-02,  7.14286715e-02,  1.03358915e-02,\n",
+    "        8.95522386e-02,  4.81927246e-02, -1.76759213e-02,  1.11111533e-02,\n",
+    "        1.00000061e-01, -5.28445065e-01,  3.00833374e-01,  7.76839375e-01,\n",
+    "       -7.07498193e-01], dtype=float32)), ('rho', array([0.49652272, 0.42036632, 0.12563582, 0.22375877, 0.54946697,\n",
+    "       0.08844228, 0.05907034, 0.10975129, 0.13002895, 0.14068729,\n",
+    "       0.17318982, 0.6956544 , 0.38796344, 0.67179894, 0.22992906,\n",
+    "       0.25189328, 0.15049867, 0.09095841, 0.35627988, 0.35627988,\n",
+    "       0.36776555, 0.27249542, 0.6269728 , 0.62393713, 0.3464659 ,\n",
+    "       0.35879263, 0.22755426, 0.35994047, 0.36117986, 0.12019955,\n",
+    "       0.03638522, 0.2805753 , 0.5809281 , 0.6191531 , 0.5243356 ,\n",
+    "       0.60382956, 0.35834518, 0.35867074, 0.3580954 , 0.6681824 ,\n",
+    "       0.3441911 , 0.6081861 , 0.34460714, 0.18246886, 0.10307808,\n",
+    "       0.46778303, 0.47179568, 0.45407027, 0.30089107, 0.30089107,\n",
+    "       0.34481782, 0.3182735 , 0.35940355, 0.21895139, 0.19766088,\n",
+    "       0.63653564, 0.46778303, 0.4566811 , 0.64398617], dtype=float32)), ('topo_vect', array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+    "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+    "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+    "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+    "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+    "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+    "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+    "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+    "       1], dtype=int32))]), Dict(actual_dispatch:Box(-1.0, 1.0, (22,), float32), gen_p:Box(0.0, 1.2000000476837158, (22,), float32), load_p:Box(-inf, inf, (37,), float32), rho:Box(0.0, inf, (59,), float32), topo_vect:Box(-1, 2, (177,), int32)))\n",
+    "```"
    ]
   },
   {
@@ -1085,7 +1167,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,

diff --git a/grid2op/Observation/ObservationSpace.py b/grid2op/Observation/ObservationSpace.py
@@ -103,7 +103,10 @@ def __init__(self,
                                      legalActClass=copy.deepcopy(env._legalActClass),
                                      other_rewards=other_rewards,
                                      helper_action_class=env._helper_action_class,
-                                     helper_action_env=env._helper_action_env)
+                                     helper_action_env=env._helper_action_env,
+                                     epsilon_poly=env._epsilon_poly,
+                                     tol_poly=env._tol_poly,
+                                     )
         for k, v in self.obs_env.other_rewards.items():
             v.initialize(env)
 

diff --git a/grid2op/Observation/_ObsEnv.py b/grid2op/Observation/_ObsEnv.py
@@ -52,11 +52,15 @@ def __init__(self,
                  legalActClass,
                  helper_action_class,
                  helper_action_env,
+                 epsilon_poly,
+                 tol_poly,
                  other_rewards={}):
         BaseEnv.__init__(self,
                          copy.deepcopy(parameters),
                          thermal_limit_a,
-                         other_rewards=other_rewards)
+                         other_rewards=other_rewards,
+                         epsilon_poly=epsilon_poly,
+                         tol_poly=tol_poly)
         self._helper_action_class = helper_action_class
         self._reward_helper = reward_helper
         self._obsClass = None

diff --git a/grid2op/gym_compat/base_gym_attr_converter.py b/grid2op/gym_compat/base_gym_attr_converter.py
@@ -6,6 +6,7 @@
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
 from gym.spaces import Space
+from grid2op.gym_compat.utils import check_gym_version
 
 
 class BaseGymAttrConverter(object):
@@ -15,6 +16,7 @@ class BaseGymAttrConverter(object):
     Need help if you can :-)
     """
     def __init__(self, space=None, gym_to_g2op=None, g2op_to_gym=None):
+        check_gym_version()
         self.__is_init_super = False  # is the "super" class initialized, do not modify in child class
 
         self._is_init_space = False  # is the instance initialized

diff --git a/grid2op/gym_compat/box_gym_actspace.py b/grid2op/gym_compat/box_gym_actspace.py
@@ -14,13 +14,14 @@
 from grid2op.Action import BaseAction, ActionSpace
 from grid2op.dtypes import dt_int, dt_bool, dt_float
 
+
 # TODO test that it works normally
 # TODO test the casting in dt_int or dt_float depending on the data
 # TODO test the scaling
 # TODO doc
 # TODO test the function part
 
-from grid2op.gym_compat.utils import ALL_ATTR, ATTR_DISCRETE
+from grid2op.gym_compat.utils import ALL_ATTR, ATTR_DISCRETE, check_gym_version
 
 
 class BoxGymActSpace(Box):
@@ -124,7 +125,7 @@ def __init__(self,
             raise RuntimeError(f"Impossible to create a BoxGymActSpace without providing a "
                                f"grid2op action_space. You provided {type(grid2op_action_space)}"
                                f"as the \"grid2op_action_space\" attribute.")
-
+        check_gym_version()
         if attr_to_keep == ALL_ATTR:
             # by default, i remove all the attributes that are not supported by the action type
             # i do not do that if the user specified specific attributes to keep. This is his responsibility in

diff --git a/grid2op/gym_compat/box_gym_obsspace.py b/grid2op/gym_compat/box_gym_obsspace.py
@@ -13,6 +13,8 @@
 from grid2op.dtypes import dt_int, dt_bool, dt_float
 from grid2op.Observation import ObservationSpace
 
+from grid2op.gym_compat.utils import _compute_extra_power_for_losses
+from grid2op.gym_compat.utils import check_gym_version
 # TODO doc
 
 
@@ -113,20 +115,31 @@ class BoxGymObsSpace(Box):
     - `dtype` (optional, put None if you don't want to change it, defaults to np.float32) the type of
       the numpy array as output of your function.
 
+    Notes
+    -----
+    The range of the values for "gen_p" / "prod_p" is not strictly `env.gen_pmin` and `env.gen_pmax`.
+    This is due to the "approximation" when some redispatching is performed (the precision of the
+    algorithm that computes the actual dispatch from the information it receives) and also because
+    sometimes the losses of the grid are really different from the ones anticipated in the "chronics" (yes,
+    env.gen_pmin and env.gen_pmax are not always ensured in grid2op)
+
     """
     def __init__(self,
                  grid2op_observation_space,
                  attr_to_keep=ALL_ATTR_OBS,
                  subtract=None,
                  divide=None,
                  functs=None):
+        check_gym_version()
         if not isinstance(grid2op_observation_space, ObservationSpace):
             raise RuntimeError(f"Impossible to create a BoxGymObsSpace without providing a "
                                f"grid2op observation. You provided {type(grid2op_observation_space)}"
                                f"as the \"grid2op_observation_space\" attribute.")
         self._attr_to_keep = attr_to_keep
 
         ob_sp = grid2op_observation_space
+        tol_redisp = ob_sp.obs_env._tol_poly  # add to gen_p otherwise ... well it can crash
+        extra_for_losses = _compute_extra_power_for_losses(ob_sp)
 
         self.dict_properties = {
             "year": (np.zeros(1, dtype=dt_int),
@@ -141,8 +154,8 @@ def __init__(self,
                                np.zeros(1, dtype=dt_int) + 60, (1,), dt_int),
             "day_of_week": (np.zeros(1, dtype=dt_int),
                             np.zeros(1, dtype=dt_int) + 7, (1,), dt_int),
-            "gen_p": (np.full(shape=(ob_sp.n_gen,), fill_value=0., dtype=dt_float),
-                      1.2 * ob_sp.gen_pmax,
+            "gen_p": (np.full(shape=(ob_sp.n_gen,), fill_value=0., dtype=dt_float) - tol_redisp - extra_for_losses,
+                      ob_sp.gen_pmax + tol_redisp + extra_for_losses,
                       (ob_sp.n_gen,),
                       dt_float),
             "gen_q": (np.full(shape=(ob_sp.n_gen,), fill_value=-np.inf, dtype=dt_float),
@@ -273,6 +286,7 @@ def __init__(self,
         self.dict_properties["prod_p"] = self.dict_properties["gen_p"]
         self.dict_properties["prod_q"] = self.dict_properties["gen_q"]
         self.dict_properties["prod_v"] = self.dict_properties["gen_v"]
+        self.dict_properties["gen_p_before_curtail"] = self.dict_properties["gen_p"]
 
         if functs is None:
             functs = {}

diff --git a/grid2op/gym_compat/gym_obs_space.py b/grid2op/gym_compat/gym_obs_space.py
@@ -5,7 +5,7 @@
 # you can obtain one at http://mozilla.org/MPL/2.0/.
 # SPDX-License-Identifier: MPL-2.0
 # This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
-
+import copy
 import numpy as np
 from gym import spaces
 
@@ -14,6 +14,7 @@
 from grid2op.Observation import BaseObservation
 from grid2op.dtypes import dt_int, dt_bool, dt_float
 from grid2op.gym_compat.base_gym_attr_converter import BaseGymAttrConverter
+from grid2op.gym_compat.utils import _compute_extra_power_for_losses
 
 
 class GymObservationSpace(_BaseGymSpaceConverter):
@@ -48,6 +49,14 @@ class GymObservationSpace(_BaseGymSpaceConverter):
         # a situation where it is useful. And especially, you will not be able to
         # use "obs.simulate" for the observation converted back from this gym action.
 
+    Notes
+    -----
+    The range of the values for "gen_p" / "prod_p" is not strictly `env.gen_pmin` and `env.gen_pmax`.
+    This is due to the "approximation" when some redispatching is performed (the precision of the
+    algorithm that computes the actual dispatch from the information it receives) and also because
+    sometimes the losses of the grid are really different from the ones anticipated in the "chronics" (yes,
+    env.gen_pmin and env.gen_pmax are not always ensured in grid2op)
+
     """
     def __init__(self, env, dict_variables=None):
         if not isinstance(env, (Environment, MultiMixEnvironment, BaseMultiProcessEnvironment)):
@@ -173,9 +182,19 @@ def _fill_dict_obs_space(self, dict_, observation_space, env_params, opponent_sp
                 shape = (sh,)
                 SpaceType = spaces.Box
                 if attr_nm == "gen_p" or attr_nm == "gen_p_before_curtail":
-                    low = observation_space.gen_pmin
-                    high = observation_space.gen_pmax * 1.2  # because of the slack bus... # TODO
+                    low = copy.deepcopy(observation_space.gen_pmin)
+                    high = copy.deepcopy(observation_space.gen_pmax)
                     shape = None
+
+                    # for redispatching
+                    low -= observation_space.obs_env._tol_poly
+                    high += observation_space.obs_env._tol_poly
+
+                    # for "power losses" that are not properly computed in the original data
+                    extra_for_losses = _compute_extra_power_for_losses(observation_space)
+                    low -= extra_for_losses
+                    high += extra_for_losses
+
                 elif attr_nm == "gen_v" or attr_nm == "load_v" or attr_nm == "v_or" or attr_nm == "v_ex":
                     # voltages can't be negative
                     low = 0.

diff --git a/grid2op/gym_compat/gym_space_converter.py b/grid2op/gym_compat/gym_space_converter.py
@@ -11,6 +11,7 @@
 import copy
 
 from grid2op.dtypes import dt_int, dt_bool, dt_float
+from grid2op.gym_compat.utils import check_gym_version
 
 
 class _BaseGymSpaceConverter(spaces.Dict):
@@ -23,6 +24,7 @@ class _BaseGymSpaceConverter(spaces.Dict):
 
     """
     def __init__(self, dict_gym_space, dict_variables=None):
+        check_gym_version()
         spaces.Dict.__init__(self, dict_gym_space)
         self._keys_encoding = {}
         if dict_variables is not None:
@@ -207,3 +209,16 @@ def ignore_attr(self, attr_names):
             if k in attr_names:
                 res = res.reencode_space(k, None)
         return res
+
+    def seed(self, seed=None):
+        """Seed this space's PRNG, then each sub-space (in sorted key order) with a value drawn from it.
+        Workaround for openAI gym issue https://github.com/openai/gym/issues/2166 ; returns the parent
+        ``seed`` result with every sub-space's ``seed`` result appended to it.
+        """
+        seeds = super(spaces.Dict, self).seed(seed)  # lookup starts *after* spaces.Dict: its own seed is bypassed
+        sub_seeds = seeds  # alias, not a copy: the appends below extend the very same object
+        max_ = np.iinfo(int).max  # bound handed to randint when drawing each sub-seed
+        for i, space_key in enumerate(sorted(self.spaces.keys())):  # sorted keys => order independent of insertion
+            sub_seed = self.np_random.randint(max_)  # presumably drawn in [0, max_) -- TODO confirm np_random type
+            sub_seeds.append(self.spaces[space_key].seed(sub_seed))
+        return sub_seeds
diff --git a/grid2op/gym_compat/gymenv.py b/grid2op/gym_compat/gymenv.py
@@ -9,6 +9,7 @@
 import gym
 from grid2op.gym_compat.gym_obs_space import GymObservationSpace
 from grid2op.gym_compat.gym_act_space import GymActionSpace
+from grid2op.gym_compat.utils import check_gym_version
 
 
 class GymEnv(gym.Env):
@@ -39,6 +40,7 @@ class GymEnv(gym.Env):
 
     """
     def __init__(self, env_init):
+        check_gym_version()
         self.init_env = env_init.copy()
         self.action_space = GymActionSpace(self.init_env)
         self.observation_space = GymObservationSpace(self.init_env)