Merge pull request #134 from BDonnot/bd_dev
Adding a few features and fixes
BDonnot authored May 3, 2021
2 parents 2995859 + f755f47 commit f9ed970
Showing 20 changed files with 243 additions and 35 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -305,6 +305,7 @@ test_issue185.py
test_can_make_opponent.py
enigma_nili.py
test_issue196.py
test_increasingreward.py

# profiling files
**.prof
11 changes: 10 additions & 1 deletion CHANGELOG.rst
@@ -24,16 +24,25 @@ Change Log

[1.5.2] - 2021-xx-yy
-----------------------
- [BREAKING]: allow the opponent to choose the duration of its attack. This breaks the previous "Opponent.attack(...)"
signature by adding an object in the return value. All code provided with grid2op is compatible with this
change. (For previously coded opponents, the only thing you have to do to make them compliant with
the new interface is, in the `opponent.attack(...)` function, to return `whatever_you_returned_before, None` instead
of simply `whatever_you_returned_before`; a short adaptation sketch is shown after this changelog excerpt.)
- [FIXED]: `Issue#196 <https://github.com/rte-france/Grid2Op/issues/196>`_ an issue related to the
low / high of the observation if using the gym_compat module. Some more protections
are enforced now.
- [FIXED]: `Issue#196 <https://github.com/rte-france/Grid2Op/issues/196>`_ an issue related to the scaling when negative
numbers are used (in these cases low / max would be mixed up)
- [FIXED]: an issue with the `IncreasingFlatReward` reward type
- [ADDED]: a reward `EpisodeDurationReward` that is always 0 except at the end of an episode, where it returns a float
proportional to the number of steps performed since the beginning of the episode.
- [ADDED]: in the `Observation` the possibility to retrieve the current number of steps
- [IMPROVED]: on Windows at least, grid2op does not work with gym < 0.17.2. Checks are performed to make sure
the installed OpenAI gym package meets this requirement (see issue
`Issue#185 <https://github.com/rte-france/Grid2Op/issues/185>`_ )
- [IMPROVED] the seeding of openAI gym composed action spaces (see issue `https://github.com/openai/gym/issues/2166`):
while waiting for an official fix, grid2op will use the solution proposed there
(https://github.com/openai/gym/issues/2166#issuecomment-803984619)

[1.5.1] - 2021-04-15
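The breaking change listed above only affects the return value of `Opponent.attack(...)`. Below is a minimal, hypothetical adaptation sketch (the class name `MyLegacyOpponent` is illustrative and not part of this commit): returning `None` as the duration keeps the previous behaviour, i.e. grid2op picks the maximum allowed duration.

import grid2op
from grid2op.Opponent import BaseOpponent


class MyLegacyOpponent(BaseOpponent):
    """Hypothetical opponent showing how to adapt to the new interface."""

    def attack(self, observation, agent_action, env_action, budget, previous_fails):
        attack = None  # whatever attack the opponent used to compute before
        # old interface: return attack
        # new interface: also return a duration; None means "maximum allowed"
        return attack, None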
1 change: 1 addition & 0 deletions grid2op/Environment/BaseEnv.py
@@ -1551,6 +1551,7 @@ def step(self, action):
lines_attacked, subs_attacked = None, None
conv_ = None
init_line_status = copy.deepcopy(self.backend.get_line_status())

beg_step = time.time()
try:
beg_ = time.time()
16 changes: 9 additions & 7 deletions grid2op/Environment/Environment.py
@@ -244,13 +244,6 @@ def _init_backend(self,
actionClass=CompleteAction,
legal_action=self._game_rules.legal_action)

self._helper_observation_class = ObservationSpace.init_grid(gridobj=bk_type)
self._observation_space = self._helper_observation_class(gridobj=bk_type,
observationClass=observationClass,
actionClass=actionClass,
rewardClass=rewardClass,
env=self)

# handles input data
if not isinstance(chronics_handler, ChronicsHandler):
raise Grid2OpException(
@@ -263,6 +256,15 @@
names_chronics_to_backend=names_chronics_to_backend)
self.names_chronics_to_backend = names_chronics_to_backend

# this needs to be done after the chronics handler: rewards might need information
# about the chronics to work properly.
self._helper_observation_class = ObservationSpace.init_grid(gridobj=bk_type)
self._observation_space = self._helper_observation_class(gridobj=bk_type,
observationClass=observationClass,
actionClass=actionClass,
rewardClass=rewardClass,
env=self)

# test to make sure the backend is consistent with the chronics generator
self.chronics_handler.check_validity(self.backend)
self.delta_time_seconds = dt_float(self.chronics_handler.time_interval.seconds)
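The comment added above explains the reordering: the observation space builds the reward, and some rewards query the chronics handler when they are initialized. As a hedged illustration (mirroring `EpisodeDurationReward.initialize` introduced later in this commit; the class name `ChronicsAwareReward` is hypothetical), such a reward can only work once `self.chronics_handler` has been set up:

import numpy as np
from grid2op.Reward.BaseReward import BaseReward


class ChronicsAwareReward(BaseReward):
    """Hypothetical reward that needs the chronics handler at initialization."""

    def initialize(self, env):
        # env.chronics_handler must already exist here, hence the reordering above
        if env.chronics_handler.max_timestep() > 0:
            self.total_time_steps = env.chronics_handler.max_timestep()
        else:
            self.total_time_steps = np.inf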
6 changes: 6 additions & 0 deletions grid2op/Observation/BaseObservation.py
@@ -188,6 +188,9 @@ class BaseObservation(GridObjects):
curtailment_limit: :class:`numpy.ndarray`, dtype:float
Limit (in ratio of gen_pmax) imposed on each renewable generator.
current_step: ``int``
Current number of steps performed up until this observation (NB this is not given in the observation if
it is transformed into a vector)
"""

_attr_eq = ["line_status",
@@ -316,6 +319,9 @@ def __init__(self,
self.gen_theta = np.empty(shape=self.n_gen, dtype=dt_float)
self.storage_theta = np.empty(shape=self.n_storage, dtype=dt_float)

# counter
self.current_step = 0

def state_of(self,
_sentinel=None,
load_id=None,
3 changes: 3 additions & 0 deletions grid2op/Observation/CompleteObservation.py
@@ -130,6 +130,9 @@ def update(self, env, with_forecast=True):
self._reset_matrices()
self.reset()

# counter
self.current_step = env.nb_time_step

# extract the time stamps
self.year = dt_int(env.time_stamp.year)
self.month = dt_int(env.time_stamp.month)
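A short usage sketch for the new `current_step` attribute (the environment name is taken from the docstrings of this commit and used as an example only); it simply mirrors `env.nb_time_step` at the time the observation was built:

import grid2op

env = grid2op.make("rte_case14_realistic")
obs = env.reset()
print(obs.current_step)   # number of steps performed so far in the episode

obs, reward, done, info = env.step(env.action_space())
print(obs.current_step)   # increased by one after the step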
9 changes: 9 additions & 0 deletions grid2op/Observation/_ObsEnv.py
@@ -123,6 +123,9 @@ def __init__(self,
self._sum_curtailment_mw_init = 0.
self._sum_curtailment_mw_prev_init = 0.

# step count
self._nb_time_step_init = 0

def _init_myclass(self):
"""this class has already all the powergrid information: it is initialized in the obs space !"""
pass
@@ -363,6 +366,9 @@ def _reset_to_orig_state(self):
self._sum_curtailment_mw = self._sum_curtailment_mw_init
self._sum_curtailment_mw_prev = self._sum_curtailment_mw_prev_init

# current step
self.nb_time_step = self._nb_time_step_init

def simulate(self, action):
"""
INTERNAL
@@ -490,5 +496,8 @@ def update_grid(self, env):
# time delta
self.delta_time_seconds = env.delta_time_seconds

# current time
self._nb_time_step_init = env.nb_time_step

def get_current_line_status(self):
return self._line_status == 1
6 changes: 5 additions & 1 deletion grid2op/Opponent/BaseOpponent.py
@@ -68,10 +68,14 @@ def attack(self, observation, agent_action, env_action, budget, previous_fails):
-------
attack: :class:`grid2op.Action.Action`
The attack performed by the opponent. In this case, a do nothing, all the time.
duration: ``int``
The duration of the attack
"""
# TODO maybe have a class "GymOpponent" where the observation would include the budget and all other
# TODO information, and forward something to the "act" method.
return None
return None, None

def tell_attack_continues(self, observation, agent_action, env_action, budget):
"""
40 changes: 31 additions & 9 deletions grid2op/Opponent/OpponentSpace.py
@@ -5,13 +5,20 @@
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import numpy as np

from grid2op.Exceptions import OpponentError


class OpponentSpace(object):
"""
Is similar to the action space, but for the opponent.
This class is used to express some "constraints" on the opponent attack. The opponent is free to attack whatever
it wants, for how long it wants and when it wants. This class ensures that the opponent does not break any
rules.
Attributes
----------
action_space: :class:`grid2op.Action.ActionSpace`
@@ -32,7 +39,12 @@ class OpponentSpace(object):
budget_per_timestep: ``float``
The increase of the opponent budget per time step (if any)
"""
def __init__(self, compute_budget, init_budget, opponent, attack_duration, attack_cooldown,
def __init__(self,
compute_budget,
init_budget,
opponent,
attack_duration, # maximum duration of an attack
attack_cooldown, # minimum duration between two consecutive attacks
budget_per_timestep=0., action_space=None):
if action_space is not None:
if not isinstance(action_space, compute_budget.action_space):
@@ -48,7 +60,7 @@ def __init__(self, compute_budget, init_budget, opponent, attack_duration, attac
self._do_nothing = self.action_space()
self.previous_fails = False
self.budget_per_timestep = budget_per_timestep
self.attack_duration = attack_duration
self.attack_max_duration = attack_duration
self.attack_cooldown = attack_cooldown
self.current_attack_duration = 0
self.current_attack_cooldown = attack_cooldown
@@ -150,32 +162,42 @@ def attack(self, observation, agent_action, env_action):
self.current_attack_cooldown = max(0, self.current_attack_cooldown - 1)
attack_called = False

# If currently attacking
if self.current_attack_duration > 0:
# previous attack is not over
attack = self.last_attack

# If the opponent has already attacked today
elif self.current_attack_cooldown > self.attack_cooldown:
# minimum time between two consecutive attack not met
attack = None

# If the opponent can attack
else:
self.previous_fails = False
attack = self.opponent.attack(observation, agent_action, env_action, self.budget,
self.previous_fails)
attack_called = True
attack, duration = self.opponent.attack(observation, agent_action, env_action, self.budget,
self.previous_fails)
if duration is None:
if np.isfinite(self.attack_max_duration):
duration = self.attack_max_duration
else:
duration = 1

if duration > self.attack_max_duration:
# duration chosen by the opponent would exceed the maximum duration allowed
attack = None

# If the cost is too high
final_budget = self.budget # TODO add the: + self.budget_per_timestep * (self.attack_duration - 1)
# i did not do it in case an attack is ok at the beginning, ok at the end, but at some point in the attack
# process it is not (but i'm not sure this can happen, and don't have time to think about it right now)
if self.attack_duration * self.compute_budget(attack) > final_budget:
if duration * self.compute_budget(attack) > final_budget:
attack = None
self.previous_fails = True

# If we can afford the attack
elif attack is not None:
# even if it's "do nothing", it's sill an attack. To bad if the opponent chose to do nothing.
self.current_attack_duration = self.attack_duration
# even if it's "do nothing", it's still an attack. Too bad if the opponent chose to do nothing.
self.current_attack_duration = duration
self.current_attack_cooldown += self.attack_cooldown

if not attack_called:
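To summarize the duration handling added to `OpponentSpace.attack`, here is a hedged, standalone sketch (the helper name `_resolve_duration` is hypothetical): a duration of `None` falls back to the maximum allowed duration, or to a single step when that maximum is unbounded; an attack whose duration exceeds the maximum is discarded, and the budget test then uses the chosen duration.

import numpy as np


def _resolve_duration(duration, attack_max_duration):
    """Hypothetical helper mirroring the fallback rule of OpponentSpace.attack."""
    if duration is None:
        if np.isfinite(attack_max_duration):
            return attack_max_duration
        return 1  # unbounded maximum: default to a one-step attack
    return duration


# the attack is then rejected if duration > attack_max_duration, and the
# budget check becomes: duration * compute_budget(attack) <= available budget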
9 changes: 6 additions & 3 deletions grid2op/Opponent/RandomLineOpponent.py
@@ -108,20 +108,23 @@ def attack(self, observation, agent_action, env_action,
-------
attack: :class:`grid2op.Action.Action`
The attack performed by the opponent. In this case, a do nothing, all the time.
duration: ``int``
The duration of the attack (if ``None`` then the attack will be made for the longest allowed time)
"""
# TODO maybe have a class "GymOpponent" where the observation would include the budget and all other
# TODO information, and forward something to the "act" method.

if observation is None: # during creation of the environment
return None # i choose not to attack in this case
return None, 0 # i choose not to attack in this case

# Status of attackable lines
status = observation.line_status[self._lines_ids]

# If all attackable lines are disconnected
if np.all(~status):
return None # i choose not to attack in this case
return None, 0 # i choose not to attack in this case

# Pick a line among the connected lines
attack = self.space_prng.choice(self._attacks[status])
return attack
return attack, None
12 changes: 8 additions & 4 deletions grid2op/Opponent/WeightedRandomOpponent.py
@@ -135,13 +135,16 @@ def attack(self, observation, agent_action, env_action,
-------
attack: :class:`grid2op.Action.Action`
The attack performed by the opponent. In this case, a do nothing, all the time.
duration: ``int``
The duration of the attack
"""
# TODO maybe have a class "GymOpponent" where the observation would include the budget and all other
# TODO information, and forward something to the "act" method.

# During creation of the environment, do not attack
if observation is None:
return None
return None, 0

# Decide the time of the next attack
if self._next_attack_time is None:
@@ -150,16 +153,17 @@

# If the attack time has not come yet, do not attack
if self._next_attack_time > 0:
return None
return None, 0

# If all attackable lines are disconnected, do not attack
status = observation.line_status[self._lines_ids]
if np.all(~status):
return None
return None, 0

available_attacks = self._attacks[status]
rho = observation.rho[self._lines_ids][status] / self._rho_normalization[status]
rho_sum = rho.sum()
if rho_sum <= 0.:
return None
attack = self.space_prng.choice(available_attacks, p=rho / rho_sum)
return attack
return attack, None
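With the new interface an opponent can also pick its own duration explicitly. Below is a hedged sketch of such an opponent; the class name, the `line_id` / `duration` parameters and the `"set_line_status"` action payload are illustrative assumptions, not part of this commit, and the chosen duration is still capped by `OpponentSpace`.

from grid2op.Opponent import BaseOpponent


class FixedDurationOpponent(BaseOpponent):
    """Hypothetical opponent: disconnects one line for a fixed number of steps."""

    def __init__(self, action_space, line_id=0, duration=5):
        BaseOpponent.__init__(self, action_space)
        # action disconnecting the targeted line (illustrative payload)
        self._attack = action_space({"set_line_status": [(line_id, -1)]})
        self._duration = duration

    def attack(self, observation, agent_action, env_action, budget, previous_fails):
        if observation is None:
            return None, 0  # environment creation: do not attack
        return self._attack, self._duration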
66 changes: 66 additions & 0 deletions grid2op/Reward/EpisodeDurationReward.py
@@ -0,0 +1,66 @@
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.

import numpy as np
from grid2op.Reward.BaseReward import BaseReward
from grid2op.dtypes import dt_float


class EpisodeDurationReward(BaseReward):
"""
This reward will always be 0., unless at the end of an episode where it will return the number
of steps made by the agent divided by the total number of steps possible in the episode.
Examples
---------
You can use this reward in any environment with:
.. code-block:: python
import grid2op
from grid2op.Reward import EpisodeDurationReward
# then you create your environment with it:
NAME_OF_THE_ENVIRONMENT = "rte_case14_realistic"
env = grid2op.make(NAME_OF_THE_ENVIRONMENT,reward_class=EpisodeDurationReward)
# and do a step with a "do nothing" action
obs = env.reset()
obs, reward, done, info = env.step(env.action_space())
# the reward is computed with the EpisodeDurationReward class
Notes
-----
In case of an environment being "fast forwarded" (see :func:`grid2op.Environment.BaseEnv.fast_forward_chronics`)
the steps skipped during the fast forward are counted "as if" they were successful.
This means that if you "fast forward" up until the end of an episode, you are likely to receive a reward of 1.0
"""
def __init__(self, per_timestep=1):
BaseReward.__init__(self)
self.per_timestep = dt_float(per_timestep)
self.total_time_steps = dt_float(0.0)
self.reward_min = dt_float(0.)
self.reward_max = dt_float(1.)

def initialize(self, env):
if env.chronics_handler.max_timestep() > 0:
self.total_time_steps = env.chronics_handler.max_timestep() * self.per_timestep
else:
self.total_time_steps = np.inf
self.reward_max = np.inf

def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
if is_done:
res = env.nb_time_step
if np.isfinite(self.total_time_steps):
res /= self.total_time_steps
else:
res = self.reward_min
return res
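A usage sketch for the new reward class (the environment name and the `reward_class` keyword come from the docstring above, the loop itself is illustrative): the reward stays at its minimum until the episode terminates, then reflects how long the agent survived.

import grid2op
from grid2op.Reward import EpisodeDurationReward

env = grid2op.make("rte_case14_realistic", reward_class=EpisodeDurationReward)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space())
# once the episode is over, reward is env.nb_time_step divided by the maximum
# number of steps in the episode (or the raw step count if that maximum is unbounded)
print(reward)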
3 changes: 1 addition & 2 deletions grid2op/Reward/IncreasingFlatReward.py
@@ -38,7 +38,6 @@ class IncreasingFlatReward(BaseReward):
def __init__(self, per_timestep=1):
BaseReward.__init__(self)
self.per_timestep = dt_float(per_timestep)
self.total_reward = dt_float(0.0)
self.reward_min = dt_float(0.0)

def initialize(self, env):
@@ -49,7 +48,7 @@ def initialize(self, env):

def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
if not has_error:
res = dt_float(env._nb_time_step * self.per_timestep)
res = dt_float(env.nb_time_step * self.per_timestep)
else:
res = self.reward_min
return res