Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v(3.3.4) - NormalizeObservation calibration load improvement #413

Merged
merged 5 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/source/pages/wrappers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ However, *Sinergym* enhances its functionality with some additional features:
- It provides access to the means and variations used for normalization calibration, addressing the low-level
issues found in the original wrapper.

- Similarly, these calibration values can be set via a method. Refer to the :ref:`API reference` for more information.
- Similarly, these calibration values can be set via a method or in the constructor.
These values can be specified either as a list/numpy array or as the path to a
previously generated txt file. Refer to the :ref:`API reference` for more information.

- The automatic calibration can be enabled or disabled as you interact with the environment, allowing the
calibration to remain static instead of adaptive.
Expand Down
2 changes: 1 addition & 1 deletion examples/drl.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -80137,7 +80137,7 @@
"source": [
"We'll create the Gym environment, but it's **important to wrap the environment with the same wrappers used during training**. We can use the evaluation experiment name to rename the environment.\n",
"\n",
"**Note**: If you are loading a pre-trained model and using the observation space normalization wrapper, you should use the means and variations calibrated during the training process for a fair evaluation. The next code specifies this aspect, those mean and var values are written in Sinergym training output as txt file automatically if you want to consult it later. It is also important to deactivate calibration update during evaluations. Check the documentation on the wrapper for more information."
"**Note**: If you are loading a pre-trained model and using the observation space normalization wrapper, you should use the means and variations calibrated during the training process for a fair evaluation. The next code specifies this aspect, those mean and var values are written in Sinergym training output as txt file automatically if you want to consult it later. You can use the list/numpy array values or set the txt path directly in the field constructor. It is also important to deactivate calibration update during evaluations. Check the documentation on the wrapper for more information."
]
},
{
Expand Down
62 changes: 40 additions & 22 deletions sinergym/utils/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,21 @@ def __init__(self,
env: EplusEnv,
automatic_update: bool = True,
epsilon: float = 1e-8,
mean: Union[list, np.float64] = None,
var: Union[list, np.float64] = None):
mean: Union[list, np.float64, str] = None,
var: Union[list, np.float64, str] = None):
"""Initializes the NormalizationWrapper. Mean and var values can be None andbeing updated during interaction with environment.

Args:
env (EplusEnv): The environment to apply the wrapper.
automatic_update (bool, optional): Whether or not to update the mean and variance values automatically. Defaults to True.
epsilon (float, optional): A stability parameter used when scaling the observations. Defaults to 1e-8.
mean (np.float64, optional): The mean value used for normalization. Defaults to None.
var (np.float64, optional): The variance value used for normalization. Defaults to None.
mean (list, np.float64, str, optional): The mean value used for normalization. It can be a mean.txt path too. Defaults to None.
var (list, np.float64, str, optional): The variance value used for normalization. It can be a var.txt path too. Defaults to None.
"""
# Check mean and var format if it is defined
mean = np.float64(mean) if mean is not None else None
var = np.float64(var) if var is not None else None
mean = self._check_and_update_metric(mean, 'mean')
var = self._check_and_update_metric(var, 'var')

# Save normalization configuration for whole python process
gym.utils.RecordConstructorArgs.__init__(
self, epsilon=epsilon, mean=mean, var=var)
Expand Down Expand Up @@ -126,7 +127,34 @@ def close(self):
# Update normalization calibration if it is required
self._save_normalization_calibration()

# ----------------------- Wrappers extra functionality ----------------------- #
# ----------------------- Wrapper extra functionality ----------------------- #

def _check_and_update_metric(self, metric, metric_name):
    """Validate a normalization metric and convert it to a float64 array.

    Args:
        metric (list, np.ndarray, str, optional): Calibration values, or the
            path to a txt file readable by ``np.loadtxt``. ``None`` is
            returned unchanged.
        metric_name (str): Name used in the log/error messages
            (e.g. 'mean' or 'var').

    Returns:
        Optional[np.ndarray]: The metric as a numpy array with at least one
        dimension, or None if no metric was given.

    Raises:
        FileNotFoundError: If ``metric`` is a path that does not exist.
        ValueError: If ``metric`` is not a list, a numpy array or a str.
        AssertionError: If the number of values differs from the first
            dimension of the observation space.
    """
    if metric is None:
        return None

    # Check type and conversions
    if isinstance(metric, str):
        try:
            metric = np.loadtxt(metric)
        except FileNotFoundError:
            self.logger.error(
                '{}.txt file not found. Please, check the path.'.format(metric_name))
            raise
    elif isinstance(metric, (list, np.ndarray)):
        metric = np.asarray(metric, dtype=np.float64)
    else:
        message = '{} values must be a list, a numpy array or a path to a txt file.'.format(
            metric_name)
        self.logger.error(message)
        raise ValueError(message)

    # A txt file holding a single value (or a 0-d array) has no len();
    # promote it to 1-d so the dimension check below works for it too.
    metric = np.atleast_1d(metric)

    # Check dimension of mean and var. The exception is raised explicitly
    # (instead of through an ``assert`` statement) so the validation is
    # still performed when Python runs with -O, while keeping the same
    # exception type for callers.
    if len(metric) != self.observation_space.shape[0]:
        message = '{} values must have the same shape than environment observation space.'.format(
            metric_name)
        self.logger.error(message)
        raise AssertionError(message)

    return metric

def _save_normalization_calibration(self):
"""Saves the normalization calibration data in the output folder as txt files.
Expand Down Expand Up @@ -171,24 +199,14 @@ def var(self) -> Optional[np.float64]:
else:
return None

def set_mean(self, mean: np.float64):
def set_mean(self, mean: Union[list, np.float64, str]):
"""Sets the mean value of the observations."""
try:
assert len(mean) == self.observation_space.shape[0]
except AssertionError as err:
self.logger.error(
'Mean values must have the same shape than environment observation space.')
raise err
mean = self._check_and_update_metric(mean, 'mean')
self.obs_rms.mean = mean

def set_var(self, var: np.float64):
def set_var(self, var: Union[list, np.float64, str]):
"""Sets the variance value of the observations."""
try:
assert len(var) == self.observation_space.shape[0]
except AssertionError as err:
self.logger.error(
'Variance values must have the same shape than environment observation space.')
raise err
var = self._check_and_update_metric(var, 'var')
self.obs_rms.var = var

def normalize(self, obs):
Expand Down Expand Up @@ -1056,7 +1074,7 @@ def reset(self,
class OfficeGridStorageSmoothingActionConstraintsWrapper(
gym.ActionWrapper): # pragma: no cover
def __init__(self, env):
assert env.building_path.split(
assert env.get_wrapper_attr('building_path').split(
'/')[-1] == 'OfficeGridStorageSmoothing.epJSON', 'OfficeGridStorageSmoothingActionConstraintsWrapper: This wrapper is not valid for this environment.'
super().__init__(env)

Expand Down
2 changes: 1 addition & 1 deletion sinergym/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.3.3
3.3.4
29 changes: 16 additions & 13 deletions tests/test_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,32 @@
def test_reset(env_name, request):
env = request.getfixturevalue(env_name)
# Check state before reset
assert env.episode == 0
assert env.energyplus_simulator.energyplus_state is None
assert env.get_wrapper_attr('episode') == 0
assert env.get_wrapper_attr(
'energyplus_simulator').energyplus_state is None
obs, info = env.reset()
# Check after reset
assert env.episode == 1
assert env.energyplus_simulator.energyplus_state is not None
assert len(obs) == len(env.time_variables) + len(env.variables) + \
len(env.meters) # year, month, day and hour
assert env.get_wrapper_attr('episode') == 1
assert env.get_wrapper_attr(
'energyplus_simulator').energyplus_state is not None
assert len(obs) == len(env.get_wrapper_attr('time_variables')) + len(env.get_wrapper_attr(
'variables')) + len(env.get_wrapper_attr('meters')) # year, month, day and hour
assert isinstance(info, dict)
assert len(info) > 0
# default_options check
if 'stochastic' not in env_name:
assert not env.default_options.get('weather_variability', False)
assert not env.get_wrapper_attr('default_options').get(
'weather_variability', False)
else:
assert isinstance(env.default_options['weather_variability'], tuple)
assert isinstance(env.get_wrapper_attr('default_options')[
'weather_variability'], tuple)


def test_reset_custom_options(env_5zone_stochastic):
assert isinstance(
env_5zone_stochastic.default_options['weather_variability'],
tuple)
assert len(
env_5zone_stochastic.default_options['weather_variability']) == 3
assert isinstance(env_5zone_stochastic.get_wrapper_attr(
'default_options')['weather_variability'], tuple)
assert len(env_5zone_stochastic.get_wrapper_attr(
'default_options')['weather_variability']) == 3
custom_options = {'weather_variability': (1.1, 0.1, 0.002)}
env_5zone_stochastic.reset(options=custom_options)
# Check if epw with new variation is overwriting default options
Expand Down
63 changes: 33 additions & 30 deletions tests/test_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,22 @@ def test_normalization_wrapper(env_name, request):
assert hasattr(env, 'unwrapped_observation')

# Check initial values of that attributes
assert env.unwrapped_observation is None
assert env.get_wrapper_attr('unwrapped_observation') is None

# Initialize env
obs, _ = env.reset()

# Check observation normalization
# ...
# Check original observation recording
assert env.unwrapped_observation is not None
assert env.get_wrapper_attr('unwrapped_observation') is not None

# Simulation random step
a = env.action_space.sample()
obs, _, _, _, _ = env.step(a)

# ...
assert env.unwrapped_observation is not None
assert env.get_wrapper_attr('unwrapped_observation') is not None


@pytest.mark.parametrize('env_name',
Expand All @@ -50,7 +50,7 @@ def test_multiobjective_wrapper(env_name, request):
action = env.action_space.sample()
_, reward, _, _, info = env.step(action)
assert isinstance(reward, list)
assert len(reward) == len(env.reward_terms)
assert len(reward) == len(env.get_wrapper_attr('reward_terms'))


@pytest.mark.parametrize('env_name',
Expand Down Expand Up @@ -91,14 +91,14 @@ def test_previous_observation_wrapper(env_name, request):
var for var in env.observation_variables if '_previous' in var]

# Check previous observation stored has the correct len and initial values
assert len(env.previous_observation) == 3
assert len(env.get_wrapper_attr('previous_observation')) == 3
assert len(previous_variable_names) == len(
env.previous_observation)
env.get_wrapper_attr('previous_observation'))
# Check reset and np.zeros is added in obs as previous variables
assert (env.previous_observation == 0.0).all()
assert (env.get_wrapper_attr('previous_observation') == 0.0).all()
obs1, _ = env.reset()
original_obs1 = []
for variable in env.previous_variables:
for variable in env.get_wrapper_attr('previous_variables'):
original_obs1.append(
obs1[env.observation_variables.index(variable)])

Expand All @@ -108,7 +108,7 @@ def test_previous_observation_wrapper(env_name, request):

# Original obs1 values should be previous variables for obs 2
assert np.array_equal(
original_obs1, obs2[-len(env.previous_variables):])
original_obs1, obs2[-len(env.get_wrapper_attr('previous_variables')):])


def test_incremental_wrapper(env_wrapper_incremental):
Expand Down Expand Up @@ -140,37 +140,38 @@ def test_discrete_incremental_wrapper(env_name, request):

env = request.getfixturevalue(env_name)
# Check initial setpoints values is initialized
assert len(env.current_setpoints) > 0
assert len(env.get_wrapper_attr('current_setpoints')) > 0
# Check if action selected is applied correctly
env.reset()
action = 16
_, _, _, _, info = env.step(action)
assert (env.current_setpoints == info['action']).all()
assert (env.get_wrapper_attr('current_setpoints') == info['action']).all()
# Check environment clips actions
for i in range(10):
env.step(2) # [1,0]
assert env.unwrapped.action_space.contains(list(env.current_setpoints))
assert env.unwrapped.action_space.contains(
list(env.get_wrapper_attr('current_setpoints')))


def test_discretize_wrapper(env_wrapper_discretize):

env = env_wrapper_discretize
# Check is a discrete env and original env is continuous
# Wrapped env
assert env.is_discrete
assert env.get_wrapper_attr('is_discrete')
assert env.action_space.n == 10
assert isinstance(env.action_mapping(0), list)
# Original continuous env
original_env = env.env
assert not original_env.is_discrete
assert not original_env.get_wrapper_attr('is_discrete')
assert not hasattr(original_env, 'action_mapping')


def test_normalize_observation_wrapper(env_wrapper_normalization):

# Spaces
env = env_wrapper_normalization
assert not env.is_discrete
assert not env.get_wrapper_attr('is_discrete')
assert hasattr(env, 'unwrapped_observation')

# Normalization calibration
Expand Down Expand Up @@ -224,15 +225,16 @@ def test_normalize_observation_wrapper(env_wrapper_normalization):
def test_normalize_action_wrapper(env_normalize_action_wrapper):

env = env_normalize_action_wrapper
assert not env.is_discrete
assert not env.get_wrapper_attr('is_discrete')
assert hasattr(env, 'real_space')
assert hasattr(env, 'normalized_space')
assert env.normalized_space != env.real_space
assert env.normalized_space == env.action_space
assert env.real_space == env.unwrapped.action_space
assert env.get_wrapper_attr(
'normalized_space') != env.get_wrapper_attr('real_space')
assert env.get_wrapper_attr('normalized_space') == env.action_space
assert env.get_wrapper_attr('real_space') == env.unwrapped.action_space
env.reset()
action = env.action_space.sample()
assert env.normalized_space.contains(action)
assert env.get_wrapper_attr('normalized_space').contains(action)
_, _, _, _, info = env.step(action)
assert env.unwrapped.action_space.contains(info['action'])

Expand All @@ -254,47 +256,48 @@ def test_multiobs_wrapper(env_name, request):
'history')

# Check history
assert env.history == deque([])
assert env.get_wrapper_attr('history') == deque([])

# Check observation space transformation
original_shape = env.env.observation_space.shape[0]
wrapped_shape = env.observation_space.shape[0]
assert wrapped_shape == original_shape * env.n
assert wrapped_shape == original_shape * env.get_wrapper_attr('n')

# Check reset obs
obs, _ = env.reset()
assert len(obs) == wrapped_shape
for i in range(env.n - 1):
for i in range(env.get_wrapper_attr('n') - 1):
# Check store same observation n times
assert (obs[original_shape * i:original_shape *
(i + 1)] == obs[0:original_shape]).all()
# Check history save same observation n times
assert (env.history[i] ==
env.history[i + 1]).all()
assert (env.get_wrapper_attr('history')[i] ==
env.get_wrapper_attr('history')[i + 1]).all()

# Check step obs
a = env.action_space.sample()
obs, _, _, _, _ = env.step(a)

# Last observation must be different from the rest of them
assert (obs[original_shape * (env.n - 1):]
assert (obs[original_shape * (env.get_wrapper_attr('n') - 1):]
!= obs[0:original_shape]).any()
assert (env.history[0] !=
env.history[-1]).any()
assert (env.get_wrapper_attr('history')[0] !=
env.get_wrapper_attr('history')[-1]).any()


@ pytest.mark.parametrize('env_name',
[('env_wrapper_logger'), ('env_all_wrappers'), ])
def test_logger_wrapper(env_name, request):

env = request.getfixturevalue(env_name)
logger = env.file_logger
logger = env.get_wrapper_attr('file_logger')
env.reset()

# Check CSV's have been created and linked in simulator correctly
assert logger.log_progress_file == env.get_wrapper_attr(
'workspace_path') + '/progress.csv'
assert logger.log_file == env.episode_path + '/monitor.csv'
assert logger.log_file == env.get_wrapper_attr(
'episode_path') + '/monitor.csv'

tmp_log_file = logger.log_file

Expand Down
Loading