Skip to content

Commit

Permalink
prepared hyperopt experiment ddpg
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Weber committed Apr 27, 2021
1 parent 9402dc8 commit a4e37bb
Show file tree
Hide file tree
Showing 2 changed files with 210 additions and 136 deletions.
121 changes: 121 additions & 0 deletions experiments/issue51_new/env/rewards.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import numpy as np
from openmodelica_microgrid_gym.util import nested_map
from typing import List


class Reward:
def __init__(self, nom, lim, v_DC, gamma, use_gamma_normalization=1):
self._idx = None
self.nom = nom
self.lim = lim
self.v_DC = v_DC
self.use_gamma_normalization = use_gamma_normalization
if self.use_gamma_normalization == 1:
self.gamma = gamma
else:
self.gamma = 0

def set_idx(self, obs):
if self._idx is None:
self._idx = nested_map(
lambda n: obs.index(n),
[[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'],
[f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012']])

def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float:
"""
Defines the reward function for the environment. Uses the observations and set-points to evaluate the quality of
the used parameters.
Takes current and voltage measurements and set-points to calculate the mean-root control error and uses a
logarithmic barrier function in case of violating the current limit. Barrier function is adjustable using
parameter mu.
:param cols: list of variable names of the data
:param data: observation data from the environment (ControlVariables, e.g. currents and voltages)
:return: Error as negative reward
"""
self.set_idx(cols)
idx = self._idx

iabc_master = data[idx[0]] # 3 phase currents at LC inductors
vabc_master = data[idx[2]] # 3 phase currents at LC inductors

# set points (sp)
isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates
vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates

# control error = mean-root-error (MRE) of reference minus measurement
# (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides
# better, i.e. more significant, gradients)
# plus barrier penalty for violating the current constraint

# error = ( # np.sum((np.abs((isp_abc_master - iabc_master)) / i_lim) ** 0.5, axis=0)
# toDo: Es gibt kein iREF!!! Nur Grenze überprüfen
# + -np.sum(mu_c * np.log(1 - np.maximum(np.abs(iabc_master) - i_nom, 0) /
# (i_lim - i_nom)), axis=0)
# np.sum((np.abs((vsp_abc_master - vabc_master)) / v_nom) ** 0.5, axis=0) \
# + -np.sum(mu_v * np.log(1 - np.maximum(np.abs(vabc_master) - v_nom, 0)/ \
# (v_lim - v_nom)), axis=0)\
# ) #/ max_episode_steps

# difference_sp_nom = vsp_abc_master - vabc_master
# error = np.sum(
# np.minimum(
# 1 - np.abs(difference_sp_nom) / np.abs(
# vsp_abc_master + (v_nom * (np.sign(difference_sp_nom) + (difference_sp_nom == 0)))),
# (v_nom - np.abs(vabc_master)) / (v_lim - v_nom))
# )
# error = error* 10

SP = vsp_abc_master * self.lim
mess = vabc_master * self.lim

if all(np.abs(mess) <= self.nom):
"""
1st area - inside wanted (nom) operation range
-v_nom -> + v_nom
rew = 1; if mess = SP
rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area)
"""

rew = np.sum((1 - np.abs(SP - mess) / (2 * self.nom)) * 2 * (1 - self.gamma) / 3 + (1 - self.gamma) / 3) / 3

elif any(np.abs(vabc_master) > self.lim):
"""
3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS
+-v_lim -> +-v_DC
@ SP = +v_nom AND mess = -v_DC:
rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured
@ SP = -v_nom AND mess = -v_lim
rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)]
rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC))
The latter fraction is quite small but leads to depending on the system less then 2/3 is
substracted and we have a gap to the 2nd area! :)
"""

rew = np.sum(
(1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3

# if return -> rew = None and in env abort_reward is given to agent
# return

# elif any(np.abs(vabc_master) > v_DC):
# rew = (1-gamma)

else:
"""
2nd area
+-v_nom -> +- v_lim
@ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?)
rew = 1/3
@ SP = v_nom AND mess = -v_lim
rew = -1/3
"""
rew = np.sum(
(1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3

return rew # * (1-0.9)
# return -np.clip(error.squeeze(), 0, 1e5)
225 changes: 89 additions & 136 deletions experiments/issue51_new/stable_baselinesDDPG_voltage_control.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from datetime import datetime
from functools import partial
from os import makedirs
from typing import List

import torch as th
import torch.nn as nn
Expand All @@ -15,6 +14,7 @@
from stable_baselines3.common.monitor import Monitor
from stochastic.processes import VasicekProcess

from experiments.issue51_new.env.rewards import Reward
from openmodelica_microgrid_gym.env import PlotTmpl
from openmodelica_microgrid_gym.net import Network
from openmodelica_microgrid_gym.util import nested_map, RandProcess
Expand Down Expand Up @@ -63,141 +63,94 @@ def load_step(t, gain):
return gen.sample(t)


class Reward:
def __init__(self):
self._idx = None

def set_idx(self, obs):
if self._idx is None:
self._idx = nested_map(
lambda n: obs.index(n),
[[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'],
[f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012']])

def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float:
"""
Defines the reward function for the environment. Uses the observations and set-points to evaluate the quality of
the used parameters.
Takes current and voltage measurements and set-points to calculate the mean-root control error and uses a
logarithmic barrier function in case of violating the current limit. Barrier function is adjustable using
parameter mu.
:param cols: list of variable names of the data
:param data: observation data from the environment (ControlVariables, e.g. currents and voltages)
:return: Error as negative reward
"""
self.set_idx(cols)
idx = self._idx

iabc_master = data[idx[0]] # 3 phase currents at LC inductors
vabc_master = data[idx[2]] # 3 phase currents at LC inductors

# set points (sp)
isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates
vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates

# control error = mean-root-error (MRE) of reference minus measurement
# (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides
# better, i.e. more significant, gradients)
# plus barrier penalty for violating the current constraint

error = ( # np.sum((np.abs((isp_abc_master - iabc_master)) / i_lim) ** 0.5, axis=0)
# toDo: Es gibt kein iREF!!! Nur Grenze überprüfen
# + -np.sum(mu_c * np.log(1 - np.maximum(np.abs(iabc_master) - i_nom, 0) /
# (i_lim - i_nom)), axis=0)
np.sum((np.abs((vsp_abc_master - vabc_master)) / v_nom) ** 0.5, axis=0) \
# + -np.sum(mu_v * np.log(1 - np.maximum(np.abs(vabc_master) - v_nom, 0) /
# (v_lim - v_nom)), axis=0)\
) # / max_episode_steps

return -np.clip(error.squeeze(), 0, 1e5)


def xylables_i(fig):
ax = fig.gca()
ax.set_xlabel(r'$t\,/\,\mathrm{s}$')
ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$')
ax.grid(which='both')
fig.savefig(f'{timestamp}/Inductor_currents.pdf')
fig.show()


def xylables_v(fig):
ax = fig.gca()
ax.set_xlabel(r'$t\,/\,\mathrm{s}$')
ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$')
ax.grid(which='both')
fig.savefig(f'{timestamp}/Capacitor_voltages.pdf')
fig.show()


def xylables_R(fig):
ax = fig.gca()
ax.set_xlabel(r'$t\,/\,\mathrm{s}$')
ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$')
ax.grid(which='both')
ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2])
fig.savefig(f'{timestamp}/Load.pdf')
fig.show()


env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1',
reward_fun=Reward().rew_fun,
viz_cols=[
PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']],
callback=xylables_v,
color=[['b', 'r', 'g'], ['b', 'r', 'g']],
style=[[None], ['--']]
),
PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']],
callback=xylables_i,
color=[['b', 'r', 'g'], ['b', 'r', 'g']],
style=[[None], ['--']]
),
PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']],
callback=xylables_R,
color=[['b', 'r', 'g']],
style=[[None]]
)
],
viz_mode='episode',
max_episode_steps=max_episode_steps,
model_params={'lc.resistor1.R': R_filter,
'lc.resistor2.R': R_filter,
'lc.resistor3.R': R_filter,
'lc.resistor4.R': 0.0000001,
'lc.resistor5.R': 0.0000001,
'lc.resistor6.R': 0.0000001,
'lc.inductor1.L': L_filter,
'lc.inductor2.L': L_filter,
'lc.inductor3.L': L_filter,
'lc.capacitor1.C': C_filter,
'lc.capacitor2.C': C_filter,
'lc.capacitor3.C': C_filter,
'r_load.resistor1.R': partial(load_step, gain=R),
'r_load.resistor2.R': partial(load_step, gain=R),
'r_load.resistor3.R': partial(load_step, gain=R),
# 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_lim,
# high=v_lim) if t == 0 else None,
# 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_lim,
# high=v_lim) if t == 0 else None,
# 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_lim,
# high=v_lim) if t == 0 else None,
# 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_lim,
# high=i_lim) if t == 0 else None,
# 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_lim,
# high=i_lim) if t == 0 else None,
# 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_lim,
# high=i_lim) if t == 0 else None,
},
net=net,
model_path='../../omg_grid/grid.paper_loadstep.fmu',
on_episode_reset_callback=partial(gen.reset, initial=R)
)

with open(f'{timestamp}/env.txt', 'w') as f:
print(str(env), file=f)
env = Monitor(env)
def experiment_fit_DDPG(learning_rate, gamma, n_trail):
makedirs(folder_name + experiment_name + n_trail)

def xylables_i(fig):
ax = fig.gca()
ax.set_xlabel(r'$t\,/\,\mathrm{s}$')
ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$')
ax.grid(which='both')
fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf')
fig.show()

def xylables_v(fig):
ax = fig.gca()
ax.set_xlabel(r'$t\,/\,\mathrm{s}$')
ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$')
ax.grid(which='both')
fig.savefig(f'{folder_name + experiment_name + n_trail}/Capacitor_voltages{datetime.now()}.pdf')
fig.show()

def xylables_R(fig):
ax = fig.gca()
ax.set_xlabel(r'$t\,/\,\mathrm{s}$')
ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$')
ax.grid(which='both')
ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2])
fig.savefig(f'{folder_name + experiment_name + n_trail}/Load.pdf')
fig.show()

rew = Reward(v_nom, v_lim, v_DC, gamma)

env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1',
reward_fun=rew.rew_fun,
viz_cols=[
PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']],
callback=xylables_v,
color=[['b', 'r', 'g'], ['b', 'r', 'g']],
style=[[None], ['--']]
),
PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']],
callback=xylables_i,
color=[['b', 'r', 'g'], ['b', 'r', 'g']],
style=[[None], ['--']]
),
# PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']],
# callback=xylables_R,
# color=[['b', 'r', 'g']],
# style=[[None]]
# )
],
viz_mode='episode',
max_episode_steps=max_episode_steps,
model_params={'lc.resistor1.R': R_filter,
'lc.resistor2.R': R_filter,
'lc.resistor3.R': R_filter,
'lc.resistor4.R': 0.0000001,
'lc.resistor5.R': 0.0000001,
'lc.resistor6.R': 0.0000001,
'lc.inductor1.L': L_filter,
'lc.inductor2.L': L_filter,
'lc.inductor3.L': L_filter,
'lc.capacitor1.C': C_filter,
'lc.capacitor2.C': C_filter,
'lc.capacitor3.C': C_filter,
'r_load.resistor1.R': partial(load_step, gain=R),
'r_load.resistor2.R': partial(load_step, gain=R),
'r_load.resistor3.R': partial(load_step, gain=R),
# 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_lim,
# high=v_lim) if t == 0 else None,
# 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_lim,
# high=v_lim) if t == 0 else None,
# 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_lim,
# high=v_lim) if t == 0 else None,
# 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_lim,
# high=i_lim) if t == 0 else None,
# 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_lim,
# high=i_lim) if t == 0 else None,
# 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_lim,
# high=i_lim) if t == 0 else None,
},
net=net,
model_path='../../omg_grid/grid.paper_loadstep.fmu',
on_episode_reset_callback=partial(gen.reset, initial=R),
is_normalized=True
)

with open(f'{folder_name + experiment_name + n_trail}/env.txt', 'w') as f:
print(str(env), file=f)
env = Monitor(env)


class RecordEnvCallback(BaseCallback):
Expand Down

0 comments on commit a4e37bb

Please sign in to comment.