-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Daniel Weber
committed
Apr 27, 2021
1 parent
9402dc8
commit a4e37bb
Showing
2 changed files
with
210 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
import numpy as np | ||
from openmodelica_microgrid_gym.util import nested_map | ||
from typing import List | ||
|
||
|
||
class Reward: | ||
def __init__(self, nom, lim, v_DC, gamma, use_gamma_normalization=1): | ||
self._idx = None | ||
self.nom = nom | ||
self.lim = lim | ||
self.v_DC = v_DC | ||
self.use_gamma_normalization = use_gamma_normalization | ||
if self.use_gamma_normalization == 1: | ||
self.gamma = gamma | ||
else: | ||
self.gamma = 0 | ||
|
||
def set_idx(self, obs): | ||
if self._idx is None: | ||
self._idx = nested_map( | ||
lambda n: obs.index(n), | ||
[[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'], | ||
[f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012']]) | ||
|
||
def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: | ||
""" | ||
Defines the reward function for the environment. Uses the observations and set-points to evaluate the quality of | ||
the used parameters. | ||
Takes current and voltage measurements and set-points to calculate the mean-root control error and uses a | ||
logarithmic barrier function in case of violating the current limit. Barrier function is adjustable using | ||
parameter mu. | ||
:param cols: list of variable names of the data | ||
:param data: observation data from the environment (ControlVariables, e.g. currents and voltages) | ||
:return: Error as negative reward | ||
""" | ||
self.set_idx(cols) | ||
idx = self._idx | ||
|
||
iabc_master = data[idx[0]] # 3 phase currents at LC inductors | ||
vabc_master = data[idx[2]] # 3 phase currents at LC inductors | ||
|
||
# set points (sp) | ||
isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates | ||
vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates | ||
|
||
# control error = mean-root-error (MRE) of reference minus measurement | ||
# (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides | ||
# better, i.e. more significant, gradients) | ||
# plus barrier penalty for violating the current constraint | ||
|
||
# error = ( # np.sum((np.abs((isp_abc_master - iabc_master)) / i_lim) ** 0.5, axis=0) | ||
# toDo: Es gibt kein iREF!!! Nur Grenze überprüfen | ||
# + -np.sum(mu_c * np.log(1 - np.maximum(np.abs(iabc_master) - i_nom, 0) / | ||
# (i_lim - i_nom)), axis=0) | ||
# np.sum((np.abs((vsp_abc_master - vabc_master)) / v_nom) ** 0.5, axis=0) \ | ||
# + -np.sum(mu_v * np.log(1 - np.maximum(np.abs(vabc_master) - v_nom, 0)/ \ | ||
# (v_lim - v_nom)), axis=0)\ | ||
# ) #/ max_episode_steps | ||
|
||
# difference_sp_nom = vsp_abc_master - vabc_master | ||
# error = np.sum( | ||
# np.minimum( | ||
# 1 - np.abs(difference_sp_nom) / np.abs( | ||
# vsp_abc_master + (v_nom * (np.sign(difference_sp_nom) + (difference_sp_nom == 0)))), | ||
# (v_nom - np.abs(vabc_master)) / (v_lim - v_nom)) | ||
# ) | ||
# error = error* 10 | ||
|
||
SP = vsp_abc_master * self.lim | ||
mess = vabc_master * self.lim | ||
|
||
if all(np.abs(mess) <= self.nom): | ||
""" | ||
1st area - inside wanted (nom) operation range | ||
-v_nom -> + v_nom | ||
rew = 1; if mess = SP | ||
rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) | ||
""" | ||
|
||
rew = np.sum((1 - np.abs(SP - mess) / (2 * self.nom)) * 2 * (1 - self.gamma) / 3 + (1 - self.gamma) / 3) / 3 | ||
|
||
elif any(np.abs(vabc_master) > self.lim): | ||
""" | ||
3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS | ||
+-v_lim -> +-v_DC | ||
@ SP = +v_nom AND mess = -v_DC: | ||
rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured | ||
@ SP = -v_nom AND mess = -v_lim | ||
rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] | ||
rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) | ||
The latter fraction is quite small but leads to depending on the system less then 2/3 is | ||
substracted and we have a gap to the 2nd area! :) | ||
""" | ||
|
||
rew = np.sum( | ||
(1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 | ||
|
||
# if return -> rew = None and in env abort_reward is given to agent | ||
# return | ||
|
||
# elif any(np.abs(vabc_master) > v_DC): | ||
# rew = (1-gamma) | ||
|
||
else: | ||
""" | ||
2nd area | ||
+-v_nom -> +- v_lim | ||
@ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) | ||
rew = 1/3 | ||
@ SP = v_nom AND mess = -v_lim | ||
rew = -1/3 | ||
""" | ||
rew = np.sum( | ||
(1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 | ||
|
||
return rew # * (1-0.9) | ||
# return -np.clip(error.squeeze(), 0, 1e5) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters