diff --git a/openrl/envs/vec_env/vec_info/simple_vec_info.py b/openrl/envs/vec_env/vec_info/simple_vec_info.py
index ebe21ce4..fd7b0a1c 100644
--- a/openrl/envs/vec_env/vec_info/simple_vec_info.py
+++ b/openrl/envs/vec_env/vec_info/simple_vec_info.py
@@ -17,7 +17,7 @@ def __init__(self, parallel_env_num: int, agent_num: int):
 
     def statistics(self, buffer: Any) -> Dict[str, Any]:
         # this function should be called each episode
-        rewards = buffer.data.rewardsc.copy()
+        rewards = buffer.data.rewards.copy()
         self.total_step += np.prod(rewards.shape[:2])
         rewards = rewards.transpose(2, 1, 0, 3)
         info_dict = {}
diff --git a/openrl/rewards/base_reward.py b/openrl/rewards/base_reward.py
index 7ad43fe0..50de647e 100644
--- a/openrl/rewards/base_reward.py
+++ b/openrl/rewards/base_reward.py
@@ -12,29 +12,14 @@ def __init__(self):
     def step_reward(
         self, data: Dict[str, Any]
     ) -> Union[np.ndarray, List[Dict[str, Any]]]:
-        rewards = data["reward"].copy()
-        infos = []
-
-        for rew_func in self.step_rew_funcs.values():
-            new_rew, new_info = rew_func(data)
-            if len(infos) == 0:
-                infos = new_info
-            else:
-                for i in range(len(infos)):
-                    infos[i].update(new_info[i])
-            rewards += new_rew
+
+        rewards = data["rewards"].copy()
+        infos = [dict() for _ in range(rewards.shape[0])]
 
         return rewards, infos
 
     def batch_rewards(self, buffer: Any) -> Dict[str, Any]:
-        for rew_func in self.batch_rew_funcs.values():
-            new_rew, new_info = rew_func()
-            if len(infos) == 0:
-                infos = new_info
-            else:
-                infos.update(new_info)
-        # update rewards, and infos here
-
-        return dict()
+        infos = dict()
+        return infos
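
For reference, below is a minimal sketch of what `openrl/rewards/base_reward.py` looks like once this patch is applied, with a small smoke test of the new behaviour: `step_reward` now passes the environment rewards through unchanged and returns one empty info dict per parallel environment, while `batch_rewards` returns an empty dict. The two method bodies are taken from the `+` lines above; the class name `BaseReward`, the import block, the omission of `__init__`, and the `(2, 3, 1)` reward shape in the test are assumptions for illustration, not part of the diff.

```python
# Sketch of openrl/rewards/base_reward.py after this patch (assumptions noted above).
from typing import Any, Dict, List, Union

import numpy as np


class BaseReward:  # class name assumed; only the method bodies come from the diff
    def step_reward(
        self, data: Dict[str, Any]
    ) -> Union[np.ndarray, List[Dict[str, Any]]]:
        # Pass the environment rewards through unchanged and return one empty
        # info dict per parallel environment (rewards.shape[0]).
        rewards = data["rewards"].copy()
        infos = [dict() for _ in range(rewards.shape[0])]

        return rewards, infos

    def batch_rewards(self, buffer: Any) -> Dict[str, Any]:
        # No batch-level reward shaping in the base class; subclasses can
        # override this and return extra statistics keyed by name.
        infos = dict()
        return infos


if __name__ == "__main__":
    # Smoke test: 2 parallel envs, 3 agents, reward dim 1 (shapes assumed).
    reward_fn = BaseReward()
    data = {"rewards": np.zeros((2, 3, 1))}
    rewards, infos = reward_fn.step_reward(data)
    assert rewards.shape == (2, 3, 1) and infos == [{}, {}]
    assert reward_fn.batch_rewards(buffer=None) == {}
```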