Skip to content

Commit

Permalink
Hotfix 0.3.1b (#637)
Browse files Browse the repository at this point in the history
* [Fix] Use the stored agent info instead of the previous agent info when bootstrapping the value

* [Bug Fix] Addressed #643

* [Added Line Break]
  • Loading branch information
vincentpierre authored Apr 19, 2018
1 parent 5165e88 commit 13beeac
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
5 changes: 3 additions & 2 deletions python/unitytrainers/bc/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,9 @@ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take
else:
idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = info_teacher.text_observations[idx].lower().split(",")
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
stored_info_teacher.text_observations[idx].lower().split(",")
next_info_teacher_record, next_info_teacher_reset = next_info_teacher.text_observations[idx].\
lower().split(",")
if next_info_teacher_reset == "true":
Expand Down
9 changes: 5 additions & 4 deletions python/unitytrainers/ppo/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,18 +269,20 @@ def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo
"""

info = new_info[self.brain_name]
last_info = current_info[self.brain_name]
for l in range(len(info.agents)):
agent_actions = self.training_buffer[info.agents[l]]['actions']
if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])
and len(agent_actions) > 0):
agent_id = info.agents[l]
if info.local_done[l] and not info.max_reached[l]:
value_next = 0.0
else:
if info.max_reached[l]:
bootstrapping_info = last_info
bootstrapping_info = self.training_buffer[agent_id].last_brain_info
idx = bootstrapping_info.agents.index(agent_id)
else:
bootstrapping_info = info
idx = l
feed_dict = {self.model.batch_size: len(bootstrapping_info.vector_observations), self.model.sequence_length: 1}
if self.use_observations:
for i in range(len(bootstrapping_info.visual_observations)):
Expand All @@ -293,8 +295,7 @@ def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo
feed_dict[self.model.memory_in] = bootstrapping_info.memories
if not self.is_continuous_action and self.use_recurrent:
feed_dict[self.model.prev_action] = np.reshape(bootstrapping_info.previous_vector_actions, [-1])
value_next = self.sess.run(self.model.value, feed_dict)[l]
agent_id = info.agents[l]
value_next = self.sess.run(self.model.value, feed_dict)[idx]

self.training_buffer[agent_id]['advantages'].set(
get_gae(
Expand Down

0 comments on commit 13beeac

Please sign in to comment.