Skip to content

Commit

Permalink
Hotfix 0.3.1b (#637)
Browse files Browse the repository at this point in the history
* [Fix] Use the stored agent info instead of the previous agent info when bootstrapping the value

* [Bug Fix] Addressed #643

* [Added Line Break]
  • Loading branch information
vincentpierre authored Apr 19, 2018
1 parent 5165e88 commit 13beeac
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
5 changes: 3 additions & 2 deletions python/unitytrainers/bc/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,9 @@ def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take
else:
idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = info_teacher.text_observations[idx].lower().split(",")
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
stored_info_teacher.text_observations[idx].lower().split(",")
next_info_teacher_record, next_info_teacher_reset = next_info_teacher.text_observations[idx].\
lower().split(",")
if next_info_teacher_reset == "true":
Expand Down
9 changes: 5 additions & 4 deletions python/unitytrainers/ppo/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,18 +269,20 @@ def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo
"""

info = new_info[self.brain_name]
last_info = current_info[self.brain_name]
for l in range(len(info.agents)):
agent_actions = self.training_buffer[info.agents[l]]['actions']
if ((info.local_done[l] or len(agent_actions) > self.trainer_parameters['time_horizon'])
and len(agent_actions) > 0):
agent_id = info.agents[l]
if info.local_done[l] and not info.max_reached[l]:
value_next = 0.0
else:
if info.max_reached[l]:
bootstrapping_info = last_info
bootstrapping_info = self.training_buffer[agent_id].last_brain_info
idx = bootstrapping_info.agents.index(agent_id)
else:
bootstrapping_info = info
idx = l
feed_dict = {self.model.batch_size: len(bootstrapping_info.vector_observations), self.model.sequence_length: 1}
if self.use_observations:
for i in range(len(bootstrapping_info.visual_observations)):
Expand All @@ -293,8 +295,7 @@ def process_experiences(self, current_info: AllBrainInfo, new_info: AllBrainInfo
feed_dict[self.model.memory_in] = bootstrapping_info.memories
if not self.is_continuous_action and self.use_recurrent:
feed_dict[self.model.prev_action] = np.reshape(bootstrapping_info.previous_vector_actions, [-1])
value_next = self.sess.run(self.model.value, feed_dict)[l]
agent_id = info.agents[l]
value_next = self.sess.run(self.model.value, feed_dict)[idx]

self.training_buffer[agent_id]['advantages'].set(
get_gae(
Expand Down

0 comments on commit 13beeac

Please sign in to comment.