diff --git a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
index 56130367ea..7ce18ac1bc 100644
--- a/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
+++ b/ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
@@ -88,7 +88,7 @@ def _evaluate_by_sequence(
         # For the first sequence, the initial memory should be the one at the
         # beginning of this trajectory.
         for _ in range(first_seq_len):
-            all_next_memories.append(initial_memory.squeeze().detach().numpy())
+            all_next_memories.append(ModelUtils.to_numpy(initial_memory.squeeze()))
 
         init_values, _mem = self.critic.critic_pass(
             seq_obs, initial_memory, sequence_length=first_seq_len
@@ -105,7 +105,7 @@
         ):
             seq_obs = []
             for _ in range(self.policy.sequence_length):
-                all_next_memories.append(_mem.squeeze().detach().numpy())
+                all_next_memories.append(ModelUtils.to_numpy(_mem.squeeze()))
             for _obs in tensor_obs:
                 start = seq_num * self.policy.sequence_length - (
                     self.policy.sequence_length - leftover
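
Note on the change: both hunks replace the inline `tensor.squeeze().detach().numpy()` chain with the shared `ModelUtils.to_numpy` helper. The practical motivation is device safety: calling `.numpy()` directly on a CUDA tensor raises a TypeError, whereas a centralized helper can move the tensor to the CPU first. Below is a minimal sketch of what such a helper looks like; the body is an assumption based on standard PyTorch idioms, not the verbatim ml-agents implementation.

```python
import numpy as np
import torch


class ModelUtils:
    # Sketch of the helper the diff switches to. The detach/cpu/numpy
    # chain is assumed, not copied from the ml-agents source.
    @staticmethod
    def to_numpy(tensor: torch.Tensor) -> np.ndarray:
        # detach() drops the autograd graph, cpu() copies the data off
        # the GPU when necessary, and only then is numpy() safe to call.
        return tensor.detach().cpu().numpy()


# Usage mirroring the patched lines (hypothetical shapes for illustration):
# this works whether the memory tensor lives on the CPU or a CUDA device.
initial_memory = torch.zeros(1, 1, 128)
all_next_memories = [ModelUtils.to_numpy(initial_memory.squeeze())]
```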