Passing policy_state correctly into the policy.action() function when computing Q values.

PiperOrigin-RevId: 402601593
Change-Id: Ibb58d385835ccf9a1062be8a7a281a7f4dd92bdc
tensorflow · Oct 12, 2021 · 2dc591b · 2dc591b
1 parent e769fec
commit 2dc591b
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/tf_agents/agents/qtopt/qtopt_agent.py b/tf_agents/agents/qtopt/qtopt_agent.py
@@ -684,7 +684,8 @@ def _compute_next_q_values(self, next_time_steps, info, network_state=()):
  if not self._in_graph_bellman_update:
  return info['target_q']
 
- next_action_policy_step = self._policy.action(next_time_steps)
+ next_action_policy_step = self._policy.action(
+ next_time_steps, network_state)
 
  if self._enable_td3:
  q_values_target_delayed, _ = self._target_q_network_delayed(