-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make the Agent reset immediately after Done #3291
Changes from 5 commits
e1385f7
043819a
90cfbb2
43a1185
adcffb0
a930143
de0303c
98dc22b
25b7483
b88f7ae
b094180
aecb9e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,7 +44,7 @@ public struct AgentInfo | |
/// Unique identifier each agent receives at initialization. It is used | ||
/// to separate between different agents in the environment. | ||
/// </summary> | ||
public int id; | ||
public int episodeId; | ||
} | ||
|
||
/// <summary> | ||
|
@@ -148,13 +148,6 @@ public abstract class Agent : MonoBehaviour | |
/// Whether or not the agent requests a decision. | ||
bool m_RequestDecision; | ||
|
||
/// Whether or not the agent has completed the episode. This may be due | ||
/// to either reaching a success or fail state, or reaching the maximum | ||
/// number of steps (i.e. timing out). | ||
bool m_Done; | ||
|
||
/// Whether or not the agent reached the maximum number of steps. | ||
bool m_MaxStepReached; | ||
|
||
/// Keeps track of the number of steps taken by the agent in this episode. | ||
/// Note that this value is different for each agent, and may not overlap | ||
|
@@ -164,7 +157,7 @@ public abstract class Agent : MonoBehaviour | |
|
||
/// Unique identifier each agent receives at initialization. It is used | ||
/// to separate between different agents in the environment. | ||
int m_Id; | ||
int m_EpisodeId; | ||
vincentpierre marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/// Keeps track of the actions that are masked at each step. | ||
ActionMasker m_ActionMasker; | ||
|
@@ -190,7 +183,7 @@ public abstract class Agent : MonoBehaviour | |
/// becomes enabled or active. | ||
void OnEnable() | ||
{ | ||
m_Id = gameObject.GetInstanceID(); | ||
m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); | ||
OnEnableHelper(); | ||
|
||
m_Recorder = GetComponent<DemonstrationRecorder>(); | ||
|
@@ -204,7 +197,6 @@ void OnEnableHelper() | |
m_Action = new AgentAction(); | ||
sensors = new List<ISensor>(); | ||
|
||
Academy.Instance.AgentResetIfDone += ResetIfDone; | ||
Academy.Instance.AgentSendState += SendInfo; | ||
Academy.Instance.DecideAction += DecideAction; | ||
Academy.Instance.AgentAct += AgentStep; | ||
|
@@ -224,7 +216,6 @@ void OnDisable() | |
// We don't want to even try, because this will lazily create a new Academy! | ||
if (Academy.IsInitialized) | ||
{ | ||
Academy.Instance.AgentResetIfDone -= ResetIfDone; | ||
Academy.Instance.AgentSendState -= SendInfo; | ||
Academy.Instance.DecideAction -= DecideAction; | ||
Academy.Instance.AgentAct -= AgentStep; | ||
|
@@ -234,12 +225,14 @@ void OnDisable() | |
m_Brain?.Dispose(); | ||
} | ||
|
||
void NotifyAgentDone() | ||
void NotifyAgentDone(bool maxStepReached = false) | ||
{ | ||
m_Info.done = true; | ||
m_Info.maxStepReached = maxStepReached; | ||
// Request the last decision with no callbacks | ||
// We request a decision so Python knows the Agent is disabled | ||
m_Brain?.RequestDecision(m_Info, sensors, (a) => { }); | ||
m_EpisodeId = EpisodeIdCounter.GetEpisodeId(); | ||
vincentpierre marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/// <summary> | ||
|
@@ -322,7 +315,12 @@ public float GetCumulativeReward() | |
/// </summary> | ||
public void Done() | ||
{ | ||
m_Done = true; | ||
NotifyAgentDone(); | ||
_AgentReset(); | ||
m_RequestAction = false; | ||
m_RequestDecision = false; | ||
m_Reward = 0f; | ||
m_CumulativeReward = 0f; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Feels like this could be moved into NotifyAgentDone() (or maybe combine Done and NotifyAgentDone, unless you don't want to the user to set maxStepReached) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved some things around |
||
} | ||
|
||
/// <summary> | ||
|
@@ -342,28 +340,6 @@ public void RequestAction() | |
m_RequestAction = true; | ||
} | ||
|
||
/// <summary> | ||
/// Indicates if the agent has reached its maximum number of steps. | ||
/// </summary> | ||
/// <returns> | ||
/// <c>true</c>, if the maximum step count was reached, <c>false</c> otherwise. | ||
/// </returns> | ||
public bool IsMaxStepReached() | ||
{ | ||
return m_MaxStepReached; | ||
} | ||
|
||
/// <summary> | ||
/// Indicates if the agent is done | ||
/// </summary> | ||
/// <returns> | ||
/// <c>true</c>, if the agent is done, <c>false</c> otherwise. | ||
/// </returns> | ||
public bool IsDone() | ||
{ | ||
return m_Done; | ||
} | ||
|
||
/// Helper function that resets all the data structures associated with | ||
/// the agent. Typically used when the agent is being initialized or reset | ||
/// at the end of an episode. | ||
|
@@ -489,9 +465,9 @@ void SendInfoToBrain() | |
m_Info.actionMasks = m_ActionMasker.GetMask(); | ||
|
||
m_Info.reward = m_Reward; | ||
m_Info.done = m_Done; | ||
m_Info.maxStepReached = m_MaxStepReached; | ||
m_Info.id = m_Id; | ||
m_Info.done = false; | ||
m_Info.maxStepReached = false; | ||
m_Info.episodeId = m_EpisodeId; | ||
|
||
m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction); | ||
|
||
|
@@ -742,51 +718,41 @@ protected float ScaleAction(float rawAction, float min, float max) | |
} | ||
|
||
|
||
/// Signals the agent that it must reset if its done flag is set to true. | ||
void ResetIfDone() | ||
{ | ||
if (m_Done) | ||
{ | ||
_AgentReset(); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// Signals the agent that it must sent its decision to the brain. | ||
/// </summary> | ||
void SendInfo() | ||
{ | ||
// If the Agent is done, it has just reset and thus requires a new decision | ||
if (m_RequestDecision || m_Done) | ||
if (m_RequestDecision) | ||
{ | ||
SendInfoToBrain(); | ||
m_Reward = 0f; | ||
if (m_Done) | ||
{ | ||
m_CumulativeReward = 0f; | ||
} | ||
m_Done = false; | ||
m_MaxStepReached = false; | ||
m_RequestDecision = false; | ||
} | ||
} | ||
|
||
/// Used by the brain to make the agent perform a step. | ||
void AgentStep() | ||
{ | ||
if ((m_RequestAction) && (m_Brain != null)) | ||
if ((m_StepCount >= maxStep - 1) && (maxStep > 0)) | ||
{ | ||
NotifyAgentDone(true); | ||
_AgentReset(); | ||
m_RequestAction = false; | ||
AgentAction(m_Action.vectorActions); | ||
m_RequestDecision = false; | ||
m_Reward = 0f; | ||
m_CumulativeReward = 0f; | ||
} | ||
|
||
if ((m_StepCount >= maxStep) && (maxStep > 0)) | ||
else | ||
{ | ||
m_MaxStepReached = true; | ||
Done(); | ||
m_StepCount += 1; | ||
} | ||
if ((m_RequestAction) && (m_Brain != null)) | ||
{ | ||
m_RequestAction = false; | ||
AgentAction(m_Action.vectorActions); | ||
} | ||
|
||
m_StepCount += 1; | ||
} | ||
|
||
void DecideAction() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
namespace MLAgents
{
    /// <summary>
    /// Hands out process-wide unique episode identifiers. Each call to
    /// <see cref="GetEpisodeId"/> returns the next integer in the sequence
    /// 0, 1, 2, ... so every agent episode can be told apart.
    /// </summary>
    public static class EpisodeIdCounter
    {
        // Next id to hand out; defaults to 0 and grows by one per request.
        private static int s_NextId;

        /// <summary>
        /// Returns a fresh episode id and advances the counter.
        /// </summary>
        /// <returns>The next unused episode id.</returns>
        public static int GetEpisodeId()
        {
            var id = s_NextId;
            s_NextId = id + 1;
            return id;
        }
    }
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update this comment too.