-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MultiAgentGroup Interface #4923
Changes from 32 commits
6d2be2c
09590ad
c982c06
7e3d976
c40fec0
ffb3f0b
f87cfbd
8b8e916
6b71f5a
87e97dd
d3d1dc1
2ba09ca
a22c621
2dc90a9
70207a3
49282f6
204b45b
7eacfba
016ffd8
8b9d662
3fb14b9
4e4ecad
492fd17
78e052b
81d8389
ad4a821
9725aa5
cbfdfb3
6badfb5
ef67f53
8e78dbd
31ee1c4
1e4c837
d29a770
146f34e
2953003
02ac8e2
e469f6c
727ef88
e026eca
c802129
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
using System; | ||
using System.Linq; | ||
using System.Collections.Generic; | ||
|
||
namespace Unity.MLAgents.Extensions.MultiAgent | ||
{ | ||
/// <summary> | ||
/// A base class implementation of MultiAgentGroup. | ||
/// </summary> | ||
public class BaseMultiAgentGroup : IMultiAgentGroup, IDisposable | ||
{ | ||
int m_StepCount; | ||
int m_GroupMaxStep; | ||
readonly int m_Id = MultiAgentGroupIdCounter.GetGroupId(); | ||
HashSet<Agent> m_Agents = new HashSet<Agent>(); | ||
|
||
|
||
public void Dispose() | ||
{ | ||
while (m_Agents.Count > 0) | ||
{ | ||
UnregisterAgent(m_Agents.First()); | ||
} | ||
} | ||
|
||
/// <inheritdoc /> | ||
public virtual void RegisterAgent(Agent agent) | ||
{ | ||
if (!m_Agents.Contains(agent)) | ||
{ | ||
agent.SetMultiAgentGroup(this); | ||
m_Agents.Add(agent); | ||
agent.UnregisterFromGroup += UnregisterAgent; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of relying on another callback, is there a way to simply check if the Agent is still alive and remove it from the list if it is not ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we would like the group to constantly check on agents instead of agent triggering it, it will probably needs to be a monobehaviour to do this check in every FixedUpdate (or passively check it when any method is called which sounds buggy). Also this doesn't work if the agent is still alive but assigned to another group. |
||
} | ||
} | ||
|
||
/// <inheritdoc /> | ||
public virtual void UnregisterAgent(Agent agent) | ||
{ | ||
if (m_Agents.Contains(agent)) | ||
{ | ||
m_Agents.Remove(agent); | ||
agent.UnregisterFromGroup -= UnregisterAgent; | ||
} | ||
} | ||
|
||
/// <inheritdoc /> | ||
public int GetId() | ||
{ | ||
return m_Id; | ||
} | ||
|
||
/// <summary> | ||
/// Get list of all agents currently registered to this MultiAgentGroup. | ||
/// </summary> | ||
/// <returns> | ||
/// List of agents registered to the MultiAgentGroup. | ||
/// </returns> | ||
public HashSet<Agent> GetRegisteredAgents() | ||
{ | ||
return m_Agents; | ||
} | ||
|
||
/// <summary> | ||
/// Increments the group rewards for all agents in this MultiAgentGroup. | ||
/// </summary> | ||
/// <remarks> | ||
/// This function increases or decreases the group rewards by a given amount for all agents | ||
/// in the group. Use <see cref="SetGroupReward(float)"/> to set the group reward assigned | ||
/// to the current step with a specific value rather than increasing or decreasing it. | ||
/// | ||
/// A positive group reward indicates the whole group's accomplishments or desired behaviors. | ||
/// Every agent in the group will receive the same group reward no matter whether the | ||
/// agent's act directly leads to the reward. Group rewards are meant to reinforce agents | ||
/// to act in the group's best interest instead of individual ones. | ||
/// Group rewards are treated differently than individual agent rewards during training, so | ||
/// calling AddGroupReward() is not equivalent to calling agent.AddReward() on each agent in the group. | ||
/// </remarks> | ||
/// <param name="reward">Incremental group reward value.</param> | ||
public void AddGroupReward(float reward) | ||
{ | ||
foreach (var agent in m_Agents) | ||
{ | ||
agent.AddGroupReward(reward); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// Set the group rewards for all agents in this MultiAgentGroup. | ||
/// </summary> | ||
/// <remarks> | ||
/// This function replaces any group rewards given during the current step for all agents in the group. | ||
/// Use <see cref="AddGroupReward(float)"/> to incrementally change the group reward rather than | ||
/// overriding it. | ||
/// | ||
/// A positive group reward indicates the whole group's accomplishments or desired behaviors. | ||
/// Every agent in the group will receive the same group reward no matter whether the | ||
/// agent's act directly leads to the reward. Group rewards are meant to reinforce agents | ||
/// to act in the group's best interest instead of indivisual ones. | ||
/// Group rewards are treated differently than individual agent rewards during training, so | ||
/// calling SetGroupReward() is not equivalent to calling agent.SetReward() on each agent in the group. | ||
/// </remarks> | ||
/// <param name="reward">The new value of the group reward.</param> | ||
public void SetGroupReward(float reward) | ||
{ | ||
foreach (var agent in m_Agents) | ||
{ | ||
agent.SetGroupReward(reward); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// End episodes for all agents in this MultiAgentGroup. | ||
/// </summary> | ||
/// <remarks> | ||
/// This should be used when the episode can no longer continue, such as when the group | ||
/// reaches the goal or fails at the task. | ||
/// </remarks> | ||
public void EndGroupEpisode() | ||
{ | ||
foreach (var agent in m_Agents) | ||
{ | ||
agent.EndEpisode(); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// Indicate that the episode is over but not due to the "fault" of the group. | ||
/// This has the same end result as calling <see cref="EndGroupEpisode"/>, but has a | ||
/// slightly different effect on training. | ||
/// </summary> | ||
/// <remarks> | ||
/// This should be used when the episode could continue, but has gone on for | ||
/// a sufficient number of steps, such as if the environment hits some maximum number of steps. | ||
/// </remarks> | ||
public void GroupEpisodeInterrupted() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. InterruptGroupEpisode for symmetry with EndGroupEpisode ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I made this for symmetry with Agent.EpisodeInterrupted() in the hope that user can map EndEpisode/EpisodeInterrupted to EndGroupEpisode/GroupEpisodeInterrupted |
||
{ | ||
foreach (var agent in m_Agents) | ||
{ | ||
agent.EpisodeInterrupted(); | ||
} | ||
} | ||
} | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,11 @@ internal struct AgentInfo | |
/// </summary> | ||
public float reward; | ||
|
||
/// <summary> | ||
/// The current group reward received by the agent. | ||
/// </summary> | ||
public float groupReward; | ||
|
||
/// <summary> | ||
/// Whether the agent is done or not. | ||
/// </summary> | ||
|
@@ -50,6 +55,11 @@ internal struct AgentInfo | |
/// </summary> | ||
public int episodeId; | ||
|
||
/// <summary> | ||
/// MultiAgentGroup identifier. | ||
/// </summary> | ||
public int groupId; | ||
|
||
public void ClearActions() | ||
{ | ||
storedActions.Clear(); | ||
|
@@ -243,6 +253,9 @@ internal struct AgentParameters | |
/// Additionally, the magnitude of the reward should not exceed 1.0 | ||
float m_Reward; | ||
|
||
/// Represents the group reward the agent accumulated during the current step. | ||
float m_GroupReward; | ||
|
||
/// Keeps track of the cumulative reward in this episode. | ||
float m_CumulativeReward; | ||
|
||
|
@@ -317,6 +330,10 @@ internal struct AgentParameters | |
/// </summary> | ||
float[] m_LegacyHeuristicCache; | ||
|
||
int m_GroupId; | ||
|
||
internal event Action<Agent> UnregisterFromGroup; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this event really needed? Can you add a comment to justify why and when the event is called? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the delegate for agent to trigger removing itself from the group and is added to avoid cyclic reference between agent and group. The agent needs a way to clean itself up from the group when disabled/destroyed/assigned to another group. I'll add comments. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might be better with a more generic name, like OnAgentDisabled. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. didn't named it OnAgentDisabled since it was also triggered in SetMultiAgentGroup. Now that is removed I changed the name. |
||
|
||
/// <summary> | ||
/// Called when the attached [GameObject] becomes enabled and active. | ||
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html | ||
|
@@ -448,6 +465,8 @@ public void LazyInitialize() | |
new int[m_ActuatorManager.NumDiscreteActions] | ||
); | ||
|
||
m_Info.groupId = m_GroupId; | ||
|
||
// The first time the Academy resets, all Agents in the scene will be | ||
// forced to reset through the <see cref="AgentForceReset"/> event. | ||
// To avoid the Agent resetting twice, the Agents will not begin their | ||
|
@@ -516,6 +535,7 @@ protected virtual void OnDisable() | |
NotifyAgentDone(DoneReason.Disabled); | ||
} | ||
m_Brain?.Dispose(); | ||
UnregisterFromGroup?.Invoke(this); | ||
m_Initialized = false; | ||
} | ||
|
||
|
@@ -528,8 +548,10 @@ void NotifyAgentDone(DoneReason doneReason) | |
} | ||
m_Info.episodeId = m_EpisodeId; | ||
m_Info.reward = m_Reward; | ||
m_Info.groupReward = m_GroupReward; | ||
m_Info.done = true; | ||
m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached; | ||
m_Info.groupId = m_GroupId; | ||
if (collectObservationsSensor != null) | ||
{ | ||
// Make sure the latest observations are being passed to training. | ||
|
@@ -559,6 +581,7 @@ void NotifyAgentDone(DoneReason doneReason) | |
} | ||
|
||
m_Reward = 0f; | ||
m_GroupReward = 0f; | ||
m_CumulativeReward = 0f; | ||
m_RequestAction = false; | ||
m_RequestDecision = false; | ||
|
@@ -698,6 +721,22 @@ public void AddReward(float increment) | |
m_CumulativeReward += increment; | ||
} | ||
|
||
internal void SetGroupReward(float reward) | ||
{ | ||
#if DEBUG | ||
Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetGroupReward)); | ||
#endif | ||
m_GroupReward = reward; | ||
} | ||
|
||
internal void AddGroupReward(float increment) | ||
{ | ||
#if DEBUG | ||
Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddGroupReward)); | ||
#endif | ||
m_GroupReward += increment; | ||
} | ||
|
||
/// <summary> | ||
/// Retrieves the episode reward for the Agent. | ||
/// </summary> | ||
|
@@ -1054,9 +1093,11 @@ void SendInfoToBrain() | |
|
||
m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask(); | ||
m_Info.reward = m_Reward; | ||
m_Info.groupReward = m_GroupReward; | ||
m_Info.done = false; | ||
m_Info.maxStepReached = false; | ||
m_Info.episodeId = m_EpisodeId; | ||
m_Info.groupId = m_GroupId; | ||
|
||
using (TimerStack.Instance.Scoped("RequestDecision")) | ||
{ | ||
|
@@ -1323,6 +1364,7 @@ void SendInfo() | |
{ | ||
SendInfoToBrain(); | ||
m_Reward = 0f; | ||
m_GroupReward = 0f; | ||
m_RequestDecision = false; | ||
} | ||
} | ||
|
@@ -1358,5 +1400,13 @@ void DecideAction() | |
m_Info.CopyActions(actions); | ||
m_ActuatorManager.UpdateActions(actions); | ||
} | ||
|
||
internal void SetMultiAgentGroup(IMultiAgentGroup multiAgentGroup) | ||
{ | ||
// Unregister from current group if this agent has been assigned one before | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think if you want the agent to unregister from all groups, you should allow passing a null multiAgentGroup here. This would be better as something like if(multiAgentGroup == null)
{
m_GroupId == 0;
}
else
{
var newId = multiAgentGroup.GetId();
if(m_GroupId == 0 || m_GroupId == newId)
{
// Idempotent, so setting the same group has no effect
m_GroupId = newId;
}
else
{
throw new UnityAgentsException("Agent is already registered with a group. Unregister it first.");
}
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I made it internal and only being called in MultiAgentGroup.RegisterAgent so I don't see there's any cases that it can be passed in a null group. Also it shouldn't be unregistered by just overwriting m_GroupId otherwise there would be delegates left in Agent.UnregisterFromGroup. I can do throwing an exception if a user trying to assign an agent from one group directly to another group without unregistering from the first, instead of call unregister for them. |
||
UnregisterFromGroup?.Invoke(this); | ||
|
||
m_GroupId = multiAgentGroup.GetId(); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
m_StepCount and m_GroupMaxStep appear to be unused now?