Unity-Technologies · dongruoping · Feb 23, 2021 · Feb 1, 2021 · Feb 5, 2021 · Feb 5, 2021
diff --git a/com.unity.ml-agents.extensions/Runtime/MultiAgent.meta b/com.unity.ml-agents.extensions/Runtime/MultiAgent.meta
diff --git a/com.unity.ml-agents.extensions/Runtime/MultiAgent/BaseMultiAgentGroup.cs b/com.unity.ml-agents.extensions/Runtime/MultiAgent/BaseMultiAgentGroup.cs
@@ -0,0 +1,144 @@
+using System;
+using System.Linq;
+using System.Collections.Generic;
+
+namespace Unity.MLAgents.Extensions.MultiAgent
+{
+    /// <summary>
+    /// A base class implementation of MultiAgentGroup.
+    /// </summary>
+    public class BaseMultiAgentGroup : IMultiAgentGroup, IDisposable
+    {
+        readonly int m_Id = MultiAgentGroupIdCounter.GetGroupId();
+        readonly HashSet<Agent> m_Agents = new HashSet<Agent>();
+
+        /// <summary>
+        /// Unregisters every agent that is still registered to this MultiAgentGroup.
+        /// </summary>
+        public void Dispose()
+        {
+            // Walk a snapshot instead of looping "while the set is not empty": UnregisterAgent
+            // is virtual, and an override that keeps the agent registered would never terminate.
+            foreach (var agent in m_Agents.ToArray())
+            {
+                UnregisterAgent(agent);
+            }
+        }
+
+        /// <inheritdoc />
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="agent"/> is null.</exception>
+        public virtual void RegisterAgent(Agent agent)
+        {
+            // Plain reference check on purpose, so no overloaded equality operator is involved.
+            if (ReferenceEquals(agent, null))
+            {
+                throw new ArgumentNullException(nameof(agent));
+            }
+
+            if (!m_Agents.Contains(agent))
+            {
+                // SetMultiAgentGroup raises the agent's UnregisterFromGroup event, so it has to
+                // run before this group subscribes; otherwise the agent would be removed again.
+                agent.SetMultiAgentGroup(this);
+                m_Agents.Add(agent);
+                agent.UnregisterFromGroup += UnregisterAgent;
+            }
+        }
+
+        /// <inheritdoc />
+        public virtual void UnregisterAgent(Agent agent)
+        {
+            // Remove reports whether the agent was registered, so a single lookup is enough.
+            if (m_Agents.Remove(agent))
+            {
+                agent.UnregisterFromGroup -= UnregisterAgent;
+            }
+        }
+
+        /// <inheritdoc />
+        public int GetId()
+        {
+            return m_Id;
+        }
+
+        /// <summary>
+        /// Get the set of all agents currently registered to this MultiAgentGroup.
+        /// </summary>
+        /// <returns>
+        /// The set of agents registered to the MultiAgentGroup. This is the group's own
+        /// collection, not a copy.
+        /// </returns>
+        public HashSet<Agent> GetRegisteredAgents()
+        {
+            return m_Agents;
+        }
+
+        /// <summary>
+        /// Increments the group rewards for all agents in this MultiAgentGroup.
+        /// </summary>
+        /// <remarks>
+        /// This function increases or decreases the group rewards by a given amount for all agents
+        /// in the group. Use <see cref="SetGroupReward(float)"/> to set the group reward assigned
+        /// to the current step with a specific value rather than increasing or decreasing it.
+        ///
+        /// A positive group reward indicates the whole group's accomplishments or desired behaviors.
+        /// Every agent in the group will receive the same group reward, whether or not the
+        /// agent's own actions led directly to the reward. Group rewards are meant to reinforce agents
+        /// to act in the group's best interest instead of individual ones.
+        /// Group rewards are treated differently than individual agent rewards during training, so
+        /// calling AddGroupReward() is not equivalent to calling agent.AddReward() on each agent in the group.
+        /// </remarks>
+        /// <param name="reward">Incremental group reward value.</param>
+        public void AddGroupReward(float reward)
+        {
+            foreach (var agent in m_Agents)
+            {
+                agent.AddGroupReward(reward);
+            }
+        }
+
+        /// <summary>
+        /// Set the group rewards for all agents in this MultiAgentGroup.
+        /// </summary>
+        /// <remarks>
+        /// This function replaces any group rewards given during the current step for all agents in the group.
+        /// Use <see cref="AddGroupReward(float)"/> to incrementally change the group reward rather than
+        /// overriding it.
+        ///
+        /// A positive group reward indicates the whole group's accomplishments or desired behaviors.
+        /// Every agent in the group will receive the same group reward, whether or not the
+        /// agent's own actions led directly to the reward. Group rewards are meant to reinforce agents
+        /// to act in the group's best interest instead of individual ones.
+        /// Group rewards are treated differently than individual agent rewards during training, so
+        /// calling SetGroupReward() is not equivalent to calling agent.SetReward() on each agent in the group.
+        /// </remarks>
+        /// <param name="reward">The new value of the group reward.</param>
+        public void SetGroupReward(float reward)
+        {
+            foreach (var agent in m_Agents)
+            {
+                agent.SetGroupReward(reward);
+            }
+        }
+
+        /// <summary>
+        /// End episodes for all agents in this MultiAgentGroup.
+        /// </summary>
+        /// <remarks>
+        /// This should be used when the episode can no longer continue, such as when the group
+        /// reaches the goal or fails at the task.
+        /// </remarks>
+        public void EndGroupEpisode()
+        {
+            // Iterate over a copy: if ending an episode leads to an agent being disabled (for
+            // example from user callback code), that agent unregisters itself from this group,
+            // which would invalidate an enumerator over the live set.
+            foreach (var agent in m_Agents.ToArray())
+            {
+                agent.EndEpisode();
+            }
+        }
+
+        /// <summary>
+        /// Indicate that the episode is over but not due to the "fault" of the group.
+        /// This has the same end result as calling <see cref="EndGroupEpisode"/>, but has a
+        /// slightly different effect on training.
+        /// </summary>
+        /// <remarks>
+        /// This should be used when the episode could continue, but has gone on for
+        /// a sufficient number of steps, such as if the environment hits some maximum number of steps.
+        /// </remarks>
+        public void GroupEpisodeInterrupted()
+        {
+            // Iterate over a copy for the same reason as in EndGroupEpisode: an agent that
+            // unregisters itself during the call must not break the enumeration.
+            foreach (var agent in m_Agents.ToArray())
+            {
+                agent.EpisodeInterrupted();
+            }
+        }
+    }
+}
diff --git a/com.unity.ml-agents.extensions/Runtime/MultiAgent/BaseMultiAgentGroup.cs.meta b/com.unity.ml-agents.extensions/Runtime/MultiAgent/BaseMultiAgentGroup.cs.meta
diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
@@ -34,6 +34,11 @@ internal struct AgentInfo
         /// </summary>
         public float reward;
 
+        /// <summary>
+        /// The current group reward received by the agent.
+        /// </summary>
+        public float groupReward;
+
         /// <summary>
         /// Whether the agent is done or not.
         /// </summary>
@@ -50,6 +55,11 @@ internal struct AgentInfo
         /// </summary>
         public int episodeId;
 
+        /// <summary>
+        /// MultiAgentGroup identifier.
+        /// </summary>
+        public int groupId;
+
         public void ClearActions()
         {
             storedActions.Clear();
@@ -243,6 +253,9 @@ internal struct AgentParameters
         /// Additionally, the magnitude of the reward should not exceed 1.0
         float m_Reward;
 
+        /// Represents the group reward the agent accumulated during the current step.
+        float m_GroupReward;
+
         /// Keeps track of the cumulative reward in this episode.
         float m_CumulativeReward;
 
@@ -317,6 +330,10 @@ internal struct AgentParameters
         /// </summary>
         float[] m_LegacyHeuristicCache;
 
+        int m_GroupId;
+
+        internal event Action<Agent> UnregisterFromGroup;
+
         /// <summary>
         /// Called when the attached [GameObject] becomes enabled and active.
         /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
@@ -448,6 +465,8 @@ public void LazyInitialize()
                 new int[m_ActuatorManager.NumDiscreteActions]
             );
 
+            m_Info.groupId = m_GroupId;
+
             // The first time the Academy resets, all Agents in the scene will be
             // forced to reset through the <see cref="AgentForceReset"/> event.
             // To avoid the Agent resetting twice, the Agents will not begin their
@@ -516,6 +535,7 @@ protected virtual void OnDisable()
                 NotifyAgentDone(DoneReason.Disabled);
             }
             m_Brain?.Dispose();
+            UnregisterFromGroup?.Invoke(this);
             m_Initialized = false;
         }
 
@@ -528,8 +548,10 @@ void NotifyAgentDone(DoneReason doneReason)
             }
             m_Info.episodeId = m_EpisodeId;
             m_Info.reward = m_Reward;
+            m_Info.groupReward = m_GroupReward;
             m_Info.done = true;
             m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
+            m_Info.groupId = m_GroupId;
             if (collectObservationsSensor != null)
             {
                 // Make sure the latest observations are being passed to training.
@@ -559,6 +581,7 @@ void NotifyAgentDone(DoneReason doneReason)
             }
 
             m_Reward = 0f;
+            m_GroupReward = 0f;
             m_CumulativeReward = 0f;
             m_RequestAction = false;
             m_RequestDecision = false;
@@ -698,6 +721,22 @@ public void AddReward(float increment)
             m_CumulativeReward += increment;
         }
 
+        /// <summary>
+        /// Overwrites the group reward this agent has accumulated for the current step.
+        /// </summary>
+        /// <param name="reward">The new value of the group reward.</param>
+        internal void SetGroupReward(float reward)
+        {
+#if DEBUG
+            // Debug-only validation of the incoming value (presumably NaN/infinity, going by
+            // the helper's name; the helper itself is defined elsewhere).
+            Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetGroupReward));
+#endif
+            m_GroupReward = reward;
+        }
+
+        /// <summary>
+        /// Adds the given amount to the group reward this agent has accumulated for the current step.
+        /// </summary>
+        /// <param name="increment">Incremental group reward value; may be negative.</param>
+        internal void AddGroupReward(float increment)
+        {
+#if DEBUG
+            // Debug-only validation of the incoming value (presumably NaN/infinity, going by
+            // the helper's name; the helper itself is defined elsewhere).
+            Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddGroupReward));
+#endif
+            m_GroupReward += increment;
+        }
+
         /// <summary>
         /// Retrieves the episode reward for the Agent.
         /// </summary>
@@ -1054,9 +1093,11 @@ void SendInfoToBrain()
 
             m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask();
             m_Info.reward = m_Reward;
+            m_Info.groupReward = m_GroupReward;
             m_Info.done = false;
             m_Info.maxStepReached = false;
             m_Info.episodeId = m_EpisodeId;
+            m_Info.groupId = m_GroupId;
 
             using (TimerStack.Instance.Scoped("RequestDecision"))
             {
@@ -1323,6 +1364,7 @@ void SendInfo()
             {
                 SendInfoToBrain();
                 m_Reward = 0f;
+                m_GroupReward = 0f;
                 m_RequestDecision = false;
             }
         }
@@ -1358,5 +1400,13 @@ void DecideAction()
             m_Info.CopyActions(actions);
             m_ActuatorManager.UpdateActions(actions);
         }
+
+        /// <summary>
+        /// Assigns this agent to a MultiAgentGroup, leaving any group it was registered to before.
+        /// </summary>
+        /// <param name="multiAgentGroup">
+        /// The group to join, or null to leave the current group without joining another one.
+        /// </param>
+        internal void SetMultiAgentGroup(IMultiAgentGroup multiAgentGroup)
+        {
+            // Unregister from current group if this agent has been assigned one before
+            UnregisterFromGroup?.Invoke(this);
+
+            // A null group resets the id to 0, the value m_GroupId holds before any group has
+            // been assigned, rather than throwing after the previous group was already left.
+            m_GroupId = multiAgentGroup?.GetId() ?? 0;
+        }
     }
 }
diff --git a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
@@ -58,9 +58,11 @@ public static AgentInfoProto ToAgentInfoProto(this AgentInfo ai)
             var agentInfoProto = new AgentInfoProto
             {
                 Reward = ai.reward,
+                GroupReward = ai.groupReward,
                 MaxStepReached = ai.maxStepReached,
                 Done = ai.done,
                 Id = ai.episodeId,
+                GroupId = ai.groupId,
             };
 
             if (ai.discreteActionMasks != null)