[API] Make the DecisionRequester public and add a delegate to its API…

… to allow users to customize its behavior. - Rename Academy.AgentSetStatus to Academy.AgentPreStep and make it public. - Fix Unity library cache issues for backwards compatibility tests. - Collect standalone build and logs to artifacts for standalone build jobs. - cat standalone build log if the build fails. - Default verbose to False for standalone build test.
Unity-Technologies · Apr 8, 2020 · b405630 · b405630
1 parent d78c10c
commit b405630
Show file tree

Hide file tree

Showing 18 changed files with 253 additions and 574 deletions.
diff --git a/.yamato/gym-interface-test.yml b/.yamato/gym-interface-test.yml
@@ -13,7 +13,7 @@ test_gym_interface_{{ editor.version }}:
   commands:
     - pip install pyyaml
     - python -u -m ml-agents.tests.yamato.setup_venv
-    - ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=Project/testPlayer-Basic
+    - ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
   dependencies:
     - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
   triggers:

diff --git a/.yamato/protobuf-generation-test.yml b/.yamato/protobuf-generation-test.yml
@@ -36,6 +36,6 @@ test_mac_protobuf_generation:
         - "protobuf-definitions/*.md"
         - "protobuf-definitions/**/*.md"
   artifacts:
-    dist:
+    patch:
       paths:
-        - "artifacts/*"
+        - "artifacts/*.*"
diff --git a/.yamato/python-ll-api-test.yml b/.yamato/python-ll-api-test.yml
@@ -15,7 +15,7 @@ test_mac_ll_api_{{ editor.version }}:
     - python -u -m ml-agents.tests.yamato.setup_venv
     - ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
   dependencies:
-    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }} --env=Project/testPlayer
+    - .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
   triggers:
     cancel_old_ci: true
     changes:

diff --git a/.yamato/standalone-build-test.yml b/.yamato/standalone-build-test.yml
@@ -27,7 +27,10 @@ test_mac_standalone_{{ editor.version }}:
         - "com.unity.ml-agents/*.md"
         - "com.unity.ml-agents/**/*.md"
   artifacts:
+    logs:
+      paths:
+        - "artifacts/standalone_build.txt"
     standalonebuild:
       paths:
-        - "Project/testPlayer*/**"
+        - "artifacts/testPlayer*/**"
 {% endfor %}
diff --git a/.yamato/training-int-tests.yml b/.yamato/training-int-tests.yml
@@ -35,7 +35,10 @@ test_mac_training_int_{{ editor.version }}:
         - "com.unity.ml-agents/*.md"
         - "com.unity.ml-agents/**/*.md"
   artifacts:
-    unit:
+    logs:
       paths:
-        - "artifacts/**"
+        - "artifacts/standalone_build.txt"
+    standalonebuild:
+      paths:
+        - "artifacts/testplayer*/**"
 {% endfor %}
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
  - Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
  - The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
  - Added ability to start training (initialize model weights) from a previous run ID. (#3710)
+ - The internal event `Academy.AgentSetStatus` was renamed to `Academy.AgentPreStep` and made public.
+ - The offset logic was removed from DecisionRequester.
 
 ### Minor Changes
  - Format of console output has changed slightly and now matches the name of the model/summary directory. (#3630, #3616)

diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
@@ -138,11 +138,14 @@ public bool IsCommunicatorOn
         // This will mark the Agent as Done if it has reached its maxSteps.
         internal event Action AgentIncrementStep;
 
-        // Signals to all the agents at each environment step along with the
-        // Academy's maxStepReached, done and stepCount values. The agents rely
-        // on this event to update their own values of max step reached and done
-        // in addition to aligning on the step count of the global episode.
-        internal event Action<int> AgentSetStatus;
+
+        /// <summary>
+        /// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
+        /// This is a good time for an <see cref="Agent"/> to decide if it would like to
+        /// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
+        /// for this step.  Any other pre-step setup could be done during this event as well.
+        /// </summary>
+        public event Action<int> AgentPreStep;
 
         // Signals to all the agents at each environment step so they can send
         // their state to their Policy if they have requested a decision.
@@ -347,7 +350,7 @@ void ResetActions()
         {
             DecideAction = () => {};
             DestroyAction = () => {};
-            AgentSetStatus = i => {};
+            AgentPreStep = i => {};
             AgentSendState = () => {};
             AgentAct = () => {};
             AgentForceReset = () => {};
@@ -423,7 +426,7 @@ public void EnvironmentStep()
                 ForcedFullReset();
             }
 
-            AgentSetStatus?.Invoke(m_StepCount);
+            AgentPreStep?.Invoke(m_StepCount);
 
             m_StepCount += 1;
             m_TotalStepCount += 1;

diff --git a/com.unity.ml-agents/Runtime/DecisionRequester.cs b/com.unity.ml-agents/Runtime/DecisionRequester.cs
@@ -1,3 +1,4 @@
+using System;
 using UnityEngine;
 using UnityEngine.Serialization;
 
@@ -8,12 +9,12 @@ namespace MLAgents
     /// at regular intervals.
     /// </summary>
     [AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
-    internal class DecisionRequester : MonoBehaviour
+    [RequireComponent(typeof(Agent))]
+    public class DecisionRequester : MonoBehaviour
     {
         /// <summary>
         /// The frequency with which the agent requests a decision. A DecisionPeriod of 5 means
-        /// that the Agent will request a decision every 5 Academy steps.
-        /// </summary>
+        /// that the Agent will request a decision every 5 Academy steps.</summary>
         [Range(1, 20)]
         [Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
                  "of 5 means that the Agent will request a decision every 5 Academy steps.")]
@@ -29,37 +30,32 @@ internal class DecisionRequester : MonoBehaviour
         [FormerlySerializedAs("RepeatAction")]
         public bool TakeActionsBetweenDecisions = true;
 
-        /// <summary>
-        /// Whether or not the Agent decisions should start at an offset (different for each agent).
-        /// This does not affect <see cref="DecisionPeriod"/>. Turning this on will distribute
-        /// the decision-making computations for all the agents across multiple Academy steps.
-        /// This can be valuable in scenarios where you have many agents in the scene, particularly
-        /// during the inference phase.
-        /// </summary>
-        [Tooltip("Whether or not Agent decisions should start at an offset.")]
-        public bool offsetStep;
-
+        [NonSerialized]
         Agent m_Agent;
-        int m_Offset;
 
         internal void Awake()
         {
-            m_Offset = offsetStep ? gameObject.GetInstanceID() : 0;
             m_Agent = gameObject.GetComponent<Agent>();
-            Academy.Instance.AgentSetStatus += MakeRequests;
+            Debug.Assert(m_Agent != null, "Agent component was not found on this gameObject and is required.");
+            Academy.Instance.AgentPreStep += MakeRequests;
         }
 
         void OnDestroy()
         {
             if (Academy.IsInitialized)
             {
-                Academy.Instance.AgentSetStatus -= MakeRequests;
+                Academy.Instance.AgentPreStep -= MakeRequests;
             }
         }
 
-        void MakeRequests(int count)
+        /// <summary>
+        /// Method that hooks into the Academy in order to inform the Agent on whether or not it should request a
+        /// decision, and whether or not it should take actions between decisions.
+        /// </summary>
+        /// <param name="academyStepCount">The current step count of the academy.</param>
+        void MakeRequests(int academyStepCount)
         {
-            if ((count + m_Offset) % DecisionPeriod == 0)
+            if (academyStepCount % DecisionPeriod == 0)
             {
                 m_Agent?.RequestDecision();
             }

diff --git a/com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs b/com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs
@@ -4,6 +4,7 @@
 using MLAgents.Sensors;
 using NUnit.Framework;
 using UnityEngine;
+using UnityEngine.TestTools;
 
 namespace MLAgentsExamples
 {
@@ -71,106 +72,5 @@ public void CheckSetupRayPerceptionSensorComponent()
 
             sensorComponent.CreateSensor();
         }
-
-        class PublicApiAgent : Agent
-        {
-            public int numHeuristicCalls;
-
-            public override float[] Heuristic()
-            {
-                numHeuristicCalls++;
-                return base.Heuristic();
-            }
-        }
-
-        // Simple SensorComponent that sets up a StackingSensor
-        class StackingComponent : SensorComponent
-        {
-            public SensorComponent wrappedComponent;
-            public int numStacks;
-
-            public override ISensor CreateSensor()
-            {
-                var wrappedSensor = wrappedComponent.CreateSensor();
-                return new StackingSensor(wrappedSensor, numStacks);
-            }
-
-            public override int[] GetObservationShape()
-            {
-                int[] shape = (int[]) wrappedComponent.GetObservationShape().Clone();
-                for (var i = 0; i < shape.Length; i++)
-                {
-                    shape[i] *= numStacks;
-                }
-
-                return shape;
-            }
-        }
-
-
-        [Test]
-        public void CheckSetupAgent()
-        {
-            var gameObject = new GameObject();
-
-            var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
-            behaviorParams.brainParameters.vectorObservationSize = 3;
-            behaviorParams.brainParameters.numStackedVectorObservations = 2;
-            behaviorParams.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
-            behaviorParams.brainParameters.vectorActionSize = new[] { 2, 2 };
-            behaviorParams.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
-            behaviorParams.behaviorName = "TestBehavior";
-            behaviorParams.TeamId = 42;
-            behaviorParams.useChildSensors = true;
-
-            var agent = gameObject.AddComponent<PublicApiAgent>();
-            // Make sure we can set the behavior type correctly after the agent is added
-            behaviorParams.behaviorType = BehaviorType.InferenceOnly;
-            // Can't actually create an Agent with InferenceOnly and no model, so change back
-            behaviorParams.behaviorType = BehaviorType.Default;
-
-            // TODO -  not internal yet
-            // var decisionRequester = gameObject.AddComponent<DecisionRequester>();
-            // decisionRequester.DecisionPeriod = 2;
-
-            var sensorComponent = gameObject.AddComponent<RayPerceptionSensorComponent3D>();
-            sensorComponent.sensorName = "ray3d";
-            sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
-            sensorComponent.raysPerDirection = 3;
-
-            // Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
-            // This isn't necessarily practical, just to ensure that it can be done
-            var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
-            wrappingSensorComponent.wrappedComponent = sensorComponent;
-            wrappingSensorComponent.numStacks = 3;
-
-            // ISensor isn't set up yet.
-            Assert.IsNull(sensorComponent.raySensor);
-
-            agent.LazyInitialize();
-            // Make sure we can set the behavior type correctly after the agent is initialized
-            // (this creates a new policy).
-            behaviorParams.behaviorType = BehaviorType.HeuristicOnly;
-
-            // Initialization should set up the sensors
-            Assert.IsNotNull(sensorComponent.raySensor);
-
-            // Let's change the inference device
-            var otherDevice = behaviorParams.inferenceDevice == InferenceDevice.CPU ? InferenceDevice.GPU : InferenceDevice.CPU;
-            agent.SetModel(behaviorParams.behaviorName, behaviorParams.model, otherDevice);
-
-            agent.AddReward(1.0f);
-
-            agent.RequestAction();
-            agent.RequestDecision();
-
-            Academy.Instance.AutomaticSteppingEnabled = false;
-            Academy.Instance.EnvironmentStep();
-
-            var actions = agent.GetAction();
-            // default Heuristic implementation should return zero actions.
-            Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
-            Assert.AreEqual(1, agent.numHeuristicCalls);
-        }
     }
 }