Skip to content

Commit

Permalink
Make the DecisionRequester public and customizable. (#3716)
Browse files Browse the repository at this point in the history
* [API] Make the DecisionRequester public and add a delegate to its API to allow users to customize its behavior.

- Rename Academy.AgentSetStatus to Academy.AgentPreStep and make it public.
- Fix Unity library cache issues for backwards compatibility tests.
- Collect standalone build and logs to artifacts for standalone build jobs.
- cat standalone build log if the build fails.
- Default verbose to False for standalone build test.

* disable backward compatibility test, bump communication version.

* still run training tests on latest.

* fix yml parse error.
  • Loading branch information
surfnerd authored Apr 8, 2020
1 parent 26f54f0 commit fa0b4be
Show file tree
Hide file tree
Showing 19 changed files with 257 additions and 578 deletions.
2 changes: 1 addition & 1 deletion .yamato/gym-interface-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ test_gym_interface_{{ editor.version }}:
commands:
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=Project/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
Expand Down
4 changes: 2 additions & 2 deletions .yamato/protobuf-generation-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ test_mac_protobuf_generation:
- "protobuf-definitions/*.md"
- "protobuf-definitions/**/*.md"
artifacts:
dist:
patch:
paths:
- "artifacts/*"
- "artifacts/*.*"
2 changes: 1 addition & 1 deletion .yamato/python-ll-api-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ test_mac_ll_api_{{ editor.version }}:
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }} --env=Project/testPlayer
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
changes:
Expand Down
5 changes: 4 additions & 1 deletion .yamato/standalone-build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ test_mac_standalone_{{ editor.version }}:
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
logs:
paths:
- "artifacts/standalone_build.txt"
standalonebuild:
paths:
- "Project/testPlayer*/**"
- "artifacts/testPlayer*/**"
{% endfor %}
11 changes: 7 additions & 4 deletions .yamato/training-int-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ test_mac_training_int_{{ editor.version }}:
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
Expand All @@ -35,7 +35,10 @@ test_mac_training_int_{{ editor.version }}:
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
unit:
logs:
paths:
- "artifacts/**"
- "artifacts/standalone_build.txt"
standalonebuild:
paths:
- "artifacts/testplayer*/**"
{% endfor %}
2 changes: 2 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
- The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
- Added ability to start training (initialize model weights) from a previous run ID. (#3710)
- The internal event `Academy.AgentSetStatus` was renamed to `Academy.AgentPreStep` and made public.
- The offset logic was removed from DecisionRequester.

### Minor Changes
- Format of console output has changed slightly and now matches the name of the model/summary directory. (#3630, #3616)
Expand Down
19 changes: 11 additions & 8 deletions com.unity.ml-agents/Runtime/Academy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public class Academy : IDisposable
/// on each side, although we may allow some flexibility in the future.
/// This should be incremented whenever a change is made to the communication protocol.
/// </summary>
const string k_ApiVersion = "0.15.0";
const string k_ApiVersion = "0.16.0";

/// <summary>
/// Unity package version of com.unity.ml-agents.
Expand Down Expand Up @@ -138,11 +138,14 @@ public bool IsCommunicatorOn
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;

// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
internal event Action<int> AgentSetStatus;

/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
/// This is a good time for an <see cref="Agent"/> to decide if it would like to
/// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
/// for this step. Any other pre-step setup could be done during this event as well.
/// </summary>
public event Action<int> AgentPreStep;

// Signals to all the agents at each environment step so they can send
// their state to their Policy if they have requested a decision.
Expand Down Expand Up @@ -347,7 +350,7 @@ void ResetActions()
{
DecideAction = () => {};
DestroyAction = () => {};
AgentSetStatus = i => {};
AgentPreStep = i => {};
AgentSendState = () => {};
AgentAct = () => {};
AgentForceReset = () => {};
Expand Down Expand Up @@ -423,7 +426,7 @@ public void EnvironmentStep()
ForcedFullReset();
}

AgentSetStatus?.Invoke(m_StepCount);
AgentPreStep?.Invoke(m_StepCount);

m_StepCount += 1;
m_TotalStepCount += 1;
Expand Down
34 changes: 15 additions & 19 deletions com.unity.ml-agents/Runtime/DecisionRequester.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System;
using UnityEngine;
using UnityEngine.Serialization;

Expand All @@ -8,12 +9,12 @@ namespace MLAgents
/// at regular intervals.
/// </summary>
[AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
internal class DecisionRequester : MonoBehaviour
[RequireComponent(typeof(Agent))]
public class DecisionRequester : MonoBehaviour
{
/// <summary>
/// The frequency with which the agent requests a decision. A DecisionPeriod of 5 means
/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
[Range(1, 20)]
[Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
Expand All @@ -29,37 +30,32 @@ internal class DecisionRequester : MonoBehaviour
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;

/// <summary>
/// Whether or not the Agent decisions should start at an offset (different for each agent).
/// This does not affect <see cref="DecisionPeriod"/>. Turning this on will distribute
/// the decision-making computations for all the agents across multiple Academy steps.
/// This can be valuable in scenarios where you have many agents in the scene, particularly
/// during the inference phase.
/// </summary>
[Tooltip("Whether or not Agent decisions should start at an offset.")]
public bool offsetStep;

[NonSerialized]
Agent m_Agent;
int m_Offset;

internal void Awake()
{
m_Offset = offsetStep ? gameObject.GetInstanceID() : 0;
m_Agent = gameObject.GetComponent<Agent>();
Academy.Instance.AgentSetStatus += MakeRequests;
Debug.Assert(m_Agent != null, "Agent component was not found on this gameObject and is required.");
Academy.Instance.AgentPreStep += MakeRequests;
}

void OnDestroy()
{
if (Academy.IsInitialized)
{
Academy.Instance.AgentSetStatus -= MakeRequests;
Academy.Instance.AgentPreStep -= MakeRequests;
}
}

void MakeRequests(int count)
/// <summary>
/// Method that hooks into the Academy in order to inform the Agent on whether or not it should request a
/// decision, and whether or not it should take actions between decisions.
/// </summary>
/// <param name="academyStepCount">The current step count of the academy.</param>
void MakeRequests(int academyStepCount)
{
if ((count + m_Offset) % DecisionPeriod == 0)
if (academyStepCount % DecisionPeriod == 0)
{
m_Agent?.RequestDecision();
}
Expand Down
102 changes: 1 addition & 101 deletions com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using MLAgents.Sensors;
using NUnit.Framework;
using UnityEngine;
using UnityEngine.TestTools;

namespace MLAgentsExamples
{
Expand Down Expand Up @@ -71,106 +72,5 @@ public void CheckSetupRayPerceptionSensorComponent()

sensorComponent.CreateSensor();
}

class PublicApiAgent : Agent
{
public int numHeuristicCalls;

public override float[] Heuristic()
{
numHeuristicCalls++;
return base.Heuristic();
}
}

// Simple SensorComponent that sets up a StackingSensor
class StackingComponent : SensorComponent
{
public SensorComponent wrappedComponent;
public int numStacks;

public override ISensor CreateSensor()
{
var wrappedSensor = wrappedComponent.CreateSensor();
return new StackingSensor(wrappedSensor, numStacks);
}

public override int[] GetObservationShape()
{
int[] shape = (int[]) wrappedComponent.GetObservationShape().Clone();
for (var i = 0; i < shape.Length; i++)
{
shape[i] *= numStacks;
}

return shape;
}
}


[Test]
public void CheckSetupAgent()
{
var gameObject = new GameObject();

var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
behaviorParams.brainParameters.vectorObservationSize = 3;
behaviorParams.brainParameters.numStackedVectorObservations = 2;
behaviorParams.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.brainParameters.vectorActionSize = new[] { 2, 2 };
behaviorParams.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
behaviorParams.behaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.useChildSensors = true;

var agent = gameObject.AddComponent<PublicApiAgent>();
// Make sure we can set the behavior type correctly after the agent is added
behaviorParams.behaviorType = BehaviorType.InferenceOnly;
// Can't actually create an Agent with InferenceOnly and no model, so change back
behaviorParams.behaviorType = BehaviorType.Default;

// TODO - not internal yet
// var decisionRequester = gameObject.AddComponent<DecisionRequester>();
// decisionRequester.DecisionPeriod = 2;

var sensorComponent = gameObject.AddComponent<RayPerceptionSensorComponent3D>();
sensorComponent.sensorName = "ray3d";
sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
sensorComponent.raysPerDirection = 3;

// Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
// This isn't necessarily practical, just to ensure that it can be done
var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
wrappingSensorComponent.wrappedComponent = sensorComponent;
wrappingSensorComponent.numStacks = 3;

// ISensor isn't set up yet.
Assert.IsNull(sensorComponent.raySensor);

agent.LazyInitialize();
// Make sure we can set the behavior type correctly after the agent is initialized
// (this creates a new policy).
behaviorParams.behaviorType = BehaviorType.HeuristicOnly;

// Initialization should set up the sensors
Assert.IsNotNull(sensorComponent.raySensor);

// Let's change the inference device
var otherDevice = behaviorParams.inferenceDevice == InferenceDevice.CPU ? InferenceDevice.GPU : InferenceDevice.CPU;
agent.SetModel(behaviorParams.behaviorName, behaviorParams.model, otherDevice);

agent.AddReward(1.0f);

agent.RequestAction();
agent.RequestDecision();

Academy.Instance.AutomaticSteppingEnabled = false;
Academy.Instance.EnvironmentStep();

var actions = agent.GetAction();
// default Heuristic implementation should return zero actions.
Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
Assert.AreEqual(1, agent.numHeuristicCalls);
}
}
}
Loading

0 comments on commit fa0b4be

Please sign in to comment.