Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MLA-1724] Reduce use of IEnumerable during inference #4887

Merged
merged 4 commits into from
Jan 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public virtual int Write(ObservationWriter writer)
float[] buffer = new float[numFloats];
WriteObservation(buffer);

writer.AddRange(buffer);
writer.AddList(buffer);

return numFloats;
}
Expand Down
8 changes: 8 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,21 @@ removed when training with a player. The Editor still requires it to be clamped
- Added the IHeuristicProvider interface to allow IActuators as well as Agent implement the Heuristic function to generate actions.
Updated the Basic example and the Match3 Example to use Actuators.
Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)
- Added `VectorSensor.AddObservation(IList<float>)`. `VectorSensor.AddObservation(IEnumerable<float>)`
is deprecated. The `IList` version is recommended, as it does not generate any
additional memory allocations. (#4887)
- Added `ObservationWriter.AddList()` and deprecated `ObservationWriter.AddRange()`.
`AddList()` is recommended, as it does not generate any additional memory allocations. (#4887)

#### ml-agents / ml-agents-envs / gym-unity (Python)

### Bug Fixes
#### com.unity.ml-agents (C#)
- Fix a compile warning about using an obsolete enum in `GrpcExtensions.cs`. (#4812)
- CameraSensor now logs an error if the GraphicsDevice is null. (#4880)
- Removed several memory allocations that happened during inference. On a test scene, this
reduced the amount of memory allocated by approximately 25%. (#4887)

#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842)
- Fixed a bug that can cause a crash if a behavior can appear during training in multi-environment training. (#4872)
Expand Down
21 changes: 13 additions & 8 deletions com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ public ContinuousActionOutputApplier(ActionSpec actionSpec)
m_ActionSpec = actionSpec;
}

public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
{
var agentId = actionIds[i];
if (lastActions.ContainsKey(agentId))
{
var actionBuffer = lastActions[agentId];
Expand Down Expand Up @@ -65,7 +66,7 @@ public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAlloc
m_ActionSpec = actionSpec;
}

public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
Expand Down Expand Up @@ -109,9 +110,11 @@ public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionar
actionProbs.data.Dispose();
outputTensor.data.Dispose();
}

var agentIndex = 0;
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
{
var agentId = actionIds[i];
if (lastActions.ContainsKey(agentId))
{
var actionBuffer = lastActions[agentId];
Expand Down Expand Up @@ -209,12 +212,13 @@ public MemoryOutputApplier(
m_Memories = memories;
}

public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
{
var agentId = actionIds[i];
List<float> memory;
if (!m_Memories.TryGetValue(agentId, out memory)
|| memory.Count < memorySize)
Expand Down Expand Up @@ -246,13 +250,14 @@ public BarracudaMemoryOutputApplier(
m_Memories = memories;
}

public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];

foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
{
var agentId = actionIds[i];
List<float> memory;
if (!m_Memories.TryGetValue(agentId, out memory)
|| memory.Count < memorySize * m_MemoriesCount)
Expand Down
36 changes: 21 additions & 15 deletions com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public BiDimensionalOutputGenerator(ITensorAllocator allocator)
m_Allocator = allocator;
}

public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
}
Expand All @@ -40,7 +40,7 @@ public BatchSizeGenerator(ITensorAllocator allocator)
m_Allocator = allocator;
}

public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
tensorProxy.data?.Dispose();
tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));
Expand All @@ -63,7 +63,7 @@ public SequenceLengthGenerator(ITensorAllocator allocator)
m_Allocator = allocator;
}

public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
tensorProxy.shape = new long[0];
tensorProxy.data?.Dispose();
Expand Down Expand Up @@ -92,14 +92,15 @@ public RecurrentInputGenerator(
}

public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
{
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
List<float> memory;

Expand Down Expand Up @@ -147,14 +148,15 @@ public BarracudaRecurrentInputGenerator(
m_Memories = memories;
}

public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
{
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
var offset = memorySize * m_MemoryIndex;
List<float> memory;
Expand Down Expand Up @@ -200,14 +202,15 @@ public PreviousActionInputGenerator(ITensorAllocator allocator)
m_Allocator = allocator;
}

public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
{
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
var pastAction = info.storedActions.DiscreteActions;
if (!pastAction.IsEmpty())
Expand Down Expand Up @@ -238,14 +241,15 @@ public ActionMaskInputGenerator(ITensorAllocator allocator)
m_Allocator = allocator;
}

public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

var maskSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
{
var infoSensorPair = infos[infoIndex];
var agentInfo = infoSensorPair.agentInfo;
var maskList = agentInfo.discreteActionMasks;
for (var j = 0; j < maskSize; j++)
Expand Down Expand Up @@ -274,7 +278,7 @@ public RandomNormalInputGenerator(int seed, ITensorAllocator allocator)
m_Allocator = allocator;
}

public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
TensorUtils.FillTensorWithRandomNormal(tensorProxy, m_RandomNormal);
Expand Down Expand Up @@ -303,12 +307,13 @@ public void AddSensorIndex(int sensorIndex)
m_SensorIndices.Add(sensorIndex);
}

public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
var agentIndex = 0;
foreach (var info in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
{
var info = infos[infoIndex];
if (info.agentInfo.done)
{
// If the agent is done, we might have a stale reference to the sensors
Expand All @@ -320,8 +325,9 @@ public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentIn
{
var tensorOffset = 0;
// Write each sensor consecutively to the tensor
foreach (var sensorIndex in m_SensorIndices)
for (var sensorIndexIndex = 0; sensorIndexIndex < m_SensorIndices.Count; sensorIndexIndex++)
{
var sensorIndex = m_SensorIndices[sensorIndexIndex];
var sensor = info.sensors[sensorIndex];
m_ObservationWriter.SetTarget(tensorProxy, agentIndex, tensorOffset);
var numWritten = sensor.Write(m_ObservationWriter);
Expand Down
46 changes: 25 additions & 21 deletions com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@ internal class ModelRunner
TensorApplier m_TensorApplier;

NNModel m_Model;
string m_ModelName;
InferenceDevice m_InferenceDevice;
IWorker m_Engine;
bool m_Verbose = false;
string[] m_OutputNames;
IReadOnlyList<TensorProxy> m_InferenceInputs;
IReadOnlyList<TensorProxy> m_InferenceOutputs;
List<TensorProxy> m_InferenceOutputs;
Dictionary<string, Tensor> m_InputsByName;
Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();

SensorShapeValidator m_SensorShapeValidator = new SensorShapeValidator();
Expand All @@ -56,6 +58,7 @@ public ModelRunner(
{
Model barracudaModel;
m_Model = model;
m_ModelName = model.name;
m_InferenceDevice = inferenceDevice;
m_TensorAllocator = new TensorCachingAllocator();
if (model != null)
Expand Down Expand Up @@ -84,6 +87,8 @@ public ModelRunner(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(
actionSpec, seed, m_TensorAllocator, m_Memories, barracudaModel);
m_InputsByName = new Dictionary<string, Tensor>();
m_InferenceOutputs = new List<TensorProxy>();
}

public InferenceDevice InferenceDevice
Expand All @@ -96,15 +101,14 @@ public NNModel Model
get { return m_Model; }
}

static Dictionary<string, Tensor> PrepareBarracudaInputs(IEnumerable<TensorProxy> infInputs)
void PrepareBarracudaInputs(IReadOnlyList<TensorProxy> infInputs)
{
var inputs = new Dictionary<string, Tensor>();
foreach (var inp in infInputs)
m_InputsByName.Clear();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed this and FetchBarracudaOutputs to reuse their results between frames.

for (var i = 0; i < infInputs.Count; i++)
{
inputs[inp.name] = inp.data;
var inp = infInputs[i];
m_InputsByName[inp.name] = inp.data;
}

return inputs;
}

public void Dispose()
Expand All @@ -114,16 +118,14 @@ public void Dispose()
m_TensorAllocator?.Reset(false);
}

List<TensorProxy> FetchBarracudaOutputs(string[] names)
void FetchBarracudaOutputs(string[] names)
{
var outputs = new List<TensorProxy>();
m_InferenceOutputs.Clear();
foreach (var n in names)
{
var output = m_Engine.PeekOutput(n);
outputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
m_InferenceOutputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
}

return outputs;
}

public void PutObservations(AgentInfo info, List<ISensor> sensors)
Expand Down Expand Up @@ -169,31 +171,33 @@ public void DecideBatch()
}

Profiler.BeginSample("ModelRunner.DecideAction");
Profiler.BeginSample(m_ModelName);

Profiler.BeginSample($"MLAgents.{m_Model.name}.GenerateTensors");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These were allocating for the string concatenation, and m_Model.name also allocates, so cache it at the start.

Profiler.BeginSample($"GenerateTensors");
// Prepare the input tensors to be feed into the engine
m_TensorGenerator.GenerateTensors(m_InferenceInputs, currentBatchSize, m_Infos);
Profiler.EndSample();

Profiler.BeginSample($"MLAgents.{m_Model.name}.PrepareBarracudaInputs");
var inputs = PrepareBarracudaInputs(m_InferenceInputs);
Profiler.BeginSample($"PrepareBarracudaInputs");
PrepareBarracudaInputs(m_InferenceInputs);
Profiler.EndSample();

// Execute the Model
Profiler.BeginSample($"MLAgents.{m_Model.name}.ExecuteGraph");
m_Engine.Execute(inputs);
Profiler.BeginSample($"ExecuteGraph");
m_Engine.Execute(m_InputsByName);
Profiler.EndSample();

Profiler.BeginSample($"MLAgents.{m_Model.name}.FetchBarracudaOutputs");
m_InferenceOutputs = FetchBarracudaOutputs(m_OutputNames);
Profiler.BeginSample($"FetchBarracudaOutputs");
FetchBarracudaOutputs(m_OutputNames);
Profiler.EndSample();

Profiler.BeginSample($"MLAgents.{m_Model.name}.ApplyTensors");
Profiler.BeginSample($"ApplyTensors");
// Update the outputs
m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);
Profiler.EndSample();

Profiler.EndSample();
Profiler.EndSample(); // end name
Profiler.EndSample(); // end ModelRunner.DecideAction

m_Infos.Clear();

Expand Down
7 changes: 4 additions & 3 deletions com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public interface IApplier
/// </param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions);
void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions);
}

readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();
Expand Down Expand Up @@ -90,10 +90,11 @@ public TensorApplier(
/// <exception cref="UnityAgentsException"> One of the tensor does not have an
/// associated applier.</exception>
public void ApplyTensors(
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
IReadOnlyList<TensorProxy> tensors, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
foreach (var tensor in tensors)
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
{
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(
Expand Down
7 changes: 4 additions & 3 deletions com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public interface IGenerator
/// the tensor's data.
/// </param>
void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos);
TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos);
}

readonly Dictionary<string, IGenerator> m_Dict = new Dictionary<string, IGenerator>();
Expand Down Expand Up @@ -149,10 +149,11 @@ public void InitializeObservations(List<ISensor> sensors, ITensorAllocator alloc
/// <exception cref="UnityAgentsException"> One of the tensor does not have an
/// associated generator.</exception>
public void GenerateTensors(
IEnumerable<TensorProxy> tensors, int currentBatchSize, IEnumerable<AgentInfoSensorsPair> infos)
IReadOnlyList<TensorProxy> tensors, int currentBatchSize, IList<AgentInfoSensorsPair> infos)
{
foreach (var tensor in tensors)
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
{
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(
Expand Down
Loading