[MLA-1762] reduce memory allocations from DiscreteActionOutputApplier #4922

Merged
merged 6 commits on Feb 9, 2021
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -52,6 +52,7 @@ and this project adheres to
- Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
- Removed several memory allocations that happened during inference. On a test scene, this
reduced the amount of memory allocated by approximately 25%. (#4887)
- Removed several memory allocations that happened during inference with discrete actions. (#4922)
- Properly catch permission errors when writing timer files. (#4921)

#### ml-agents / ml-agents-envs / gym-unity (Python)
129 changes: 28 additions & 101 deletions com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
@@ -1,4 +1,3 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
@@ -55,62 +54,26 @@ internal class DiscreteActionOutputApplier : TensorApplier.IApplier
{
readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;
readonly ITensorAllocator m_Allocator;
readonly ActionSpec m_ActionSpec;
readonly int[] m_StartActionIndices;
readonly float[] m_CdfBuffer;


public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
{
m_ActionSize = actionSpec.BranchSizes;
m_Multinomial = new Multinomial(seed);
m_Allocator = allocator;
m_ActionSpec = actionSpec;
m_StartActionIndices = Utilities.CumSum(m_ActionSize);

// Scratch space for computing the cumulative distribution function.
// In order to reuse it, make it the size of the largest branch.
var largestBranch = Mathf.Max(m_ActionSize);
m_CdfBuffer = new float[largestBranch];
}

public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
var batchSize = idActionPairList.Count;
var actionValues = new float[batchSize, m_ActionSize.Length];
var startActionIndices = Utilities.CumSum(m_ActionSize);
for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
{
var nBranchAction = m_ActionSize[actionIndex];
var actionProbs = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] { batchSize, nBranchAction },
data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
};

for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
{
for (var branchActionIndex = 0;
branchActionIndex < nBranchAction;
branchActionIndex++)
{
actionProbs.data[batchIndex, branchActionIndex] =
tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex];
}
}

var outputTensor = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] { batchSize, 1 },
data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
};

Eval(actionProbs, outputTensor, m_Multinomial);

for (var ii = 0; ii < batchSize; ii++)
{
actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
}
actionProbs.data.Dispose();
outputTensor.data.Dispose();
}

var agentIndex = 0;
for (var i = 0; i < actionIds.Count; i++)
{
@@ -126,74 +89,38 @@ public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int,
var discreteBuffer = actionBuffer.DiscreteActions;
for (var j = 0; j < m_ActionSize.Length; j++)
{
discreteBuffer[j] = (int)actionValues[agentIndex, j];
ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
}
}
agentIndex++;
}
}

/// <summary>
/// Draw samples from a multinomial distribution based on log-probabilities specified
/// in tensor src. The samples will be saved in the dst tensor.
/// Compute the cumulative distribution function for a given agent's action
/// given the log-probabilities.
/// The results are stored in m_CdfBuffer, which is sized to the largest branch.
/// </summary>
/// <param name="src">2-D tensor with shape batch_size x num_classes</param>
/// <param name="dst">Allocated tensor with size batch_size x num_samples</param>
/// <param name="multinomial">Multinomial object used to sample values</param>
/// <exception cref="NotImplementedException">
/// Multinomial doesn't support integer tensors
/// </exception>
/// <exception cref="ArgumentException">Issue with tensor shape or type</exception>
/// <exception cref="ArgumentNullException">
/// At least one of the tensors is not allocated
/// </exception>
public static void Eval(TensorProxy src, TensorProxy dst, Multinomial multinomial)
/// <param name="logProbs"></param>
/// <param name="batch">Index of the agent being considered</param>
/// <param name="channelOffset">Offset into the tensor's channel.</param>
/// <param name="branchSize"></param>
internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
{
if (src.DataType != typeof(float))
// Find the class maximum
var maxProb = float.NegativeInfinity;
for (var cls = 0; cls < branchSize; ++cls)
{
throw new NotImplementedException("Only float tensors are currently supported");
maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
}

if (src.valueType != dst.valueType)
// Sum the log probabilities and compute CDF
var sumProb = 0.0f;
for (var cls = 0; cls < branchSize; ++cls)
{
throw new ArgumentException(
"Source and destination tensors have different types!");
}

if (src.data == null || dst.data == null)
{
throw new ArgumentNullException();
}

if (src.data.batch != dst.data.batch)
{
throw new ArgumentException("Batch size for input and output data is different!");
Contributor
Are these exceptions no longer needed?

Contributor Author
Not sure they were ever necessary in the first place. The temporary tensors they were referencing are no longer needed.

}

var cdf = new float[src.data.channels];

for (var batch = 0; batch < src.data.batch; ++batch)
{
// Find the class maximum
var maxProb = float.NegativeInfinity;
for (var cls = 0; cls < src.data.channels; ++cls)
{
maxProb = Mathf.Max(src.data[batch, cls], maxProb);
}

// Sum the log probabilities and compute CDF
var sumProb = 0.0f;
for (var cls = 0; cls < src.data.channels; ++cls)
{
sumProb += Mathf.Exp(src.data[batch, cls] - maxProb);
cdf[cls] = sumProb;
}

// Generate the samples
for (var sample = 0; sample < dst.data.channels; ++sample)
{
dst.data[batch, sample] = multinomial.Sample(cdf);
}
sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
m_CdfBuffer[cls] = sumProb;
}
}
}
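
For context, a minimal, self-contained sketch of the sampling path the new code uses: build the CDF for one branch into a reusable buffer (with the usual max-subtraction for numerical stability), then draw an index from it. A flat float[] of log-probabilities stands in for TensorProxy, and the class and method names below are illustrative only, not the ML-Agents API.

```csharp
using System;

// Minimal sketch of the allocation-light sampling pattern above (not part of the
// diff). A flat float[] of log-probabilities for one branch stands in for
// TensorProxy; DiscreteSamplingSketch, ComputeCdf and Sample are illustrative
// names, not the ML-Agents API.
internal static class DiscreteSamplingSketch
{
    static readonly Random s_Random = new Random(2021);

    // Fills cdfBuffer[0 .. branchSize) with the unnormalized cumulative
    // distribution derived from log-probabilities, subtracting the maximum
    // first for numerical stability.
    static void ComputeCdf(float[] logProbs, int branchSize, float[] cdfBuffer)
    {
        var maxLogProb = float.NegativeInfinity;
        for (var i = 0; i < branchSize; i++)
        {
            maxLogProb = Math.Max(logProbs[i], maxLogProb);
        }

        var sum = 0.0f;
        for (var i = 0; i < branchSize; i++)
        {
            sum += (float)Math.Exp(logProbs[i] - maxLogProb);
            cdfBuffer[i] = sum;
        }
    }

    // Draws an action index in [0, branchSize) from the CDF in cdfBuffer.
    static int Sample(float[] cdfBuffer, int branchSize)
    {
        var p = (float)s_Random.NextDouble() * cdfBuffer[branchSize - 1];
        var cls = 0;
        while (cdfBuffer[cls] < p)
        {
            cls++;
        }
        return cls;
    }

    public static void Main()
    {
        // One buffer, reused for every sample instead of allocated per call.
        var logProbs = new[] { 0.1f, 1.2f, -0.5f };
        var cdfBuffer = new float[logProbs.Length];
        ComputeCdf(logProbs, logProbs.Length, cdfBuffer);
        Console.WriteLine(Sample(cdfBuffer, logProbs.Length));
    }
}
```

In the PR itself, the buffer (m_CdfBuffer) is allocated once in the constructor and sized for the largest branch, which is what removes the per-call TensorProxy and CDF-array allocations of the old Eval() path.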
17 changes: 14 additions & 3 deletions com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs
@@ -32,10 +32,11 @@ public Multinomial(int seed)
/// to be monotonic (always increasing). If the CMF is scaled, then the last entry in
/// the array will be 1.0.
/// </param>
/// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
public int Sample(float[] cmf)
/// <param name="branchSize">The number of possible branches, i.e. the effective size of the cmf array.</param>
/// <returns>A sampled index from the CMF ranging from 0 to branchSize-1.</returns>
public int Sample(float[] cmf, int branchSize)
Contributor Author

Because the float[] might be larger than we need now, we also pass the effective size of the array.

We could instead (or additionally) repeat the final sumProb value in ComputeCdf(); see the usage sketch after this diff.

{
var p = (float)m_Random.NextDouble() * cmf[cmf.Length - 1];
var p = (float)m_Random.NextDouble() * cmf[branchSize - 1];
var cls = 0;
while (cmf[cls] < p)
{
@@ -44,5 +45,15 @@ public int Sample(float[] cmf)

return cls;
}

/// <summary>
/// Samples from the Multinomial distribution defined by the provided cumulative
/// mass function.
/// </summary>
/// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
public int Sample(float[] cmf)
{
return Sample(cmf, cmf.Length);
}
}
}
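
A hypothetical usage sketch of the new Sample(float[], int) overload. The class name, seed, and uniform CDF values are made up for illustration, and it assumes the caller lives in the same assembly as the (not necessarily public) Multinomial type.

```csharp
using System;
using Unity.MLAgents.Inference.Utils;

// Hypothetical caller showing why Sample takes the effective size: the scratch
// buffer is allocated once for the largest branch, so smaller branches only use
// a prefix of it, and cmf.Length would point past their last valid entry.
// The uniform CDF below is a stand-in for one computed from log-probabilities.
internal static class SampleUsageSketch
{
    public static void Run()
    {
        var branchSizes = new[] { 3, 5, 2 };
        var cdfBuffer = new float[5];            // sized for the largest branch
        var multinomial = new Multinomial(20210209);

        foreach (var branchSize in branchSizes)
        {
            // Fill only the prefix this branch needs; entries past branchSize - 1
            // may hold stale values from a previous, larger branch.
            for (var i = 0; i < branchSize; i++)
            {
                cdfBuffer[i] = (i + 1) / (float)branchSize;
            }
            var action = multinomial.Sample(cdfBuffer, branchSize);
            Console.WriteLine(action);           // always in [0, branchSize)
        }
    }
}
```

Passing branchSize keeps the shared buffer usable even when a smaller branch follows a larger one: Sample stops at cmf[branchSize - 1], so stale entries beyond the prefix are never read.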