[MLA-1762] reduce memory allocations from DiscreteActionOutputApplier #4922

Merged
merged 6 commits on Feb 9, 2021
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
@@ -52,6 +52,7 @@ and this project adheres to
- Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
- Removed several memory allocations that happened during inference. On a test scene, this
reduced the amount of memory allocated by approximately 25%. (#4887)
- Removed several memory allocations that happened during inference with discrete actions. (#4922)
- Properly catch permission errors when writing timer files. (#4921)

#### ml-agents / ml-agents-envs / gym-unity (Python)
129 changes: 28 additions & 101 deletions com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
@@ -1,4 +1,3 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
@@ -55,62 +54,26 @@ internal class DiscreteActionOutputApplier : TensorApplier.IApplier
{
readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;
readonly ITensorAllocator m_Allocator;
readonly ActionSpec m_ActionSpec;
readonly int[] m_StartActionIndices;
readonly float[] m_CdfBuffer;


public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
{
m_ActionSize = actionSpec.BranchSizes;
m_Multinomial = new Multinomial(seed);
m_Allocator = allocator;
m_ActionSpec = actionSpec;
m_StartActionIndices = Utilities.CumSum(m_ActionSize);

// Scratch space for computing the cumulative distribution function.
// In order to reuse it, make it the size of the largest branch.
var largestBranch = Mathf.Max(m_ActionSize);
m_CdfBuffer = new float[largestBranch];
}

public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
var batchSize = idActionPairList.Count;
var actionValues = new float[batchSize, m_ActionSize.Length];
var startActionIndices = Utilities.CumSum(m_ActionSize);
for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
{
var nBranchAction = m_ActionSize[actionIndex];
var actionProbs = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] { batchSize, nBranchAction },
data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
};

for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
{
for (var branchActionIndex = 0;
branchActionIndex < nBranchAction;
branchActionIndex++)
{
actionProbs.data[batchIndex, branchActionIndex] =
tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex];
}
}

var outputTensor = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] { batchSize, 1 },
data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
};

Eval(actionProbs, outputTensor, m_Multinomial);

for (var ii = 0; ii < batchSize; ii++)
{
actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
}
actionProbs.data.Dispose();
outputTensor.data.Dispose();
}

var agentIndex = 0;
for (var i = 0; i < actionIds.Count; i++)
{
@@ -126,74 +89,38 @@ public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int,
var discreteBuffer = actionBuffer.DiscreteActions;
for (var j = 0; j < m_ActionSize.Length; j++)
{
discreteBuffer[j] = (int)actionValues[agentIndex, j];
ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
}
}
agentIndex++;
}
}

/// <summary>
/// Draw samples from a multinomial distribution based on log-probabilities specified
/// in tensor src. The samples will be saved in the dst tensor.
/// Compute the cumulative distribution function for a given agent's action
/// given the log-probabilities.
/// The results are stored in m_CdfBuffer, which is sized to the largest branch.
/// </summary>
/// <param name="src">2-D tensor with shape batch_size x num_classes</param>
/// <param name="dst">Allocated tensor with size batch_size x num_samples</param>
/// <param name="multinomial">Multinomial object used to sample values</param>
/// <exception cref="NotImplementedException">
/// Multinomial doesn't support integer tensors
/// </exception>
/// <exception cref="ArgumentException">Issue with tensor shape or type</exception>
/// <exception cref="ArgumentNullException">
/// At least one of the tensors is not allocated
/// </exception>
public static void Eval(TensorProxy src, TensorProxy dst, Multinomial multinomial)
/// <param name="logProbs"></param>
/// <param name="batch">Index of the agent being considered</param>
/// <param name="channelOffset">Offset into the tensor's channel.</param>
/// <param name="branchSize"></param>
internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
{
if (src.DataType != typeof(float))
// Find the class maximum
var maxProb = float.NegativeInfinity;
for (var cls = 0; cls < branchSize; ++cls)
{
throw new NotImplementedException("Only float tensors are currently supported");
maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
}

if (src.valueType != dst.valueType)
// Sum the log probabilities and compute CDF
var sumProb = 0.0f;
for (var cls = 0; cls < branchSize; ++cls)
{
throw new ArgumentException(
"Source and destination tensors have different types!");
}

if (src.data == null || dst.data == null)
{
throw new ArgumentNullException();
}

if (src.data.batch != dst.data.batch)
{
throw new ArgumentException("Batch size for input and output data is different!");
Contributor
Are these exceptions no longer needed?

Contributor Author
Not sure they were ever necessary in the first place. The temporary tensors they were referencing are no longer needed.

}

var cdf = new float[src.data.channels];

for (var batch = 0; batch < src.data.batch; ++batch)
{
// Find the class maximum
var maxProb = float.NegativeInfinity;
for (var cls = 0; cls < src.data.channels; ++cls)
{
maxProb = Mathf.Max(src.data[batch, cls], maxProb);
}

// Sum the log probabilities and compute CDF
var sumProb = 0.0f;
for (var cls = 0; cls < src.data.channels; ++cls)
{
sumProb += Mathf.Exp(src.data[batch, cls] - maxProb);
cdf[cls] = sumProb;
}

// Generate the samples
for (var sample = 0; sample < dst.data.channels; ++sample)
{
dst.data[batch, sample] = multinomial.Sample(cdf);
}
sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
m_CdfBuffer[cls] = sumProb;
}
}
}
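
For context, a minimal, self-contained sketch of the sampling path the new code uses: build the CDF for one branch into a reusable buffer (with the usual max-subtraction for numerical stability), then draw an index from it. A flat float[] of log-probabilities stands in for TensorProxy, and the class and method names below are illustrative only, not the ML-Agents API.

```csharp
using System;

// Minimal sketch of the allocation-light sampling pattern above (not part of the
// diff). A flat float[] of log-probabilities for one branch stands in for
// TensorProxy; DiscreteSamplingSketch, ComputeCdf and Sample are illustrative
// names, not the ML-Agents API.
internal static class DiscreteSamplingSketch
{
    static readonly Random s_Random = new Random(2021);

    // Fills cdfBuffer[0 .. branchSize) with the unnormalized cumulative
    // distribution derived from log-probabilities, subtracting the maximum
    // first for numerical stability.
    static void ComputeCdf(float[] logProbs, int branchSize, float[] cdfBuffer)
    {
        var maxLogProb = float.NegativeInfinity;
        for (var i = 0; i < branchSize; i++)
        {
            maxLogProb = Math.Max(logProbs[i], maxLogProb);
        }

        var sum = 0.0f;
        for (var i = 0; i < branchSize; i++)
        {
            sum += (float)Math.Exp(logProbs[i] - maxLogProb);
            cdfBuffer[i] = sum;
        }
    }

    // Draws an action index in [0, branchSize) from the CDF in cdfBuffer.
    static int Sample(float[] cdfBuffer, int branchSize)
    {
        var p = (float)s_Random.NextDouble() * cdfBuffer[branchSize - 1];
        var cls = 0;
        while (cdfBuffer[cls] < p)
        {
            cls++;
        }
        return cls;
    }

    public static void Main()
    {
        // One buffer, reused for every sample instead of allocated per call.
        var logProbs = new[] { 0.1f, 1.2f, -0.5f };
        var cdfBuffer = new float[logProbs.Length];
        ComputeCdf(logProbs, logProbs.Length, cdfBuffer);
        Console.WriteLine(Sample(cdfBuffer, logProbs.Length));
    }
}
```

In the PR itself, the buffer (m_CdfBuffer) is allocated once in the constructor and sized for the largest branch, which is what removes the per-call TensorProxy and CDF-array allocations of the old Eval() path.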
17 changes: 14 additions & 3 deletions com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs
@@ -32,10 +32,11 @@ public Multinomial(int seed)
/// to be monotonic (always increasing). If the CMF is scaled, then the last entry in
/// the array will be 1.0.
/// </param>
/// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
public int Sample(float[] cmf)
/// <param name="branchSize">The number of possible branches, i.e. the effective size of the cmf array.</param>
/// <returns>A sampled index from the CMF ranging from 0 to branchSize-1.</returns>
public int Sample(float[] cmf, int branchSize)
Contributor Author

Because the float[] might be larger than we need now, we also pass the effective size of the array.

We could instead (or additionally) repeat the final sumProb value in ComputeCdf(); see the usage sketch after this diff.

{
var p = (float)m_Random.NextDouble() * cmf[cmf.Length - 1];
var p = (float)m_Random.NextDouble() * cmf[branchSize - 1];
var cls = 0;
while (cmf[cls] < p)
{
@@ -44,5 +45,15 @@ public int Sample(float[] cmf)

return cls;
}

/// <summary>
/// Samples from the Multinomial distribution defined by the provided cumulative
/// mass function.
/// </summary>
/// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
public int Sample(float[] cmf)
{
return Sample(cmf, cmf.Length);
}
}
}
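
A hypothetical usage sketch of the new Sample(float[], int) overload. The class name, seed, and uniform CDF values are made up for illustration, and it assumes the caller lives in the same assembly as the (not necessarily public) Multinomial type.

```csharp
using System;
using Unity.MLAgents.Inference.Utils;

// Hypothetical caller showing why Sample takes the effective size: the scratch
// buffer is allocated once for the largest branch, so smaller branches only use
// a prefix of it, and cmf.Length would point past their last valid entry.
// The uniform CDF below is a stand-in for one computed from log-probabilities.
internal static class SampleUsageSketch
{
    public static void Run()
    {
        var branchSizes = new[] { 3, 5, 2 };
        var cdfBuffer = new float[5];            // sized for the largest branch
        var multinomial = new Multinomial(20210209);

        foreach (var branchSize in branchSizes)
        {
            // Fill only the prefix this branch needs; entries past branchSize - 1
            // may hold stale values from a previous, larger branch.
            for (var i = 0; i < branchSize; i++)
            {
                cdfBuffer[i] = (i + 1) / (float)branchSize;
            }
            var action = multinomial.Sample(cdfBuffer, branchSize);
            Console.WriteLine(action);           // always in [0, branchSize)
        }
    }
}
```

Passing branchSize keeps the shared buffer usable even when a smaller branch follows a larger one: Sample stops at cmf[branchSize - 1], so stale entries beyond the prefix are never read.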