Update to Barracuda 1.3.3 and changes to the model inputs and outputs for LSTM #5236

Merged: 10 commits, Apr 13, 2021
3 changes: 2 additions & 1 deletion com.unity.ml-agents/CHANGELOG.md
@@ -29,6 +29,7 @@
produces two `Match3Sensor`s (unless there are no special types). Previously trained models have different observation
sizes and will need to be retrained. (#5181)
- The `AbstractBoard` class for integration with Match-3 games was changed to make it easier to support boards with
different sizes using the same model. For a summary of the interface changes, please see the Migration Guide. (#5189)
- Updated the Barracuda package to version `1.3.3-preview`. (#5236)

#### ml-agents / ml-agents-envs / gym-unity (Python)
- The `--resume` flag now supports resuming experiments with additional reward providers or
@@ -38,7 +39,7 @@
different sizes using the same model. For a summary of the interface changes, please see the Migration Guide. (#5189)

### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
- The `.onnx` models input names have changed. All input placeholders will now use the prefix `obs_` removing the distinction between visual and vector observations. Models created with this version will not be usable with previous versions of the package (#5080)
- The `.onnx` models input names have changed. All input placeholders will now use the prefix `obs_` removing the distinction between visual and vector observations. Models created with this version will not be usable with previous versions of the package (#5080, #5236)
Review comment (Contributor): Maybe add something about LSTM as well

- The `.onnx` models discrete action output now contains the discrete actions values and not the logits. Models created with this version will not be usable with previous versions of the package (#5080)
- Added ML-Agents package settings. (#5027)
- Make com.unity.modules.unityanalytics an optional dependency. (#5109)
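The renaming described above can be checked directly on an exported model with the `onnx` Python package. A minimal sketch; the path is hypothetical and the exact names printed depend on the behavior's sensors and action spec:

```python
import onnx

# Hypothetical path to a model exported with this version of ml-agents.
model = onnx.load("results/MyBehavior/MyBehavior.onnx")

# All observation inputs now share the obs_ prefix; recurrent models also
# expose recurrent_in / recurrent_out as ordinary graph inputs and outputs.
print([i.name for i in model.graph.input])   # e.g. ['obs_0', 'obs_1', 'recurrent_in']
print([o.name for o in model.graph.output])  # e.g. ['version_number', 'memory_size',
                                             #       'discrete_actions', 'recurrent_out']
```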
7 changes: 6 additions & 1 deletion com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
@@ -182,7 +182,7 @@ public MemoryOutputApplier(
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];
var memorySize = tensorProxy.data.width;
for (var i = 0; i < actionIds.Count; i++)
{
var agentId = actionIds[i];
@@ -194,6 +194,11 @@ public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
memory.AddRange(Enumerable.Repeat(0f, memorySize));
}

for (var j = 0; j < memorySize; j++)
{
memory[j] = tensorProxy.data[agentIndex, 0, j, 0];
}

m_Memories[agentId] = memory;
agentIndex++;
}
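The switch from `shape[shape.Length - 1]` to `data.width`, and the four-index lookup, follow from Barracuda's tensor layout: tensors are stored as (batch, height, width, channels), and a recurrent memory vector occupies the width axis. A small numpy sketch of that layout (illustrative only; this is not the Barracuda API):

```python
import numpy as np

batch_size, memory_size = 4, 8
# Barracuda layout: (batch, height, width, channels); the memory vector
# sits along width, with height = channels = 1.
data = np.zeros((batch_size, 1, memory_size, 1), dtype=np.float32)

agent_index = 2
memory = [0.0] * memory_size
for j in range(memory_size):
    # Mirrors memory[j] = tensorProxy.data[agentIndex, 0, j, 0] above.
    memory[j] = float(data[agent_index, 0, j, 0])
```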
30 changes: 21 additions & 9 deletions com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
@@ -77,15 +77,19 @@ public static IReadOnlyList<TensorProxy> GetInputTensors(this Model model)
});
}

foreach (var mem in model.memories)
var modelVersion = model.GetVersion();
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
tensors.Add(new TensorProxy
foreach (var mem in model.memories)
{
name = mem.input,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = TensorUtils.TensorShapeFromBarracuda(mem.shape)
});
tensors.Add(new TensorProxy
{
name = mem.input,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = TensorUtils.TensorShapeFromBarracuda(mem.shape)
});
}
}

tensors.Sort((el1, el2) => string.Compare(el1.name, el2.name, StringComparison.InvariantCulture));
@@ -142,12 +146,20 @@ public static string[] GetOutputNames(this Model model)
names.Add(model.DiscreteOutputName());
}

var modelVersion = model.GetVersion();
var memory = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
if (memory > 0)
{
foreach (var mem in model.memories)
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
foreach (var mem in model.memories)
{
names.Add(mem.output);
}
}
else
{
names.Add(mem.output);
names.Add(TensorNames.RecurrentOutput);
}
}
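With the 2.0 naming, the recurrent state is a single ordinary output (`TensorNames.RecurrentOutput`, i.e. `recurrent_out`) instead of the Barracuda-managed per-memory outputs. A rough sketch of the resulting inference loop using onnxruntime; the observation name, shapes, and memory size are assumptions and must match the actual exported model:

```python
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("results/MyBehavior/MyBehavior.onnx")  # hypothetical path

batch, memory_size = 1, 32  # must match the memory size the model was trained with
memories = np.zeros((batch, 1, memory_size), dtype=np.float32)
for _ in range(10):
    recurrent_out, = session.run(
        ["recurrent_out"],
        {
            "obs_0": np.zeros((batch, 8), dtype=np.float32),  # illustrative observation
            "recurrent_in": memories,
        },
    )
    memories = recurrent_out  # fed back as recurrent_in on the next step
```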

50 changes: 43 additions & 7 deletions com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
@@ -17,7 +17,21 @@ internal class BarracudaModelParamLoader

internal enum ModelApiVersion
{
/// <summary>
/// ML-Agents model version for versions 1.x.y
/// The observations are split between vector and visual observations
/// There are legacy action outputs for discrete and continuous actions
/// LSTM inputs and outputs are handled by Barracuda
/// </summary>
MLAgents1_0 = 2,

/// <summary>
/// All observations are treated the same and named obs_{i} with i being
/// the sensor index
/// Legacy "action" output is no longer present
/// LSTM inputs and outputs are treated like regular inputs and outputs
/// and no longer managed by Barracuda
/// </summary>
MLAgents2_0 = 3,
MinSupportedVersion = MLAgents1_0,
MaxSupportedVersion = MLAgents2_0
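These two values drive all of the version gating in this change. A compact Python sketch of the dispatch pattern (pseudocode mirroring the C# logic; the constants correspond to the enum above):

```python
MLAGENTS_1_0 = 2  # ModelApiVersion.MLAgents1_0
MLAGENTS_2_0 = 3  # ModelApiVersion.MLAgents2_0

def recurrent_output_names(model_version, barracuda_memories):
    """Names a runtime should fetch for the recurrent state."""
    if model_version < MLAGENTS_2_0:
        # Pre-2.0: Barracuda managed one output per internal memory (_h/_c pairs).
        return [mem["output"] for mem in barracuda_memories]
    # 2.0: one ordinary output carries the packed (h, c) state.
    return ["recurrent_out"]

print(recurrent_output_names(2, [{"output": "lstm_h"}, {"output": "lstm_c"}]))
print(recurrent_output_names(3, []))
```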
@@ -289,12 +303,22 @@ ISensor[] sensors
// If the model has a non-negative memory size but requires a recurrent input
if (memory > 0)
{
if (!tensorsNames.Any(x => x.EndsWith("_h")) ||
!tensorsNames.Any(x => x.EndsWith("_c")))
var modelVersion = model.GetVersion();
var netHasMemories = false;
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
netHasMemories = tensorsNames.Any(x => x.EndsWith("_h")) &&
tensorsNames.Any(x => x.EndsWith("_c"));
}
else
{
netHasMemories = tensorsNames.Any(x => x == TensorNames.RecurrentInPlaceholder);
}
if (!netHasMemories)
{
failedModelChecks.Add(
FailedCheck.Warning("The model does not contain a Recurrent Input Node but has memory_size.")
);
}
}

@@ -329,15 +353,27 @@ static IEnumerable<FailedCheck> CheckOutputTensorPresence(Model model, int memory)
// If there is no Recurrent Output but the model is Recurrent.
if (memory > 0)
{
var memOutputs = model.memories.Select(x => x.output).ToList();

if (!memOutputs.Any(x => x.EndsWith("_h")) ||
!memOutputs.Any(x => x.EndsWith("_c")))
var netHasMemories = false;
var modelVersion = model.GetVersion();
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
var memOutputs = model.memories.Select(x => x.output).ToList();
netHasMemories = memOutputs.Any(x => x.EndsWith("_h")) &&
memOutputs.Any(x => x.EndsWith("_c"));
}
else
{
var allOutputs = model.GetOutputNames().ToList();
netHasMemories = allOutputs.Any(x => x == TensorNames.RecurrentOutput);
}
if (!netHasMemories)
{
failedModelChecks.Add(
FailedCheck.Warning("The model does not contain a Recurrent Output Node but has memory_size.")
);
}

}
return failedModelChecks;
}
7 changes: 4 additions & 3 deletions com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
@@ -96,7 +96,8 @@ public void Generate(
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var memorySize = tensorProxy.data.width;

var agentIndex = 0;
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
{
@@ -112,7 +113,7 @@
{
for (var j = 0; j < memorySize; j++)
{
tensorProxy.data[agentIndex, j] = 0;
tensorProxy.data[agentIndex, 0, j, 0] = 0;
}
agentIndex++;
continue;
@@ -123,7 +124,7 @@
{
break;
}
tensorProxy.data[agentIndex, j] = memory[j];
tensorProxy.data[agentIndex, 0, j, 0] = memory[j];
}
agentIndex++;
}
11 changes: 7 additions & 4 deletions com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
@@ -67,10 +67,10 @@ public TensorApplier(
var tensorName = model.ContinuousOutputName();
m_Dict[tensorName] = new ContinuousActionOutputApplier(actionSpec);
}
var modelVersion = model.GetVersion();
if (actionSpec.NumDiscreteActions > 0)
{
var tensorName = model.DiscreteOutputName();
var modelVersion = model.GetVersion();
if (modelVersion == (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents1_0)
{
m_Dict[tensorName] = new LegacyDiscreteActionOutputApplier(actionSpec, seed, allocator);
@@ -82,10 +82,13 @@
}
m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(memories);

for (var i = 0; i < model?.memories.Count; i++)
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
for (var i = 0; i < model?.memories.Count; i++)
{
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
}
}
}

9 changes: 6 additions & 3 deletions com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
@@ -67,10 +67,13 @@ public TensorGenerator(
m_Dict[TensorNames.RecurrentInPlaceholder] =
new RecurrentInputGenerator(allocator, memories);

for (var i = 0; i < model.memories.Count; i++)
if (m_ApiVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
for (var i = 0; i < model.memories.Count; i++)
{
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
}

m_Dict[TensorNames.PreviousActionPlaceholder] =
2 changes: 1 addition & 1 deletion com.unity.ml-agents/package.json
@@ -5,7 +5,7 @@
"unity": "2019.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.3.2-preview",
"com.unity.barracuda": "1.3.3-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0"
}
13 changes: 13 additions & 0 deletions ml-agents/mlagents/trainers/torch/layers.py
@@ -2,6 +2,7 @@
import abc
from typing import Tuple
from enum import Enum
from mlagents.trainers.torch.model_serialization import exporting_to_onnx


class Swish(torch.nn.Module):
@@ -206,7 +207,19 @@ def forward(
# We don't use torch.split here since it is not supported by Barracuda
h0 = memories[:, :, : self.hidden_size].contiguous()
c0 = memories[:, :, self.hidden_size :].contiguous()

if exporting_to_onnx.is_exporting():
Review comment (Contributor): Is the comment above about torch.split still accurate?
Reply (Contributor Author): Yes, this corresponds to a slice operator, not a split.
Reply (Contributor): I actually think split was added to recent versions of Barracuda, and we could use it here if we're breaking compat with old versions anyway. https://docs.unity3d.com/Packages/com.unity.barracuda@1.3/manual/SupportedOperators.html#Split
# This transpose is needed both at input and output of the LSTM when
# exporting because ONNX will expect (sequence_len, batch, memory_size)
# instead of (batch, sequence_len, memory_size)
h0 = torch.transpose(h0, 0, 1)
Review comment (@ervteng, Apr 13, 2021): I think we should transpose it before the split into (h0, c0) as it will be marginally faster and will be symmetrical with the transpose on the output below. But it's not a dealbreaker.

c0 = torch.transpose(c0, 0, 1)

hidden = (h0, c0)
lstm_out, hidden_out = self.lstm(input_tensor, hidden)
output_mem = torch.cat(hidden_out, dim=-1)

if exporting_to_onnx.is_exporting():
output_mem = torch.transpose(output_mem, 0, 1)

return lstm_out, output_mem
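A self-contained sketch of the export path above, including the `torch.split` alternative raised in review. Assumptions: a single-layer LSTM with `batch_first=True`, hidden size 16, and a plain boolean standing in for `exporting_to_onnx.is_exporting()`:

```python
import torch

hidden_size = 16
lstm = torch.nn.LSTM(input_size=8, hidden_size=hidden_size, batch_first=True)

batch = 4
input_tensor = torch.randn(batch, 1, 8)            # (batch, sequence_len, features)
memories = torch.zeros(batch, 1, 2 * hidden_size)  # h and c concatenated

# Manual slices export as ONNX Slice ops; torch.split would export as Split,
# which only newer Barracuda versions support.
h0 = memories[:, :, :hidden_size].contiguous()
c0 = memories[:, :, hidden_size:].contiguous()
h0_alt, c0_alt = torch.split(memories, hidden_size, dim=-1)
assert torch.equal(h0, h0_alt) and torch.equal(c0, c0_alt)

exporting = True  # stand-in for exporting_to_onnx.is_exporting()
if exporting:
    # nn.LSTM expects hidden state shaped (num_layers, batch, hidden_size);
    # the exported graph carries memories as (batch, 1, memory_size).
    h0 = torch.transpose(h0, 0, 1)
    c0 = torch.transpose(c0, 0, 1)

lstm_out, hidden_out = lstm(input_tensor, (h0, c0))
output_mem = torch.cat(hidden_out, dim=-1)
if exporting:
    output_mem = torch.transpose(output_mem, 0, 1)  # back to (batch, 1, memory_size)

print(lstm_out.shape, output_mem.shape)  # torch.Size([4, 1, 16]) torch.Size([4, 1, 32])
```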
3 changes: 2 additions & 1 deletion ml-agents/mlagents/trainers/torch/networks.py
@@ -546,7 +546,7 @@ def forward(


class SimpleActor(nn.Module, Actor):
MODEL_EXPORT_VERSION = 3
MODEL_EXPORT_VERSION = 3 # Corresponds to ModelApiVersion.MLAgents2_0

def __init__(
self,
@@ -643,6 +643,7 @@ def forward(
At this moment, torch.onnx.export() doesn't accept None as tensor to be exported,
so the size of return tuple varies with action spec.
"""

Review comment (Contributor): Extra line?

encoding, memories_out = self.network_body(
inputs, memories=memories, sequence_length=1
)