Update to barracuda 1.3.3 and changes to the model inputs and outputs for LSTM #5236

Merged (10 commits) on Apr 13, 2021
3 changes: 2 additions & 1 deletion com.unity.ml-agents/CHANGELOG.md
@@ -29,6 +29,7 @@ produces two `Match3Sensor`s (unless there are no special types). Previously tra
sizes and will need to be retrained. (#5181)
- The `AbstractBoard` class for integration with Match-3 games was changed to make it easier to support boards with
different sizes using the same model. For a summary of the interface changes, please see the Migration Guide. (#5189)
- Updated the Barracuda package to version `1.3.3-preview`. (#5236)

#### ml-agents / ml-agents-envs / gym-unity (Python)
- The `--resume` flag now supports resuming experiments with additional reward providers or
@@ -38,7 +39,7 @@ different sizes using the same model. For a summary of the interface changes, pl

### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
- The `.onnx` models input names have changed. All input placeholders will now use the prefix `obs_` removing the distinction between visual and vector observations. Models created with this version will not be usable with previous versions of the package (#5080)
- The `.onnx` models' input names have changed. All input placeholders now use the prefix `obs_`, removing the distinction between visual and vector observations. In addition, the LSTM inputs and outputs have changed. Models created with this version will not be usable with previous versions of the package. (#5080, #5236)
- The `.onnx` models' discrete action output now contains the discrete action values and not the logits. Models created with this version will not be usable with previous versions of the package. (#5080)
- Added ML-Agents package settings. (#5027)
- Make com.unity.modules.unityanalytics an optional dependency. (#5109)
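The renamed tensors in the changelog entries above are visible directly in an exported model. A minimal sketch of how to list them, assuming the `onnx` Python package and a hypothetical file path; the exact names depend on the behavior's sensors and action spec:

# Sketch: list the tensor names of an exported 2.0 model. The path is
# hypothetical; the "obs_{i}" prefix is the naming scheme introduced by this change.
import onnx

model = onnx.load("results/MyBehavior/MyBehavior.onnx")
print([inp.name for inp in model.graph.input])   # e.g. ['obs_0', 'obs_1', ...]
print([out.name for out in model.graph.output])  # e.g. ['continuous_actions', ...]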
7 changes: 6 additions & 1 deletion com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
@@ -182,7 +182,7 @@ public MemoryOutputApplier(
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];
var memorySize = tensorProxy.data.width;
for (var i = 0; i < actionIds.Count; i++)
{
var agentId = actionIds[i];
@@ -194,6 +194,11 @@ public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int,
memory.AddRange(Enumerable.Repeat(0f, memorySize));
}

for (var j = 0; j < memorySize; j++)
{
memory[j] = tensorProxy.data[agentIndex, 0, j, 0];
}

m_Memories[agentId] = memory;
agentIndex++;
}
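The new indexing reads the memory vector out of Barracuda's batch/height/width/channels layout, which is why the memory size now comes from `tensorProxy.data.width`. An illustrative sketch of that layout in numpy; the shapes are assumptions for illustration only, not the runtime API:

# Illustrative only: the recurrent memory tensor is assumed to be laid out as
# (batch, height=1, width=memory_size, channels=1), so element j of agent i
# lives at [i, 0, j, 0], mirroring tensorProxy.data[agentIndex, 0, j, 0].
import numpy as np

batch, memory_size = 3, 8
memories = np.zeros((batch, 1, memory_size, 1), dtype=np.float32)

memories[1, 0, 5, 0] = 0.25          # write element 5 of agent 1's memory
agent_memory = memories[1, 0, :, 0]  # read back agent 1's full memory vector
print(agent_memory.shape)            # (8,)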
30 changes: 21 additions & 9 deletions com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
@@ -77,15 +77,19 @@ public static IReadOnlyList<TensorProxy> GetInputTensors(this Model model)
});
}

var modelVersion = model.GetVersion();
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
foreach (var mem in model.memories)
{
tensors.Add(new TensorProxy
{
name = mem.input,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = TensorUtils.TensorShapeFromBarracuda(mem.shape)
});
}
}

tensors.Sort((el1, el2) => string.Compare(el1.name, el2.name, StringComparison.InvariantCulture));
@@ -142,12 +146,20 @@ public static string[] GetOutputNames(this Model model)
names.Add(model.DiscreteOutputName());
}

var modelVersion = model.GetVersion();
var memory = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
if (memory > 0)
{
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
foreach (var mem in model.memories)
{
names.Add(mem.output);
}
}
else
{
names.Add(TensorNames.RecurrentOutput);
}
}

50 changes: 43 additions & 7 deletions com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
@@ -17,7 +17,21 @@ internal class BarracudaModelParamLoader

internal enum ModelApiVersion
{
/// <summary>
/// ML-Agents model version for versions 1.x.y
/// The observations are split between vector and visual observations
/// There are legacy action outputs for discrete and continuous actions
/// LSTM inputs and outputs are handled by Barracuda
/// </summary>
MLAgents1_0 = 2,

/// <summary>
/// All observations are treated the same and named obs_{i} with i being
/// the sensor index
/// Legacy "action" output is no longer present
/// LSTM inputs and outputs are treated like regular inputs and outputs
/// and no longer managed by Barracuda
/// </summary>
MLAgents2_0 = 3,
MinSupportedVersion = MLAgents1_0,
MaxSupportedVersion = MLAgents2_0
@@ -289,12 +303,22 @@ ISensor[] sensors
// If the model has a non-negative memory size but requires a recurrent input
if (memory > 0)
{
if (!tensorsNames.Any(x => x.EndsWith("_h")) ||
!tensorsNames.Any(x => x.EndsWith("_c")))
var modelVersion = model.GetVersion();
var netHasMemories = false;
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
netHasMemories = tensorsNames.Any(x => x.EndsWith("_h")) &&
tensorsNames.Any(x => x.EndsWith("_c"));
}
else
{
netHasMemories = tensorsNames.Any(x => x == TensorNames.RecurrentInPlaceholder);
}
if (!netHasMemories)
{
failedModelChecks.Add(
FailedCheck.Warning("The model does not contain a Recurrent Input Node but has memory_size.")
);
}
}

@@ -329,15 +353,27 @@ static IEnumerable<FailedCheck> CheckOutputTensorPresence(Model model, int memor
// If there is no Recurrent Output but the model is Recurrent.
if (memory > 0)
{
var memOutputs = model.memories.Select(x => x.output).ToList();

if (!memOutputs.Any(x => x.EndsWith("_h")) ||
!memOutputs.Any(x => x.EndsWith("_c")))
var netHasMemories = false;
var modelVersion = model.GetVersion();
if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
var memOutputs = model.memories.Select(x => x.output).ToList();
netHasMemories = memOutputs.Any(x => x.EndsWith("_h")) &&
memOutputs.Any(x => x.EndsWith("_c"));
}
else
{
var allOutputs = model.GetOutputNames().ToList();
netHasMemories = allOutputs.Any(x => x == TensorNames.RecurrentOutput);
}
if (!netHasMemories)
{
failedModelChecks.Add(
FailedCheck.Warning("The model does not contain a Recurrent Output Node but has memory_size.")
);
}

}
return failedModelChecks;
}
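The two presence checks above can be mirrored outside Unity by inspecting the exported graph. A rough sketch, assuming the `onnx` package, a hypothetical file path, and the `recurrent_in`/`recurrent_out` names used by `TensorNames` for 2.0 models:

# Sketch of the version-gated memory check: a 2.0-style recurrent model exposes
# its LSTM state as an ordinary graph input/output pair rather than relying on
# Barracuda-managed memories.
import onnx

model = onnx.load("results/MyBehavior/MyBehavior.onnx")
input_names = {inp.name for inp in model.graph.input}
output_names = {out.name for out in model.graph.output}

has_recurrent_io = "recurrent_in" in input_names and "recurrent_out" in output_names
print("2.0-style recurrent model:", has_recurrent_io)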
7 changes: 4 additions & 3 deletions com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
@@ -96,7 +96,8 @@ public void Generate(
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var memorySize = tensorProxy.data.width;

var agentIndex = 0;
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
{
@@ -112,7 +113,7 @@ {
{
for (var j = 0; j < memorySize; j++)
{
tensorProxy.data[agentIndex, j] = 0;
tensorProxy.data[agentIndex, 0, j, 0] = 0;
}
agentIndex++;
continue;
@@ -123,7 +124,7 @@
{
break;
}
tensorProxy.data[agentIndex, j] = memory[j];
tensorProxy.data[agentIndex, 0, j, 0] = memory[j];
}
agentIndex++;
}
11 changes: 7 additions & 4 deletions com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
@@ -67,10 +67,10 @@ public TensorApplier(
var tensorName = model.ContinuousOutputName();
m_Dict[tensorName] = new ContinuousActionOutputApplier(actionSpec);
}
var modelVersion = model.GetVersion();
if (actionSpec.NumDiscreteActions > 0)
{
var tensorName = model.DiscreteOutputName();
if (modelVersion == (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents1_0)
{
m_Dict[tensorName] = new LegacyDiscreteActionOutputApplier(actionSpec, seed, allocator);
@@ -82,10 +82,13 @@ }
}
m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(memories);

if (modelVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
for (var i = 0; i < model?.memories.Count; i++)
{
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
}
}
}

9 changes: 6 additions & 3 deletions com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
@@ -67,10 +67,13 @@ public TensorGenerator(
m_Dict[TensorNames.RecurrentInPlaceholder] =
new RecurrentInputGenerator(allocator, memories);

if (m_ApiVersion < (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents2_0)
{
for (var i = 0; i < model.memories.Count; i++)
{
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
}

m_Dict[TensorNames.PreviousActionPlaceholder] =
2 changes: 1 addition & 1 deletion com.unity.ml-agents/package.json
@@ -5,7 +5,7 @@
"unity": "2019.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.3.2-preview",
"com.unity.barracuda": "1.3.3-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0"
}
13 changes: 13 additions & 0 deletions ml-agents/mlagents/trainers/torch/layers.py
@@ -2,6 +2,7 @@
import abc
from typing import Tuple
from enum import Enum
from mlagents.trainers.torch.model_serialization import exporting_to_onnx


class Swish(torch.nn.Module):
@@ -203,10 +204,22 @@ def memory_size(self) -> int:
def forward(
self, input_tensor: torch.Tensor, memories: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:

if exporting_to_onnx.is_exporting():
# This transpose is needed both at input and output of the LSTM when
# exporting because ONNX will expect (sequence_len, batch, memory_size)
# instead of (batch, sequence_len, memory_size)
memories = torch.transpose(memories, 0, 1)

# We don't use torch.split here since it is not supported by Barracuda
h0 = memories[:, :, : self.hidden_size].contiguous()
c0 = memories[:, :, self.hidden_size :].contiguous()

hidden = (h0, c0)
lstm_out, hidden_out = self.lstm(input_tensor, hidden)
output_mem = torch.cat(hidden_out, dim=-1)

if exporting_to_onnx.is_exporting():
output_mem = torch.transpose(output_mem, 0, 1)

return lstm_out, output_mem
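Outside the export path, the memory handling above reduces to a small shape manipulation. A minimal standalone sketch with assumed sizes and a single-layer LSTM, not the trainer's actual wrapper:

# h and c are stored concatenated along the last axis of a single memory tensor
# of shape (num_layers, batch, 2 * hidden_size); they are split before the LSTM
# call and re-concatenated afterwards. torch.split is avoided in the real code
# because Barracuda does not support it.
import torch

batch, input_size, hidden_size = 4, 8, 16
lstm = torch.nn.LSTM(input_size, hidden_size, batch_first=True)

memories = torch.zeros(1, batch, 2 * hidden_size)
h0 = memories[:, :, :hidden_size].contiguous()   # (1, batch, hidden_size)
c0 = memories[:, :, hidden_size:].contiguous()

seq = torch.zeros(batch, 1, input_size)           # one timestep per forward call
lstm_out, (h_n, c_n) = lstm(seq, (h0, c0))
new_memories = torch.cat((h_n, c_n), dim=-1)      # (1, batch, 2 * hidden_size)
print(lstm_out.shape, new_memories.shape)         # [4, 1, 16] and [1, 4, 32]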
2 changes: 1 addition & 1 deletion ml-agents/mlagents/trainers/torch/networks.py
@@ -546,7 +546,7 @@ def forward(


class SimpleActor(nn.Module, Actor):
MODEL_EXPORT_VERSION = 3
MODEL_EXPORT_VERSION = 3 # Corresponds to ModelApiVersion.MLAgents2_0

def __init__(
self,