From 5624d002cd33f8eb9a935793aa446a662a529039 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 23 Oct 2019 13:49:22 -0700 Subject: [PATCH 01/17] Initial commit removing memories from C# and deprecating memory fields in proto --- .../Editor/Tests/DemonstrationTests.cs | 11 +- .../EditModeTestInternalBrainTensorApplier.cs | 54 ++----- ...ditModeTestInternalBrainTensorGenerator.cs | 32 +--- UnitySDK/Assets/ML-Agents/Scripts/Agent.cs | 31 ---- .../ML-Agents/Scripts/Grpc/GrpcExtensions.cs | 7 +- .../Assets/ML-Agents/Scripts/ICommunicator.cs | 17 ++- .../Scripts/InferenceBrain/ApplierImpl.cs | 71 +++++---- .../Scripts/InferenceBrain/GeneratorImpl.cs | 139 +++++++++++------- .../Scripts/InferenceBrain/ModelRunner.cs | 9 +- .../Scripts/InferenceBrain/TensorApplier.cs | 13 +- .../Scripts/InferenceBrain/TensorGenerator.cs | 15 +- .../Scripts/Policy/BarracudaPolicy.cs | 5 +- .../ML-Agents/Scripts/Policy/RemotePolicy.cs | 2 +- .../communicator_objects/agent_action.proto | 2 +- .../communicator_objects/agent_info.proto | 2 +- 15 files changed, 190 insertions(+), 220 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs index 4500bd21ed..8c79d51d1d 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs @@ -33,8 +33,8 @@ public void TestStoreInitalize() { vectorObservationSize = 3, numStackedVectorObservations = 2, - vectorActionDescriptions = new[] {"TestActionA", "TestActionB"}, - vectorActionSize = new[] {2, 2}, + vectorActionDescriptions = new[] { "TestActionA", "TestActionB" }, + vectorActionSize = new[] { 2, 2 }, vectorActionSpaceType = SpaceType.Discrete }; @@ -46,14 +46,13 @@ public void TestStoreInitalize() var agentInfo = new AgentInfo { reward = 1f, - actionMasks = new[] {false, true}, + actionMasks = new[] { false, true }, done = true, id = 5, maxStepReached = true, - memories = new List(), - stackedVectorObservation = new List() {1f, 1f, 1f}, + stackedVectorObservation = new List() { 1f, 1f, 1f }, storedTextActions = "TestAction", - storedVectorActions = new[] {0f, 1f}, + storedVectorActions = new[] { 0f, 1f }, textObservation = "TestAction", }; diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs index 3b3ef2245d..232dbbcc91 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs @@ -13,7 +13,7 @@ private class TestAgent : Agent { public AgentAction GetAction() { - var f = typeof(Agent).GetField( + var f = typeof(Agent).GetField( "m_Action", BindingFlags.Instance | BindingFlags.NonPublic); return (AgentAction)f.GetValue(this); } @@ -26,7 +26,7 @@ private List GetFakeAgentInfos() var goB = new GameObject("goB"); var agentB = goB.AddComponent(); - return new List {agentA, agentB}; + return new List { agentA, agentB }; } [Test] @@ -44,13 +44,13 @@ public void ApplyContinuousActionOutput() { var inputTensor = new TensorProxy() { - shape = new long[] {2, 3}, - data = new Tensor(2, 3, new float[] {1, 2, 3, 4, 5, 6}) + shape = new long[] { 2, 3 }, + data = new Tensor(2, 3, new float[] { 1, 2, 3, 4, 5, 6 }) }; var agentInfos = GetFakeAgentInfos(); var applier = new ContinuousActionOutputApplier(); - applier.Apply(inputTensor, agentInfos); + applier.Apply(inputTensor, 
agentInfos, null); var agents = agentInfos; var agent = agents[0] as TestAgent; @@ -73,16 +73,16 @@ public void ApplyDiscreteActionOutput() { var inputTensor = new TensorProxy() { - shape = new long[] {2, 5}, + shape = new long[] { 2, 5 }, data = new Tensor( 2, 5, - new[] {0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f}) + new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f }) }; var agentInfos = GetFakeAgentInfos(); var alloc = new TensorCachingAllocator(); - var applier = new DiscreteActionOutputApplier(new[] {2, 3}, 0, alloc); - applier.Apply(inputTensor, agentInfos); + var applier = new DiscreteActionOutputApplier(new[] { 2, 3 }, 0, alloc); + applier.Apply(inputTensor, agentInfos, null); var agents = agentInfos; var agent = agents[0] as TestAgent; @@ -99,48 +99,18 @@ public void ApplyDiscreteActionOutput() alloc.Dispose(); } - [Test] - public void ApplyMemoryOutput() - { - var inputTensor = new TensorProxy() - { - shape = new long[] {2, 5}, - data = new Tensor( - 2, - 5, - new[] {0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f}) - }; - var agentInfos = GetFakeAgentInfos(); - - var applier = new MemoryOutputApplier(); - applier.Apply(inputTensor, agentInfos); - var agents = agentInfos; - - var agent = agents[0] as TestAgent; - Assert.NotNull(agent); - var action = agent.GetAction(); - Assert.AreEqual(action.memories[0], 0.5f); - Assert.AreEqual(action.memories[1], 22.5f); - - agent = agents[1] as TestAgent; - Assert.NotNull(agent); - action = agent.GetAction(); - Assert.AreEqual(action.memories[2], 6); - Assert.AreEqual(action.memories[3], 7); - } - [Test] public void ApplyValueEstimate() { var inputTensor = new TensorProxy() { - shape = new long[] {2, 1}, - data = new Tensor(2, 1, new[] {0.5f, 8f}) + shape = new long[] { 2, 1 }, + data = new Tensor(2, 1, new[] { 0.5f, 8f }) }; var agentInfos = GetFakeAgentInfos(); var applier = new ValueEstimateApplier(); - applier.Apply(inputTensor, agentInfos); + applier.Apply(inputTensor, agentInfos, null); var agents = agentInfos; var agent = agents[0] as TestAgent; diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs index 0d64b92214..b58b985746 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs @@ -16,7 +16,6 @@ private static IEnumerable GetFakeAgentInfos() var infoA = new AgentInfo { stackedVectorObservation = new[] { 1f, 2f, 3f }.ToList(), - memories = null, storedVectorActions = new[] { 1f, 2f }, actionMasks = null }; @@ -25,7 +24,6 @@ private static IEnumerable GetFakeAgentInfos() var infoB = new AgentInfo { stackedVectorObservation = new[] { 4f, 5f, 6f }.ToList(), - memories = new[] { 1f, 1f, 1f }.ToList(), storedVectorActions = new[] { 3f, 4f }, actionMasks = new[] { true, false, false, false, false }, }; @@ -52,7 +50,7 @@ public void GenerateBatchSize() var alloc = new TensorCachingAllocator(); const int batchSize = 4; var generator = new BatchSizeGenerator(alloc); - generator.Generate(inputTensor, batchSize, null); + generator.Generate(inputTensor, batchSize, null, null); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0], batchSize); alloc.Dispose(); @@ -65,7 +63,7 @@ public void GenerateSequenceLength() var alloc = new TensorCachingAllocator(); const int batchSize = 4; var generator = new SequenceLengthGenerator(alloc); - generator.Generate(inputTensor, 
batchSize, null); + generator.Generate(inputTensor, batchSize, null, null); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0], 1); alloc.Dispose(); @@ -82,7 +80,7 @@ public void GenerateVectorObservation() var agentInfos = GetFakeAgentInfos(); var alloc = new TensorCachingAllocator(); var generator = new VectorObservationGenerator(alloc); - generator.Generate(inputTensor, batchSize, agentInfos); + generator.Generate(inputTensor, batchSize, agentInfos, null); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0, 0], 1); Assert.AreEqual(inputTensor.data[0, 2], 3); @@ -91,26 +89,6 @@ public void GenerateVectorObservation() alloc.Dispose(); } - [Test] - public void GenerateRecurrentInput() - { - var inputTensor = new TensorProxy - { - shape = new long[] { 2, 5 } - }; - const int batchSize = 4; - var agentInfos = GetFakeAgentInfos(); - var alloc = new TensorCachingAllocator(); - var generator = new RecurrentInputGenerator(alloc); - generator.Generate(inputTensor, batchSize, agentInfos); - Assert.IsNotNull(inputTensor.data); - Assert.AreEqual(inputTensor.data[0, 0], 0); - Assert.AreEqual(inputTensor.data[0, 4], 0); - Assert.AreEqual(inputTensor.data[1, 0], 1); - Assert.AreEqual(inputTensor.data[1, 4], 0); - alloc.Dispose(); - } - [Test] public void GeneratePreviousActionInput() { @@ -124,7 +102,7 @@ public void GeneratePreviousActionInput() var alloc = new TensorCachingAllocator(); var generator = new PreviousActionInputGenerator(alloc); - generator.Generate(inputTensor, batchSize, agentInfos); + generator.Generate(inputTensor, batchSize, agentInfos, null); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0, 0], 1); Assert.AreEqual(inputTensor.data[0, 1], 2); @@ -145,7 +123,7 @@ public void GenerateActionMaskInput() var agentInfos = GetFakeAgentInfos(); var alloc = new TensorCachingAllocator(); var generator = new ActionMaskInputGenerator(alloc); - generator.Generate(inputTensor, batchSize, agentInfos); + generator.Generate(inputTensor, batchSize, agentInfos, null); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0, 0], 1); Assert.AreEqual(inputTensor.data[0, 4], 1); diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs index d05a65526b..108720b90e 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs @@ -51,14 +51,6 @@ public struct AgentInfo /// public bool[] actionMasks; - /// - /// Used by the Trainer to store information about the agent. This data - /// structure is not consumed or modified by the agent directly, they are - /// just the owners of their trainier's memory. Currently, however, the - /// size of the memory is in the Brain properties. - /// - public List memories; - /// /// Current agent reward. /// @@ -96,7 +88,6 @@ public struct AgentAction { public float[] vectorActions; public string textActions; - public List memories; public float value; /// TODO(cgoy): All references to protobuf objects should be removed. 
public CommunicatorObjects.CustomActionProto customAction; @@ -484,8 +475,6 @@ void ResetData() if (m_Info.textObservation == null) m_Info.textObservation = ""; m_Action.textActions = ""; - m_Info.memories = new List(); - m_Action.memories = new List(); m_Info.vectorObservation = new List(param.vectorObservationSize); m_Info.stackedVectorObservation = @@ -563,7 +552,6 @@ void SendInfoToBrain() return; } - m_Info.memories = m_Action.memories; m_Info.storedVectorActions = m_Action.vectorActions; m_Info.storedTextActions = m_Action.textActions; m_Info.vectorObservation.Clear(); @@ -902,25 +890,6 @@ public void UpdateVectorAction(float[] vectorActions) m_Action.vectorActions = vectorActions; } - /// - /// Updates the memories action. - /// - /// Memories. - public void UpdateMemoriesAction(List memories) - { - m_Action.memories = memories; - } - - public void AppendMemoriesAction(List memories) - { - m_Action.memories.AddRange(memories); - } - - public List GetMemoriesAction() - { - return m_Action.memories; - } - /// /// Updates the value of the agent. /// diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs index cc6cf9534c..31ec89e9e9 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs @@ -29,10 +29,6 @@ public static AgentInfoProto ToProto(this AgentInfo ai) Id = ai.id, CustomObservation = ai.customObservation }; - if (ai.memories != null) - { - agentInfoProto.Memories.Add(ai.memories); - } if (ai.actionMasks != null) { @@ -164,7 +160,6 @@ public static AgentAction ToAgentAction(this AgentActionProto aap) { vectorActions = aap.VectorActions.ToArray(), textActions = aap.TextActions, - memories = aap.Memories.ToList(), value = aap.Value, customAction = aap.CustomAction }; @@ -185,7 +180,7 @@ public static CompressedObservationProto ToProto(this CompressedObservation obs) var obsProto = new CompressedObservationProto { Data = ByteString.CopyFrom(obs.Data), - CompressionType = (CompressionTypeProto) obs.CompressionType, + CompressionType = (CompressionTypeProto)obs.CompressionType, }; obsProto.Shape.AddRange(obs.Shape); return obsProto; diff --git a/UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs b/UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs index 936a29394b..a706223c10 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs @@ -108,7 +108,7 @@ Since the messages are sent back and forth with exchange and simultaneously when UnityOutput and UnityInput can be extended to provide functionalities beyond RL UnityRLOutput and UnityRLInput can be extended to provide new RL functionalities */ - public interface ICommunicator : IBatchedDecisionMaker + public interface ICommunicator { /// /// Quit was received by the communicator. @@ -141,6 +141,15 @@ public interface ICommunicator : IBatchedDecisionMaker /// The Parameters for the Brain being registered void SubscribeBrain(string name, BrainParameters brainParameters); + /// + /// Sends the observations of one Agent. + /// + /// Batch Key. + /// Agent info. + void PutObservations(string brainKey, Agent agent); + + void DecideBatch(); + /// /// Gets the AgentActions based on the batching key. 
/// @@ -148,10 +157,4 @@ public interface ICommunicator : IBatchedDecisionMaker /// Dictionary GetActions(string key); } - - public interface IBatchedDecisionMaker : IDisposable - { - void PutObservations(string key, Agent agent); - void DecideBatch(); - } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs index ecdb434e20..4e756f597f 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs @@ -13,7 +13,10 @@ namespace MLAgents.InferenceBrain /// public class ContinuousActionOutputApplier : TensorApplier.IApplier { - public void Apply(TensorProxy tensorProxy, IEnumerable agents) + public void Apply(TensorProxy tensorProxy, + IEnumerable agents, + Dictionary> memories) { var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1]; var agentIndex = 0; @@ -47,7 +50,9 @@ public DiscreteActionOutputApplier(int[] actionSize, int seed, ITensorAllocator m_Allocator = allocator; } - public void Apply(TensorProxy tensorProxy, IEnumerable agents) + public void Apply(TensorProxy tensorProxy, + IEnumerable agents, + Dictionary> memories) { //var tensorDataProbabilities = tensorProxy.Data as float[,]; var agentsArray = agents as List ?? agents.ToList(); @@ -60,7 +65,7 @@ public void Apply(TensorProxy tensorProxy, IEnumerable agents) var actionProbs = new TensorProxy() { valueType = TensorProxy.TensorType.FloatingPoint, - shape = new long[] {batchSize, nBranchAction}, + shape = new long[] { batchSize, nBranchAction }, data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction)) }; @@ -78,7 +83,7 @@ public void Apply(TensorProxy tensorProxy, IEnumerable agents) var outputTensor = new TensorProxy() { valueType = TensorProxy.TensorType.FloatingPoint, - shape = new long[] {batchSize, 1}, + shape = new long[] { batchSize, 1 }, data = m_Allocator.Alloc(new TensorShape(batchSize, 1)) }; @@ -180,14 +185,16 @@ public BarracudaMemoryOutputApplier(int memoriesCount, int memoryIndex) m_MemoryIndex = memoryIndex; } - public void Apply(TensorProxy tensorProxy, IEnumerable agents) + public void Apply(TensorProxy tensorProxy, + IEnumerable agents, + Dictionary> memories) { var agentIndex = 0; var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1]; foreach (var agent in agents) { - var memory = agent.GetMemoriesAction(); + var memory = memories.ElementAtOrDefault(agent.Info.id).Value; if (memory == null || memory.Count < memorySize * m_MemoriesCount) { @@ -200,36 +207,38 @@ public void Apply(TensorProxy tensorProxy, IEnumerable agents) memory[memorySize * m_MemoryIndex + j] = tensorProxy.data[agentIndex, j]; } - agent.UpdateMemoriesAction(memory); + memories[agent.Info.id] = memory; agentIndex++; } } } - /// - /// The Applier for the Memory output tensor. Tensor is assumed to contain the new - /// memory data of the agents in the batch. - /// - public class MemoryOutputApplier : TensorApplier.IApplier - { - public void Apply(TensorProxy tensorProxy, IEnumerable agents) - { - var agentIndex = 0; - var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1]; - foreach (var agent in agents) - { - var memory = new List(); - for (var j = 0; j < memorySize; j++) - { - memory.Add(tensorProxy.data[agentIndex, j]); - } + // /// + // /// The Applier for the Memory output tensor. Tensor is assumed to contain the new + // /// memory data of the agents in the batch. 
+ // /// + // public class MemoryOutputApplier : TensorApplier.IApplier + // { + // public void Apply(TensorProxy tensorProxy, + // IEnumerable agents, + // Dictionary> memories) + // { + // var agentIndex = 0; + // var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1]; + // foreach (var agent in agents) + // { + // var memory = new List(); + // for (var j = 0; j < memorySize; j++) + // { + // memory.Add(tensorProxy.data[agentIndex, j]); + // } - agent.UpdateMemoriesAction(memory); - agentIndex++; - } - } - } + // agent.UpdateMemoriesAction(memory); + // agentIndex++; + // } + // } + // } /// /// The Applier for the Value Estimate output tensor. Tensor is assumed to contain the @@ -237,7 +246,9 @@ public void Apply(TensorProxy tensorProxy, IEnumerable agents) /// public class ValueEstimateApplier : TensorApplier.IApplier { - public void Apply(TensorProxy tensorProxy, IEnumerable agents) + public void Apply(TensorProxy tensorProxy, + IEnumerable agents, + Dictionary> memories) { var agentIndex = 0; foreach (var agent in agents) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs index c35461356b..08c6601284 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs @@ -20,7 +20,11 @@ public BiDimensionalOutputGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) + public void Generate( + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); } @@ -39,7 +43,11 @@ public BatchSizeGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) + public void Generate( + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories) { tensorProxy.data?.Dispose(); tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1)); @@ -62,7 +70,11 @@ public SequenceLengthGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) + public void Generate( + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories) { tensorProxy.shape = new long[0]; tensorProxy.data?.Dispose(); @@ -86,7 +98,10 @@ public VectorObservationGenerator(ITensorAllocator allocator) } public void Generate( - TensorProxy tensorProxy, int batchSize, IEnumerable agents) + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); var vecObsSizeT = tensorProxy.shape[tensorProxy.shape.Length - 1]; @@ -104,49 +119,52 @@ public void Generate( } } - /// - /// Generates the Tensor corresponding to the Recurrent input : Will be a two - /// dimensional float array of dimension [batchSize x memorySize]. - /// It will use the Memory data contained in the agentInfo to fill the data - /// of the tensor. 
- /// - public class RecurrentInputGenerator : TensorGenerator.IGenerator - { - private readonly ITensorAllocator m_Allocator; - - public RecurrentInputGenerator(ITensorAllocator allocator) - { - m_Allocator = allocator; - } - - public void Generate( - TensorProxy tensorProxy, int batchSize, IEnumerable agents) - { - TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); - - var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1]; - var agentIndex = 0; - foreach (var agent in agents) - { - var info = agent.Info; - var memory = info.memories; - if (memory == null) - { - agentIndex++; - continue; - } - for (var j = 0; j < Math.Min(memorySize, memory.Count); j++) - { - if (j >= memory.Count) - { - break; - } - tensorProxy.data[agentIndex, j] = memory[j]; - } - agentIndex++; - } - } - } + // /// + // /// Generates the Tensor corresponding to the Recurrent input : Will be a two + // /// dimensional float array of dimension [batchSize x memorySize]. + // /// It will use the Memory data contained in the agentInfo to fill the data + // /// of the tensor. + // /// + // public class RecurrentInputGenerator : TensorGenerator.IGenerator + // { + // private readonly ITensorAllocator m_Allocator; + + // public RecurrentInputGenerator(ITensorAllocator allocator) + // { + // m_Allocator = allocator; + // } + + // public void Generate( + // TensorProxy tensorProxy, + // int batchSize, + // IEnumerable agents, + // Dictionary> memories) + // { + // TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); + + // var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1]; + // var agentIndex = 0; + // foreach (var agent in agents) + // { + // var info = agent.Info; + // var memory = info.memories; + // if (memory == null) + // { + // agentIndex++; + // continue; + // } + // for (var j = 0; j < Math.Min(memorySize, memory.Count); j++) + // { + // if (j >= memory.Count) + // { + // break; + // } + // tensorProxy.data[agentIndex, j] = memory[j]; + // } + // agentIndex++; + // } + // } + // } public class BarracudaRecurrentInputGenerator : TensorGenerator.IGenerator { @@ -161,7 +179,10 @@ public BarracudaRecurrentInputGenerator(int memoryIndex, ITensorAllocator alloca } public void Generate( - TensorProxy tensorProxy, int batchSize, IEnumerable agents) + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); @@ -170,7 +191,7 @@ public void Generate( foreach (var agent in agents) { var agentInfo = agent.Info; - var memory = agentInfo.memories; + var memory = memories.ElementAtOrDefault(agentInfo.id).Value; var offset = memorySize * m_MemoryIndex; @@ -208,7 +229,10 @@ public PreviousActionInputGenerator(ITensorAllocator allocator) } public void Generate( - TensorProxy tensorProxy, int batchSize, IEnumerable agents) + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); @@ -244,7 +268,10 @@ public ActionMaskInputGenerator(ITensorAllocator allocator) } public void Generate( - TensorProxy tensorProxy, int batchSize, IEnumerable agents) + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); @@ -281,7 +308,10 @@ public RandomNormalInputGenerator(int seed, ITensorAllocator allocator) } public void Generate( - TensorProxy tensorProxy, int batchSize, IEnumerable agents) + TensorProxy tensorProxy, + int 
batchSize, + IEnumerable agents, + Dictionary> memories) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); TensorUtils.FillTensorWithRandomNormal(tensorProxy, m_RandomNormal); @@ -308,7 +338,10 @@ public VisualObservationInputGenerator( } public void Generate( - TensorProxy tensorProxy, int batchSize, IEnumerable agents) + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); var agentIndex = 0; diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs index 85301e9313..51a74f5e05 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs @@ -4,7 +4,7 @@ namespace MLAgents.InferenceBrain { - public class ModelRunner : IBatchedDecisionMaker + public class ModelRunner { private List m_Agents = new List(); private ITensorAllocator m_TensorAllocator; @@ -18,6 +18,7 @@ public class ModelRunner : IBatchedDecisionMaker private string[] m_OutputNames; private IReadOnlyList m_InferenceInputs; private IReadOnlyList m_InferenceOutputs; + private Dictionary> m_Memories = new Dictionary>(); private bool m_visualObservationsInitialized = false; @@ -100,7 +101,7 @@ private List FetchBarracudaOutputs(string[] names) return outputs; } - public void PutObservations(string key, Agent agent) + public void PutObservations(Agent agent) { m_Agents.Add(agent); } @@ -125,7 +126,7 @@ public void DecideBatch() Profiler.BeginSample($"MLAgents.{m_Model.name}.GenerateTensors"); // Prepare the input tensors to be feed into the engine - m_TensorGenerator.GenerateTensors(m_InferenceInputs, currentBatchSize, m_Agents); + m_TensorGenerator.GenerateTensors(m_InferenceInputs, currentBatchSize, m_Agents, m_Memories); Profiler.EndSample(); Profiler.BeginSample($"MLAgents.{m_Model.name}.PrepareBarracudaInputs"); @@ -143,7 +144,7 @@ public void DecideBatch() Profiler.BeginSample($"MLAgents.{m_Model.name}.ApplyTensors"); // Update the outputs - m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_Agents); + m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_Agents, m_Memories); Profiler.EndSample(); Profiler.EndSample(); diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs index 610abb6cf9..00a2be112c 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs @@ -30,7 +30,12 @@ public interface IApplier /// /// List of Agents that will receive the values of the Tensor. /// - void Apply(TensorProxy tensorProxy, IEnumerable agents); + /// + /// The memories of all the agents + /// + void Apply(TensorProxy tensorProxy, + IEnumerable agents, + Dictionary> memories); } private readonly Dictionary m_Dict = new Dictionary(); @@ -56,7 +61,7 @@ public TensorApplier( m_Dict[TensorNames.ActionOutput] = new DiscreteActionOutputApplier(bp.vectorActionSize, seed, allocator); } - m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(); + // m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(); if (barracudaModel != null) { @@ -78,7 +83,7 @@ public TensorApplier( /// One of the tensor does not have an /// associated applier. 
public void ApplyTensors( - IEnumerable tensors, IEnumerable agents) + IEnumerable tensors, IEnumerable agents, Dictionary> memories) { foreach (var tensor in tensors) { @@ -87,7 +92,7 @@ public void ApplyTensors( throw new UnityAgentsException( $"Unknown tensorProxy expected as output : {tensor.name}"); } - m_Dict[tensor.name].Apply(tensor, agents); + m_Dict[tensor.name].Apply(tensor, agents, memories); } } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs index 452f15f92b..cd3853bbce 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs @@ -28,7 +28,11 @@ public interface IGenerator /// List of Agents containing the /// information that will be used to populate the tensor's data void Generate( - TensorProxy tensorProxy, int batchSize, IEnumerable agents); + TensorProxy tensorProxy, + int batchSize, + IEnumerable agents, + Dictionary> memories + ); } private readonly Dictionary m_Dict = new Dictionary(); @@ -51,8 +55,8 @@ public TensorGenerator( new SequenceLengthGenerator(allocator); m_Dict[TensorNames.VectorObservationPlacholder] = new VectorObservationGenerator(allocator); - m_Dict[TensorNames.RecurrentInPlaceholder] = - new RecurrentInputGenerator(allocator); + // m_Dict[TensorNames.RecurrentInPlaceholder] = + // new RecurrentInputGenerator(allocator); if (barracudaModel != null) { @@ -102,7 +106,8 @@ public void InitializeVisualObservations(Agent agent, ITensorAllocator allocator public void GenerateTensors( IEnumerable tensors, int currentBatchSize, - IEnumerable agents) + IEnumerable agents, + Dictionary> memories) { foreach (var tensor in tensors) { @@ -111,7 +116,7 @@ public void GenerateTensors( throw new UnityAgentsException( $"Unknown tensorProxy expected as input : {tensor.name}"); } - m_Dict[tensor.name].Generate(tensor, currentBatchSize, agents); + m_Dict[tensor.name].Generate(tensor, currentBatchSize, agents, memories); } } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs b/UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs index 65d66f4239..f1fe5852e6 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs @@ -1,6 +1,7 @@ using UnityEngine; using Barracuda; using System.Collections.Generic; +using MLAgents.InferenceBrain; namespace MLAgents { @@ -18,7 +19,7 @@ public enum InferenceDevice public class BarracudaPolicy : IPolicy { - protected IBatchedDecisionMaker m_BatchedDecisionMaker; + protected ModelRunner m_BatchedDecisionMaker; /// /// Sensor shapes for the associated Agents. All Agents must have the same shapes for their sensors. 
@@ -43,7 +44,7 @@ public void RequestDecision(Agent agent) #if DEBUG ValidateAgentSensorShapes(agent); #endif - m_BatchedDecisionMaker?.PutObservations(null, agent); + m_BatchedDecisionMaker?.PutObservations(agent); } /// diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs b/UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs index 6ad30fde75..dfc3a2cb99 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs @@ -11,7 +11,7 @@ public class RemotePolicy : IPolicy { private string m_BehaviorName; - protected IBatchedDecisionMaker m_BatchedDecisionMaker; + protected ICommunicator m_BatchedDecisionMaker; /// /// Sensor shapes for the associated Agents. All Agents must have the same shapes for their sensors. diff --git a/protobuf-definitions/proto/mlagents/envs/communicator_objects/agent_action.proto b/protobuf-definitions/proto/mlagents/envs/communicator_objects/agent_action.proto index 05a00a7f78..65f06e06cb 100644 --- a/protobuf-definitions/proto/mlagents/envs/communicator_objects/agent_action.proto +++ b/protobuf-definitions/proto/mlagents/envs/communicator_objects/agent_action.proto @@ -8,7 +8,7 @@ package communicator_objects; message AgentActionProto { repeated float vector_actions = 1; string text_actions = 2; - repeated float memories = 3; + reserved 3; //deprecated repeated float memories = 3; float value = 4; CustomActionProto custom_action = 5; } diff --git a/protobuf-definitions/proto/mlagents/envs/communicator_objects/agent_info.proto b/protobuf-definitions/proto/mlagents/envs/communicator_objects/agent_info.proto index f48130eb63..fee14c036b 100644 --- a/protobuf-definitions/proto/mlagents/envs/communicator_objects/agent_info.proto +++ b/protobuf-definitions/proto/mlagents/envs/communicator_objects/agent_info.proto @@ -12,7 +12,7 @@ message AgentInfoProto { string text_observation = 3; repeated float stored_vector_actions = 4; string stored_text_actions = 5; - repeated float memories = 6; + reserved 6; //repeated float memories = 6; float reward = 7; bool done = 8; bool max_step_reached = 9; From 33de00e5a4f9dfa895b8c347b30236e9dd8f79ed Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 23 Oct 2019 14:21:52 -0700 Subject: [PATCH 02/17] initial changes to Python --- ml-agents-envs/mlagents/envs/brain.py | 42 ------------------- ml-agents-envs/mlagents/envs/environment.py | 34 +-------------- .../mlagents/envs/subprocess_env_manager.py | 4 +- ml-agents/mlagents/trainers/tf_policy.py | 2 +- 4 files changed, 4 insertions(+), 78 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/brain.py b/ml-agents-envs/mlagents/envs/brain.py index 1c33f7ec4d..e427a81414 100644 --- a/ml-agents-envs/mlagents/envs/brain.py +++ b/ml-agents-envs/mlagents/envs/brain.py @@ -95,7 +95,6 @@ def __init__( visual_observation, vector_observation, text_observations, - memory=None, reward=None, agents=None, local_done=None, @@ -111,7 +110,6 @@ def __init__( self.visual_observations = visual_observation self.vector_observations = vector_observation self.text_observations = text_observations - self.memories = memory self.rewards = reward self.local_done = local_done self.max_reached = max_reached @@ -121,33 +119,6 @@ def __init__( self.action_masks = action_mask self.custom_observations = custom_observations - def merge(self, other): - for i in range(len(self.visual_observations)): - self.visual_observations[i].extend(other.visual_observations[i]) - self.vector_observations = np.append( - 
self.vector_observations, other.vector_observations, axis=0 - ) - self.text_observations.extend(other.text_observations) - self.memories = self.merge_memories( - self.memories, other.memories, self.agents, other.agents - ) - self.rewards = safe_concat_lists(self.rewards, other.rewards) - self.local_done = safe_concat_lists(self.local_done, other.local_done) - self.max_reached = safe_concat_lists(self.max_reached, other.max_reached) - self.agents = safe_concat_lists(self.agents, other.agents) - self.previous_vector_actions = safe_concat_np_ndarray( - self.previous_vector_actions, other.previous_vector_actions - ) - self.previous_text_actions = safe_concat_lists( - self.previous_text_actions, other.previous_text_actions - ) - self.action_masks = safe_concat_np_ndarray( - self.action_masks, other.action_masks - ) - self.custom_observations = safe_concat_lists( - self.custom_observations, other.custom_observations - ) - @staticmethod def merge_memories(m1, m2, agents1, agents2): if len(m1) == 0 and len(m2) != 0: @@ -204,18 +175,6 @@ def from_agent_proto( for x in agent_info_list ] vis_obs += [obs] - if len(agent_info_list) == 0: - memory_size = 0 - else: - memory_size = max(len(x.memories) for x in agent_info_list) - if memory_size == 0: - memory = np.zeros((0, 0)) - else: - [ - x.memories.extend([0] * (memory_size - len(x.memories))) - for x in agent_info_list - ] - memory = np.array([list(x.memories) for x in agent_info_list]) total_num_actions = sum(brain_params.vector_action_space_size) mask_actions = np.ones((len(agent_info_list), total_num_actions)) for agent_index, agent_info in enumerate(agent_info_list): @@ -270,7 +229,6 @@ def from_agent_proto( visual_observation=vis_obs, vector_observation=vector_obs, text_observations=[x.text_observation for x in agent_info_list], - memory=memory, reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list], agents=agents, local_done=[x.done for x in agent_info_list], diff --git a/ml-agents-envs/mlagents/envs/environment.py b/ml-agents-envs/mlagents/envs/environment.py index 07ef4f78e6..45cefcacd4 100644 --- a/ml-agents-envs/mlagents/envs/environment.py +++ b/ml-agents-envs/mlagents/envs/environment.py @@ -347,7 +347,6 @@ def reset( def step( self, vector_action: Dict[str, np.ndarray] = None, - memory: Optional[Dict[str, np.ndarray]] = None, text_action: Optional[Dict[str, List[str]]] = None, value: Optional[Dict[str, np.ndarray]] = None, custom_action: Dict[str, Any] = None, @@ -365,7 +364,6 @@ def step( if self._is_first_message: return self.reset() vector_action = {} if vector_action is None else vector_action - memory = {} if memory is None else memory text_action = {} if text_action is None else text_action value = {} if value is None else value custom_action = {} if custom_action is None else custom_action @@ -388,20 +386,6 @@ def step( "step cannot take a vector_action input" ) - if isinstance(memory, self.SINGLE_BRAIN_ACTION_TYPES): - if self._num_external_brains == 1: - memory = {self._external_brain_names[0]: memory} - elif self._num_external_brains > 1: - raise UnityActionException( - "You have {0} brains, you need to feed a dictionary of brain names as keys " - "and memories as values".format(self._num_external_brains) - ) - else: - raise UnityActionException( - "There are no external brains in the environment, " - "step cannot take a memory input" - ) - if isinstance(text_action, self.SINGLE_BRAIN_TEXT_TYPES): if self._num_external_brains == 1: text_action = {self._external_brain_names[0]: text_action} @@ -448,11 +432,7 
@@ def step( "step cannot take a custom_action input" ) - for brain_name in ( - list(vector_action.keys()) - + list(memory.keys()) - + list(text_action.keys()) - ): + for brain_name in list(vector_action.keys()) + list(text_action.keys()): if brain_name not in self._external_brain_names: raise UnityActionException( "The name {0} does not correspond to an external brain " @@ -476,13 +456,6 @@ def step( ) else: vector_action[brain_name] = self._flatten(vector_action[brain_name]) - if brain_name not in memory: - memory[brain_name] = [] - else: - if memory[brain_name] is None: - memory[brain_name] = [] - else: - memory[brain_name] = self._flatten(memory[brain_name]) if brain_name not in text_action: text_action[brain_name] = [""] * n_agent else: @@ -548,7 +521,7 @@ def step( ) step_input = self._generate_step_input( - vector_action, memory, text_action, value, custom_action + vector_action, text_action, value, custom_action ) with hierarchical_timer("communicator.exchange"): outputs = self.communicator.exchange(step_input) @@ -639,7 +612,6 @@ def _update_brain_parameters(self, output: UnityOutputProto) -> None: def _generate_step_input( self, vector_action: Dict[str, np.ndarray], - memory: Dict[str, np.ndarray], text_action: Dict[str, list], value: Dict[str, np.ndarray], custom_action: Dict[str, list], @@ -650,11 +622,9 @@ def _generate_step_input( if n_agents == 0: continue _a_s = len(vector_action[b]) // n_agents - _m_s = len(memory[b]) // n_agents for i in range(n_agents): action = AgentActionProto( vector_actions=vector_action[b][i * _a_s : (i + 1) * _a_s], - memories=memory[b][i * _m_s : (i + 1) * _m_s], text_actions=text_action[b][i], custom_action=custom_action[b][i], ) diff --git a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py index a91a49650c..fb26f6f3ad 100644 --- a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py +++ b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py @@ -90,15 +90,13 @@ def _send_response(cmd_name, payload): if cmd.name == "step": all_action_info = cmd.payload actions = {} - memories = {} texts = {} values = {} for brain_name, action_info in all_action_info.items(): actions[brain_name] = action_info.action - memories[brain_name] = action_info.memory texts[brain_name] = action_info.text values[brain_name] = action_info.value - all_brain_info = env.step(actions, memories, texts, values) + all_brain_info = env.step(actions, texts, values, None) # The timers in this process are independent from all the processes and the "main" process # So after we send back the root timer, we can safely clear them. 
# Note that we could randomly return timers a fraction of the time if we wanted to reduce diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index df33746d21..f0d169dc27 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -56,6 +56,7 @@ def __init__(self, seed, brain, trainer_parameters): self.seed = seed self.brain = brain self.use_recurrent = trainer_parameters["use_recurrent"] + self._memories = {} self.normalize = trainer_parameters.get("normalize", False) self.use_continuous_act = brain.vector_action_space_type == "continuous" self.model_path = trainer_parameters["model_path"] @@ -126,7 +127,6 @@ def get_action(self, brain_info: BrainInfo) -> ActionInfo: run_out = self.evaluate(brain_info) return ActionInfo( action=run_out.get("action"), - memory=run_out.get("memory_out"), text=None, value=run_out.get("value"), outputs=run_out, From 0b8dded2c560d03ff6844dc3c980aa0cd698a9e9 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 23 Oct 2019 14:27:01 -0700 Subject: [PATCH 03/17] Adding functionalities --- ml-agents/mlagents/trainers/tf_policy.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index f0d169dc27..a0fbeb7497 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -169,6 +169,24 @@ def make_empty_memory(self, num_agents): """ return np.zeros((num_agents, self.m_size)) + def _save_memories(self, agent_ids, memory_matrix): + if not isinstance(memory_matrix, np.ndarray): + return + for index, id in enumerate(agent_ids): + self.memory_dict[id] = memory_matrix[index, :] + + def _retrieve_memories(self, agent_ids): + memory_matrix = np.zeros((len(agent_ids), self.m_size)) + for index, id in enumerate(agent_ids): + if id in self.memory_dict.keys(): + memory_matrix[index, :] = self.memory_dict[id] + return memory_matrix + + def _remove_memories(self, agent_ids): + for id in agent_ids: + if id in self.memory_dict.keys(): + self.memory_dict.pop(id) + def get_current_step(self): """ Gets current model step. From ef57029ed866a3aa0fb5841f0b2be463046cbb1d Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 23 Oct 2019 14:48:02 -0700 Subject: [PATCH 04/17] Fixes --- .../Scripts/InferenceBrain/ApplierImpl.cs | 25 ---------- .../Scripts/InferenceBrain/GeneratorImpl.cs | 46 ------------------- ml-agents-envs/mlagents/envs/action_info.py | 1 - ml-agents/mlagents/trainers/bc/policy.py | 4 +- ml-agents/mlagents/trainers/ppo/policy.py | 8 +--- 5 files changed, 3 insertions(+), 81 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs index 4e756f597f..54ab54cd95 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs @@ -214,31 +214,6 @@ public void Apply(TensorProxy tensorProxy, } } - // /// - // /// The Applier for the Memory output tensor. Tensor is assumed to contain the new - // /// memory data of the agents in the batch. 
- // /// - // public class MemoryOutputApplier : TensorApplier.IApplier - // { - // public void Apply(TensorProxy tensorProxy, - // IEnumerable agents, - // Dictionary> memories) - // { - // var agentIndex = 0; - // var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1]; - // foreach (var agent in agents) - // { - // var memory = new List(); - // for (var j = 0; j < memorySize; j++) - // { - // memory.Add(tensorProxy.data[agentIndex, j]); - // } - - // agent.UpdateMemoriesAction(memory); - // agentIndex++; - // } - // } - // } /// /// The Applier for the Value Estimate output tensor. Tensor is assumed to contain the diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs index 08c6601284..cf21dc8b0a 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs @@ -119,52 +119,6 @@ public void Generate( } } - // /// - // /// Generates the Tensor corresponding to the Recurrent input : Will be a two - // /// dimensional float array of dimension [batchSize x memorySize]. - // /// It will use the Memory data contained in the agentInfo to fill the data - // /// of the tensor. - // /// - // public class RecurrentInputGenerator : TensorGenerator.IGenerator - // { - // private readonly ITensorAllocator m_Allocator; - - // public RecurrentInputGenerator(ITensorAllocator allocator) - // { - // m_Allocator = allocator; - // } - - // public void Generate( - // TensorProxy tensorProxy, - // int batchSize, - // IEnumerable agents, - // Dictionary> memories) - // { - // TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); - - // var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1]; - // var agentIndex = 0; - // foreach (var agent in agents) - // { - // var info = agent.Info; - // var memory = info.memories; - // if (memory == null) - // { - // agentIndex++; - // continue; - // } - // for (var j = 0; j < Math.Min(memorySize, memory.Count); j++) - // { - // if (j >= memory.Count) - // { - // break; - // } - // tensorProxy.data[agentIndex, j] = memory[j]; - // } - // agentIndex++; - // } - // } - // } public class BarracudaRecurrentInputGenerator : TensorGenerator.IGenerator { diff --git a/ml-agents-envs/mlagents/envs/action_info.py b/ml-agents-envs/mlagents/envs/action_info.py index f6bd4561fc..747bc3bd79 100644 --- a/ml-agents-envs/mlagents/envs/action_info.py +++ b/ml-agents-envs/mlagents/envs/action_info.py @@ -5,7 +5,6 @@ class ActionInfo(NamedTuple): action: Any - memory: Any text: Any value: Any outputs: ActionInfoOutputs diff --git a/ml-agents/mlagents/trainers/bc/policy.py b/ml-agents/mlagents/trainers/bc/policy.py index 2c31a19dec..9358bdd076 100644 --- a/ml-agents/mlagents/trainers/bc/policy.py +++ b/ml-agents/mlagents/trainers/bc/policy.py @@ -59,9 +59,7 @@ def evaluate(self, brain_info): feed_dict = self.fill_eval_dict(feed_dict, brain_info) if self.use_recurrent: - if brain_info.memories.shape[1] == 0: - brain_info.memories = self.make_empty_memory(len(brain_info.agents)) - feed_dict[self.model.memory_in] = brain_info.memories + feed_dict[self.model.memory_in] = self._retrieve_memories(brain_info.agents) run_out = self._execute_model(feed_dict, self.inference_dict) return run_out diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py index a81b97b03d..692a8cafac 100644 --- a/ml-agents/mlagents/trainers/ppo/policy.py +++ 
b/ml-agents/mlagents/trainers/ppo/policy.py @@ -156,9 +156,7 @@ def evaluate(self, brain_info): ] = brain_info.previous_vector_actions.reshape( [-1, len(self.model.act_size)] ) - if brain_info.memories.shape[1] == 0: - brain_info.memories = self.make_empty_memory(len(brain_info.agents)) - feed_dict[self.model.memory_in] = brain_info.memories + feed_dict[self.model.memory_in] = self._retrieve_memories(brain_info.agents) if self.use_continuous_act: epsilon = np.random.normal( size=(len(brain_info.vector_observations), self.model.act_size[0]) @@ -253,9 +251,7 @@ def get_value_estimates( if self.use_vec_obs: feed_dict[self.model.vector_in] = [brain_info.vector_observations[idx]] if self.use_recurrent: - if brain_info.memories.shape[1] == 0: - brain_info.memories = self.make_empty_memory(len(brain_info.agents)) - feed_dict[self.model.memory_in] = [brain_info.memories[idx]] + feed_dict[self.model.memory_in] = self._retrieve_memories(brain_info.agents) if not self.use_continuous_act and self.use_recurrent: feed_dict[self.model.prev_action] = [ brain_info.previous_vector_actions[idx] From a0709ff678c62015fd62edde22ec7f644abe4033 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 23 Oct 2019 14:50:51 -0700 Subject: [PATCH 05/17] adding the memories to the dictionary --- ml-agents/mlagents/trainers/tf_policy.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index a0fbeb7497..b24c91bed9 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -124,7 +124,15 @@ def get_action(self, brain_info: BrainInfo) -> ActionInfo: if len(brain_info.agents) == 0: return ActionInfo([], [], [], None, None) + self._remove_memories( + [ + brain_info.agents[i] + for i in range(len(brain_info.agents)) + if brain_info.local_done[i] + ] + ) run_out = self.evaluate(brain_info) + self._save_memories(brain_info.agents, run_out.get("memory_out")) return ActionInfo( action=run_out.get("action"), text=None, From c72751bb83927f3695ddecb79e901503ad0e13d6 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 23 Oct 2019 15:18:12 -0700 Subject: [PATCH 06/17] Fixing bugs --- .../mlagents/envs/simple_env_manager.py | 4 +--- ml-agents/mlagents/trainers/ppo/policy.py | 2 +- ml-agents/mlagents/trainers/rl_trainer.py | 18 ------------------ ml-agents/mlagents/trainers/sac/policy.py | 4 +--- ml-agents/mlagents/trainers/tf_policy.py | 2 +- 5 files changed, 4 insertions(+), 26 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/simple_env_manager.py b/ml-agents-envs/mlagents/envs/simple_env_manager.py index e2d5feeb3b..29ed41044a 100644 --- a/ml-agents-envs/mlagents/envs/simple_env_manager.py +++ b/ml-agents-envs/mlagents/envs/simple_env_manager.py @@ -25,15 +25,13 @@ def step(self) -> List[EnvironmentStep]: self.previous_all_action_info = all_action_info actions = {} - memories = {} texts = {} values = {} for brain_name, action_info in all_action_info.items(): actions[brain_name] = action_info.action - memories[brain_name] = action_info.memory texts[brain_name] = action_info.text values[brain_name] = action_info.value - all_brain_info = self.env.step(actions, memories, texts, values) + all_brain_info = self.env.step(actions, texts, values, None) step_brain_info = all_brain_info step_info = EnvironmentStep( diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py index 692a8cafac..10f3537e3e 100644 --- a/ml-agents/mlagents/trainers/ppo/policy.py +++ 
b/ml-agents/mlagents/trainers/ppo/policy.py @@ -251,7 +251,7 @@ def get_value_estimates( if self.use_vec_obs: feed_dict[self.model.vector_in] = [brain_info.vector_observations[idx]] if self.use_recurrent: - feed_dict[self.model.memory_in] = self._retrieve_memories(brain_info.agents) + feed_dict[self.model.memory_in] = self._retrieve_memories([idx]) if not self.use_continuous_act and self.use_recurrent: feed_dict[self.model.prev_action] = [ brain_info.previous_vector_actions[idx] diff --git a/ml-agents/mlagents/trainers/rl_trainer.py b/ml-agents/mlagents/trainers/rl_trainer.py index ce3bb9e377..0f4181615e 100644 --- a/ml-agents/mlagents/trainers/rl_trainer.py +++ b/ml-agents/mlagents/trainers/rl_trainer.py @@ -58,7 +58,6 @@ def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo: ] # TODO add types to brain.py methods vector_observations = [] text_observations = [] - memories = [] rewards = [] local_dones = [] max_reacheds = [] @@ -79,11 +78,6 @@ def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo: agent_brain_info.vector_observations[agent_index] ) text_observations.append(agent_brain_info.text_observations[agent_index]) - if self.policy.use_recurrent: - if len(agent_brain_info.memories) > 0: - memories.append(agent_brain_info.memories[agent_index]) - else: - memories.append(self.policy.make_empty_memory(1)) rewards.append(agent_brain_info.rewards[agent_index]) local_dones.append(agent_brain_info.local_done[agent_index]) max_reacheds.append(agent_brain_info.max_reached[agent_index]) @@ -95,14 +89,10 @@ def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo: agent_brain_info.previous_text_actions[agent_index] ) action_masks.append(agent_brain_info.action_masks[agent_index]) - # Check if memories exists (i.e. next_info is not empty) before attempting vstack - if self.policy.use_recurrent and memories: - memories = np.vstack(memories) curr_info = BrainInfo( visual_observations, vector_observations, text_observations, - memories, rewards, agents, local_dones, @@ -184,14 +174,6 @@ def add_experiences( self.training_buffer[agent_id]["next_vector_in"].append( next_info.vector_observations[next_idx] ) - if self.policy.use_recurrent: - if stored_info.memories.shape[1] == 0: - stored_info.memories = np.zeros( - (len(stored_info.agents), self.policy.m_size) - ) - self.training_buffer[agent_id]["memory"].append( - stored_info.memories[idx] - ) self.training_buffer[agent_id]["masks"].append(1.0) self.training_buffer[agent_id]["done"].append( diff --git a/ml-agents/mlagents/trainers/sac/policy.py b/ml-agents/mlagents/trainers/sac/policy.py index b8b996a427..a8fbb597a2 100644 --- a/ml-agents/mlagents/trainers/sac/policy.py +++ b/ml-agents/mlagents/trainers/sac/policy.py @@ -180,9 +180,7 @@ def evaluate(self, brain_info: BrainInfo) -> Dict[str, np.ndarray]: ] = brain_info.previous_vector_actions.reshape( [-1, len(self.model.act_size)] ) - if brain_info.memories.shape[1] == 0: - brain_info.memories = self.make_empty_memory(len(brain_info.agents)) - feed_dict[self.model.memory_in] = brain_info.memories + feed_dict[self.model.memory_in] = self._retrieve_memories(brain_info.agents) feed_dict = self.fill_eval_dict(feed_dict, brain_info) run_out = self._execute_model(feed_dict, self.inference_dict) diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index b24c91bed9..7362dba69d 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -56,7 +56,7 @@ def __init__(self, seed, brain, 
trainer_parameters): self.seed = seed self.brain = brain self.use_recurrent = trainer_parameters["use_recurrent"] - self._memories = {} + self.memory_dict = {} self.normalize = trainer_parameters.get("normalize", False) self.use_continuous_act = brain.vector_action_space_type == "continuous" self.model_path = trainer_parameters["model_path"] From 7a17eb4c358a33473e16e48a6ffc9a26d3df17ee Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Wed, 23 Oct 2019 15:41:30 -0700 Subject: [PATCH 07/17] tweeks --- ml-agents/mlagents/trainers/rl_trainer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/rl_trainer.py b/ml-agents/mlagents/trainers/rl_trainer.py index 0f4181615e..6c9416585c 100644 --- a/ml-agents/mlagents/trainers/rl_trainer.py +++ b/ml-agents/mlagents/trainers/rl_trainer.py @@ -174,7 +174,10 @@ def add_experiences( self.training_buffer[agent_id]["next_vector_in"].append( next_info.vector_observations[next_idx] ) - + if self.policy.use_recurrent: + self.training_buffer[agent_id]["memory"].append( + self.policy._retrieve_memories([idx])[0, :] + ) self.training_buffer[agent_id]["masks"].append(1.0) self.training_buffer[agent_id]["done"].append( next_info.local_done[next_idx] From 3604c24e33e4f47917e78ea427acc0899e28ac07 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 24 Oct 2019 11:20:18 -0700 Subject: [PATCH 08/17] Resolving bugs --- ml-agents/mlagents/trainers/bc/policy.py | 2 +- ml-agents/mlagents/trainers/ppo/policy.py | 4 ++-- ml-agents/mlagents/trainers/rl_trainer.py | 2 +- ml-agents/mlagents/trainers/sac/policy.py | 2 +- .../mlagents/trainers/tests/test_policy.py | 20 +++++++++---------- .../trainers/tests/test_rl_trainer.py | 1 + ml-agents/mlagents/trainers/tf_policy.py | 12 +++++------ 7 files changed, 22 insertions(+), 21 deletions(-) diff --git a/ml-agents/mlagents/trainers/bc/policy.py b/ml-agents/mlagents/trainers/bc/policy.py index 9358bdd076..4370e84911 100644 --- a/ml-agents/mlagents/trainers/bc/policy.py +++ b/ml-agents/mlagents/trainers/bc/policy.py @@ -59,7 +59,7 @@ def evaluate(self, brain_info): feed_dict = self.fill_eval_dict(feed_dict, brain_info) if self.use_recurrent: - feed_dict[self.model.memory_in] = self._retrieve_memories(brain_info.agents) + feed_dict[self.model.memory_in] = self.retrieve_memories(brain_info.agents) run_out = self._execute_model(feed_dict, self.inference_dict) return run_out diff --git a/ml-agents/mlagents/trainers/ppo/policy.py b/ml-agents/mlagents/trainers/ppo/policy.py index 10f3537e3e..41412c15fc 100644 --- a/ml-agents/mlagents/trainers/ppo/policy.py +++ b/ml-agents/mlagents/trainers/ppo/policy.py @@ -156,7 +156,7 @@ def evaluate(self, brain_info): ] = brain_info.previous_vector_actions.reshape( [-1, len(self.model.act_size)] ) - feed_dict[self.model.memory_in] = self._retrieve_memories(brain_info.agents) + feed_dict[self.model.memory_in] = self.retrieve_memories(brain_info.agents) if self.use_continuous_act: epsilon = np.random.normal( size=(len(brain_info.vector_observations), self.model.act_size[0]) @@ -251,7 +251,7 @@ def get_value_estimates( if self.use_vec_obs: feed_dict[self.model.vector_in] = [brain_info.vector_observations[idx]] if self.use_recurrent: - feed_dict[self.model.memory_in] = self._retrieve_memories([idx]) + feed_dict[self.model.memory_in] = self.retrieve_memories([idx]) if not self.use_continuous_act and self.use_recurrent: feed_dict[self.model.prev_action] = [ brain_info.previous_vector_actions[idx] diff --git a/ml-agents/mlagents/trainers/rl_trainer.py 
b/ml-agents/mlagents/trainers/rl_trainer.py index 6c9416585c..9cef6fb877 100644 --- a/ml-agents/mlagents/trainers/rl_trainer.py +++ b/ml-agents/mlagents/trainers/rl_trainer.py @@ -176,7 +176,7 @@ def add_experiences( ) if self.policy.use_recurrent: self.training_buffer[agent_id]["memory"].append( - self.policy._retrieve_memories([idx])[0, :] + self.policy.retrieve_memories([agent_id])[0, :] ) self.training_buffer[agent_id]["masks"].append(1.0) self.training_buffer[agent_id]["done"].append( diff --git a/ml-agents/mlagents/trainers/sac/policy.py b/ml-agents/mlagents/trainers/sac/policy.py index a8fbb597a2..ef13184f7e 100644 --- a/ml-agents/mlagents/trainers/sac/policy.py +++ b/ml-agents/mlagents/trainers/sac/policy.py @@ -180,7 +180,7 @@ def evaluate(self, brain_info: BrainInfo) -> Dict[str, np.ndarray]: ] = brain_info.previous_vector_actions.reshape( [-1, len(self.model.act_size)] ) - feed_dict[self.model.memory_in] = self._retrieve_memories(brain_info.agents) + feed_dict[self.model.memory_in] = self.retrieve_memories(brain_info.agents) feed_dict = self.fill_eval_dict(feed_dict, brain_info) run_out = self._execute_model(feed_dict, self.inference_dict) diff --git a/ml-agents/mlagents/trainers/tests/test_policy.py b/ml-agents/mlagents/trainers/tests/test_policy.py index 34c09ed297..2b6a1fee88 100644 --- a/ml-agents/mlagents/trainers/tests/test_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_policy.py @@ -20,16 +20,18 @@ def test_take_action_returns_empty_with_no_agents(): policy = TFPolicy(test_seed, basic_mock_brain(), basic_params()) no_agent_brain_info = BrainInfo([], [], [], agents=[]) result = policy.get_action(no_agent_brain_info) - assert result == ActionInfo([], [], [], None, None) + assert result == ActionInfo([], [], [], None) def test_take_action_returns_nones_on_missing_values(): test_seed = 3 policy = TFPolicy(test_seed, basic_mock_brain(), basic_params()) policy.evaluate = MagicMock(return_value={}) - brain_info_with_agents = BrainInfo([], [], [], agents=["an-agent-id"]) + brain_info_with_agents = BrainInfo( + [], [], [], agents=["an-agent-id"], local_done=[False] + ) result = policy.get_action(brain_info_with_agents) - assert result == ActionInfo(None, None, None, None, {}) + assert result == ActionInfo(None, None, None, {}) def test_take_action_returns_action_info_when_available(): @@ -37,17 +39,15 @@ def test_take_action_returns_action_info_when_available(): policy = TFPolicy(test_seed, basic_mock_brain(), basic_params()) policy_eval_out = { "action": np.array([1.0]), - "memory_out": np.array([2.5]), + "memory_out": np.array([[2.5]]), "value": np.array([1.1]), } policy.evaluate = MagicMock(return_value=policy_eval_out) - brain_info_with_agents = BrainInfo([], [], [], agents=["an-agent-id"]) + brain_info_with_agents = BrainInfo( + [], [], [], agents=["an-agent-id"], local_done=[False] + ) result = policy.get_action(brain_info_with_agents) expected = ActionInfo( - policy_eval_out["action"], - policy_eval_out["memory_out"], - None, - policy_eval_out["value"], - policy_eval_out, + policy_eval_out["action"], None, policy_eval_out["value"], policy_eval_out ) assert result == expected diff --git a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py index 36da0ae769..b89d7d94df 100644 --- a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py +++ b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py @@ -43,6 +43,7 @@ def create_mock_all_brain_info(brain_info): def create_mock_policy(): mock_policy = mock.Mock() 
mock_policy.reward_signals = {} + mock_policy.retrieve_memories.return_value = np.zeros((1, 1)) return mock_policy diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index 7362dba69d..152f6a8552 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -122,9 +122,9 @@ def get_action(self, brain_info: BrainInfo) -> ActionInfo: to be passed to add experiences """ if len(brain_info.agents) == 0: - return ActionInfo([], [], [], None, None) + return ActionInfo([], [], [], None) - self._remove_memories( + self.remove_memories( [ brain_info.agents[i] for i in range(len(brain_info.agents)) @@ -132,7 +132,7 @@ def get_action(self, brain_info: BrainInfo) -> ActionInfo: ] ) run_out = self.evaluate(brain_info) - self._save_memories(brain_info.agents, run_out.get("memory_out")) + self.save_memories(brain_info.agents, run_out.get("memory_out")) return ActionInfo( action=run_out.get("action"), text=None, @@ -177,20 +177,20 @@ def make_empty_memory(self, num_agents): """ return np.zeros((num_agents, self.m_size)) - def _save_memories(self, agent_ids, memory_matrix): + def save_memories(self, agent_ids, memory_matrix): if not isinstance(memory_matrix, np.ndarray): return for index, id in enumerate(agent_ids): self.memory_dict[id] = memory_matrix[index, :] - def _retrieve_memories(self, agent_ids): + def retrieve_memories(self, agent_ids): memory_matrix = np.zeros((len(agent_ids), self.m_size)) for index, id in enumerate(agent_ids): if id in self.memory_dict.keys(): memory_matrix[index, :] = self.memory_dict[id] return memory_matrix - def _remove_memories(self, agent_ids): + def remove_memories(self, agent_ids): for id in agent_ids: if id in self.memory_dict.keys(): self.memory_dict.pop(id) From d7cf784a74474bf61f782523f7fcdefce2253f46 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 24 Oct 2019 14:27:05 -0700 Subject: [PATCH 09/17] Recreating the proto --- .../Grpc/CommunicatorObjects/AgentAction.cs | 33 +++------------ .../Grpc/CommunicatorObjects/AgentInfo.cs | 41 +++++-------------- .../UnityToExternalGrpc.cs | 8 ++-- .../communicator_objects/agent_action_pb2.py | 15 ++----- .../communicator_objects/agent_action_pb2.pyi | 6 +-- .../communicator_objects/agent_info_pb2.py | 25 ++++------- .../communicator_objects/agent_info_pb2.pyi | 6 +-- .../unity_to_external_pb2.py | 10 ++--- 8 files changed, 42 insertions(+), 102 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentAction.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentAction.cs index 33a893aa9d..2f5cfd1dd6 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentAction.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentAction.cs @@ -26,16 +26,16 @@ static AgentActionReflection() { string.Concat( "CjVtbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2FnZW50X2Fj", "dGlvbi5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMaNm1sYWdlbnRzL2Vu", - "dnMvY29tbXVuaWNhdG9yX29iamVjdHMvY3VzdG9tX2FjdGlvbi5wcm90byKh", + "dnMvY29tbXVuaWNhdG9yX29iamVjdHMvY3VzdG9tX2FjdGlvbi5wcm90byKV", "AQoQQWdlbnRBY3Rpb25Qcm90bxIWCg52ZWN0b3JfYWN0aW9ucxgBIAMoAhIU", - "Cgx0ZXh0X2FjdGlvbnMYAiABKAkSEAoIbWVtb3JpZXMYAyADKAISDQoFdmFs", - "dWUYBCABKAISPgoNY3VzdG9tX2FjdGlvbhgFIAEoCzInLmNvbW11bmljYXRv", - "cl9vYmplY3RzLkN1c3RvbUFjdGlvblByb3RvQh+qAhxNTEFnZW50cy5Db21t", - "dW5pY2F0b3JPYmplY3RzYgZwcm90bzM=")); + "Cgx0ZXh0X2FjdGlvbnMYAiABKAkSDQoFdmFsdWUYBCABKAISPgoNY3VzdG9t", + 
"X2FjdGlvbhgFIAEoCzInLmNvbW11bmljYXRvcl9vYmplY3RzLkN1c3RvbUFj", + "dGlvblByb3RvSgQIAxAEQh+qAhxNTEFnZW50cy5Db21tdW5pY2F0b3JPYmpl", + "Y3RzYgZwcm90bzM=")); descriptor = pbr::FileDescriptor.FromGeneratedCode(descriptorData, new pbr::FileDescriptor[] { global::MLAgents.CommunicatorObjects.CustomActionReflection.Descriptor, }, new pbr::GeneratedClrTypeInfo(null, new pbr::GeneratedClrTypeInfo[] { - new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentActionProto), global::MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActions", "TextActions", "Memories", "Value", "CustomAction" }, null, null, null) + new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentActionProto), global::MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActions", "TextActions", "Value", "CustomAction" }, null, null, null) })); } #endregion @@ -69,7 +69,6 @@ public AgentActionProto() { public AgentActionProto(AgentActionProto other) : this() { vectorActions_ = other.vectorActions_.Clone(); textActions_ = other.textActions_; - memories_ = other.memories_.Clone(); value_ = other.value_; CustomAction = other.customAction_ != null ? other.CustomAction.Clone() : null; _unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields); @@ -101,16 +100,6 @@ public string TextActions { } } - /// Field number for the "memories" field. - public const int MemoriesFieldNumber = 3; - private static readonly pb::FieldCodec _repeated_memories_codec - = pb::FieldCodec.ForFloat(26); - private readonly pbc::RepeatedField memories_ = new pbc::RepeatedField(); - [global::System.Diagnostics.DebuggerNonUserCodeAttribute] - public pbc::RepeatedField Memories { - get { return memories_; } - } - /// Field number for the "value" field. 
public const int ValueFieldNumber = 4; private float value_; @@ -148,7 +137,6 @@ public bool Equals(AgentActionProto other) { } if(!vectorActions_.Equals(other.vectorActions_)) return false; if (TextActions != other.TextActions) return false; - if(!memories_.Equals(other.memories_)) return false; if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(Value, other.Value)) return false; if (!object.Equals(CustomAction, other.CustomAction)) return false; return Equals(_unknownFields, other._unknownFields); @@ -159,7 +147,6 @@ public override int GetHashCode() { int hash = 1; hash ^= vectorActions_.GetHashCode(); if (TextActions.Length != 0) hash ^= TextActions.GetHashCode(); - hash ^= memories_.GetHashCode(); if (Value != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(Value); if (customAction_ != null) hash ^= CustomAction.GetHashCode(); if (_unknownFields != null) { @@ -180,7 +167,6 @@ public void WriteTo(pb::CodedOutputStream output) { output.WriteRawTag(18); output.WriteString(TextActions); } - memories_.WriteTo(output, _repeated_memories_codec); if (Value != 0F) { output.WriteRawTag(37); output.WriteFloat(Value); @@ -201,7 +187,6 @@ public int CalculateSize() { if (TextActions.Length != 0) { size += 1 + pb::CodedOutputStream.ComputeStringSize(TextActions); } - size += memories_.CalculateSize(_repeated_memories_codec); if (Value != 0F) { size += 1 + 4; } @@ -223,7 +208,6 @@ public void MergeFrom(AgentActionProto other) { if (other.TextActions.Length != 0) { TextActions = other.TextActions; } - memories_.Add(other.memories_); if (other.Value != 0F) { Value = other.Value; } @@ -253,11 +237,6 @@ public void MergeFrom(pb::CodedInputStream input) { TextActions = input.ReadString(); break; } - case 26: - case 29: { - memories_.AddEntriesFrom(input, _repeated_memories_codec); - break; - } case 37: { Value = input.ReadFloat(); break; diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentInfo.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentInfo.cs index dfe9158fd9..83756e69f9 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentInfo.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentInfo.cs @@ -28,21 +28,21 @@ static AgentInfoReflection() { "Zm8ucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzGj9tbGFnZW50cy9lbnZz", "L2NvbW11bmljYXRvcl9vYmplY3RzL2NvbXByZXNzZWRfb2JzZXJ2YXRpb24u", "cHJvdG8aO21sYWdlbnRzL2VudnMvY29tbXVuaWNhdG9yX29iamVjdHMvY3Vz", - "dG9tX29ic2VydmF0aW9uLnByb3RvIpgDCg5BZ2VudEluZm9Qcm90bxIiChpz", + "dG9tX29ic2VydmF0aW9uLnByb3RvIowDCg5BZ2VudEluZm9Qcm90bxIiChpz", "dGFja2VkX3ZlY3Rvcl9vYnNlcnZhdGlvbhgBIAMoAhIYChB0ZXh0X29ic2Vy", "dmF0aW9uGAMgASgJEh0KFXN0b3JlZF92ZWN0b3JfYWN0aW9ucxgEIAMoAhIb", - "ChNzdG9yZWRfdGV4dF9hY3Rpb25zGAUgASgJEhAKCG1lbW9yaWVzGAYgAygC", - "Eg4KBnJld2FyZBgHIAEoAhIMCgRkb25lGAggASgIEhgKEG1heF9zdGVwX3Jl", - "YWNoZWQYCSABKAgSCgoCaWQYCiABKAUSEwoLYWN0aW9uX21hc2sYCyADKAgS", - "SAoSY3VzdG9tX29ic2VydmF0aW9uGAwgASgLMiwuY29tbXVuaWNhdG9yX29i", - "amVjdHMuQ3VzdG9tT2JzZXJ2YXRpb25Qcm90bxJRChdjb21wcmVzc2VkX29i", - "c2VydmF0aW9ucxgNIAMoCzIwLmNvbW11bmljYXRvcl9vYmplY3RzLkNvbXBy", - "ZXNzZWRPYnNlcnZhdGlvblByb3RvSgQIAhADQh+qAhxNTEFnZW50cy5Db21t", - "dW5pY2F0b3JPYmplY3RzYgZwcm90bzM=")); + "ChNzdG9yZWRfdGV4dF9hY3Rpb25zGAUgASgJEg4KBnJld2FyZBgHIAEoAhIM", + "CgRkb25lGAggASgIEhgKEG1heF9zdGVwX3JlYWNoZWQYCSABKAgSCgoCaWQY", + "CiABKAUSEwoLYWN0aW9uX21hc2sYCyADKAgSSAoSY3VzdG9tX29ic2VydmF0", + "aW9uGAwgASgLMiwuY29tbXVuaWNhdG9yX29iamVjdHMuQ3VzdG9tT2JzZXJ2", + 
"YXRpb25Qcm90bxJRChdjb21wcmVzc2VkX29ic2VydmF0aW9ucxgNIAMoCzIw", + "LmNvbW11bmljYXRvcl9vYmplY3RzLkNvbXByZXNzZWRPYnNlcnZhdGlvblBy", + "b3RvSgQIAhADSgQIBhAHQh+qAhxNTEFnZW50cy5Db21tdW5pY2F0b3JPYmpl", + "Y3RzYgZwcm90bzM=")); descriptor = pbr::FileDescriptor.FromGeneratedCode(descriptorData, new pbr::FileDescriptor[] { global::MLAgents.CommunicatorObjects.CompressedObservationReflection.Descriptor, global::MLAgents.CommunicatorObjects.CustomObservationReflection.Descriptor, }, new pbr::GeneratedClrTypeInfo(null, new pbr::GeneratedClrTypeInfo[] { - new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentInfoProto), global::MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "StackedVectorObservation", "TextObservation", "StoredVectorActions", "StoredTextActions", "Memories", "Reward", "Done", "MaxStepReached", "Id", "ActionMask", "CustomObservation", "CompressedObservations" }, null, null, null) + new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentInfoProto), global::MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "StackedVectorObservation", "TextObservation", "StoredVectorActions", "StoredTextActions", "Reward", "Done", "MaxStepReached", "Id", "ActionMask", "CustomObservation", "CompressedObservations" }, null, null, null) })); } #endregion @@ -78,7 +78,6 @@ public AgentInfoProto(AgentInfoProto other) : this() { textObservation_ = other.textObservation_; storedVectorActions_ = other.storedVectorActions_.Clone(); storedTextActions_ = other.storedTextActions_; - memories_ = other.memories_.Clone(); reward_ = other.reward_; done_ = other.done_; maxStepReached_ = other.maxStepReached_; @@ -136,16 +135,6 @@ public string StoredTextActions { } } - /// Field number for the "memories" field. - public const int MemoriesFieldNumber = 6; - private static readonly pb::FieldCodec _repeated_memories_codec - = pb::FieldCodec.ForFloat(50); - private readonly pbc::RepeatedField memories_ = new pbc::RepeatedField(); - [global::System.Diagnostics.DebuggerNonUserCodeAttribute] - public pbc::RepeatedField Memories { - get { return memories_; } - } - /// Field number for the "reward" field. 
public const int RewardFieldNumber = 7; private float reward_; @@ -238,7 +227,6 @@ public bool Equals(AgentInfoProto other) { if (TextObservation != other.TextObservation) return false; if(!storedVectorActions_.Equals(other.storedVectorActions_)) return false; if (StoredTextActions != other.StoredTextActions) return false; - if(!memories_.Equals(other.memories_)) return false; if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(Reward, other.Reward)) return false; if (Done != other.Done) return false; if (MaxStepReached != other.MaxStepReached) return false; @@ -256,7 +244,6 @@ public override int GetHashCode() { if (TextObservation.Length != 0) hash ^= TextObservation.GetHashCode(); hash ^= storedVectorActions_.GetHashCode(); if (StoredTextActions.Length != 0) hash ^= StoredTextActions.GetHashCode(); - hash ^= memories_.GetHashCode(); if (Reward != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(Reward); if (Done != false) hash ^= Done.GetHashCode(); if (MaxStepReached != false) hash ^= MaxStepReached.GetHashCode(); @@ -287,7 +274,6 @@ public void WriteTo(pb::CodedOutputStream output) { output.WriteRawTag(42); output.WriteString(StoredTextActions); } - memories_.WriteTo(output, _repeated_memories_codec); if (Reward != 0F) { output.WriteRawTag(61); output.WriteFloat(Reward); @@ -326,7 +312,6 @@ public int CalculateSize() { if (StoredTextActions.Length != 0) { size += 1 + pb::CodedOutputStream.ComputeStringSize(StoredTextActions); } - size += memories_.CalculateSize(_repeated_memories_codec); if (Reward != 0F) { size += 1 + 4; } @@ -363,7 +348,6 @@ public void MergeFrom(AgentInfoProto other) { if (other.StoredTextActions.Length != 0) { StoredTextActions = other.StoredTextActions; } - memories_.Add(other.memories_); if (other.Reward != 0F) { Reward = other.Reward; } @@ -413,11 +397,6 @@ public void MergeFrom(pb::CodedInputStream input) { StoredTextActions = input.ReadString(); break; } - case 50: - case 53: { - memories_.AddEntriesFrom(input, _repeated_memories_codec); - break; - } case 61: { Reward = input.ReadFloat(); break; diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityToExternalGrpc.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityToExternalGrpc.cs index 2ab1cbe0a4..b10d2baa9b 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityToExternalGrpc.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityToExternalGrpc.cs @@ -3,7 +3,7 @@ // Generated by the protocol buffer compiler. DO NOT EDIT! 
// source: mlagents/envs/communicator_objects/unity_to_external.proto // -#pragma warning disable 0414, 1591 +#pragma warning disable 1591 #region Designer generated code using grpc = global::Grpc.Core; @@ -13,14 +13,14 @@ public static partial class UnityToExternalProto { static readonly string __ServiceName = "communicator_objects.UnityToExternalProto"; - static readonly grpc::Marshaller __Marshaller_communicator_objects_UnityMessageProto = grpc::Marshallers.Create((arg) => global::Google.Protobuf.MessageExtensions.ToByteArray(arg), global::MLAgents.CommunicatorObjects.UnityMessageProto.Parser.ParseFrom); + static readonly grpc::Marshaller __Marshaller_UnityMessageProto = grpc::Marshallers.Create((arg) => global::Google.Protobuf.MessageExtensions.ToByteArray(arg), global::MLAgents.CommunicatorObjects.UnityMessageProto.Parser.ParseFrom); static readonly grpc::Method __Method_Exchange = new grpc::Method( grpc::MethodType.Unary, __ServiceName, "Exchange", - __Marshaller_communicator_objects_UnityMessageProto, - __Marshaller_communicator_objects_UnityMessageProto); + __Marshaller_UnityMessageProto, + __Marshaller_UnityMessageProto); /// Service descriptor public static global::Google.Protobuf.Reflection.ServiceDescriptor Descriptor diff --git a/ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.py b/ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.py index 9b2454e53d..4908e5a55a 100644 --- a/ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.py +++ b/ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.py @@ -20,7 +20,7 @@ name='mlagents/envs/communicator_objects/agent_action.proto', package='communicator_objects', syntax='proto3', - serialized_pb=_b('\n5mlagents/envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/custom_action.proto\"\xa1\x01\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\x14\n\x0ctext_actions\x18\x02 \x01(\t\x12\x10\n\x08memories\x18\x03 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x12>\n\rcustom_action\x18\x05 \x01(\x0b\x32\'.communicator_objects.CustomActionProtoB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3') + serialized_pb=_b('\n5mlagents/envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/custom_action.proto\"\x95\x01\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\x14\n\x0ctext_actions\x18\x02 \x01(\t\x12\r\n\x05value\x18\x04 \x01(\x02\x12>\n\rcustom_action\x18\x05 \x01(\x0b\x32\'.communicator_objects.CustomActionProtoJ\x04\x08\x03\x10\x04\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3') , dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2.DESCRIPTOR,]) @@ -49,21 +49,14 @@ is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='memories', full_name='communicator_objects.AgentActionProto.memories', index=2, - number=3, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='value', full_name='communicator_objects.AgentActionProto.value', index=3, + name='value', full_name='communicator_objects.AgentActionProto.value', index=2, number=4, type=2, cpp_type=6, label=1, has_default_value=False, default_value=float(0), 
message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='custom_action', full_name='communicator_objects.AgentActionProto.custom_action', index=4, + name='custom_action', full_name='communicator_objects.AgentActionProto.custom_action', index=3, number=5, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, @@ -82,7 +75,7 @@ oneofs=[ ], serialized_start=136, - serialized_end=297, + serialized_end=285, ) _AGENTACTIONPROTO.fields_by_name['custom_action'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2._CUSTOMACTIONPROTO diff --git a/ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.pyi b/ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.pyi index d96652aee0..4e1eef1774 100644 --- a/ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.pyi +++ b/ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.pyi @@ -37,7 +37,6 @@ class AgentActionProto(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... vector_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float] text_actions = ... # type: typing___Text - memories = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float] value = ... # type: builtin___float @property @@ -47,7 +46,6 @@ class AgentActionProto(google___protobuf___message___Message): *, vector_actions : typing___Optional[typing___Iterable[builtin___float]] = None, text_actions : typing___Optional[typing___Text] = None, - memories : typing___Optional[typing___Iterable[builtin___float]] = None, value : typing___Optional[builtin___float] = None, custom_action : typing___Optional[mlagents___envs___communicator_objects___custom_action_pb2___CustomActionProto] = None, ) -> None: ... @@ -57,7 +55,7 @@ class AgentActionProto(google___protobuf___message___Message): def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): def HasField(self, field_name: typing_extensions___Literal[u"custom_action"]) -> builtin___bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"custom_action",u"memories",u"text_actions",u"value",u"vector_actions"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"custom_action",u"text_actions",u"value",u"vector_actions"]) -> None: ... else: def HasField(self, field_name: typing_extensions___Literal[u"custom_action",b"custom_action"]) -> builtin___bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"custom_action",b"custom_action",u"memories",b"memories",u"text_actions",b"text_actions",u"value",b"value",u"vector_actions",b"vector_actions"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"custom_action",b"custom_action",u"text_actions",b"text_actions",u"value",b"value",u"vector_actions",b"vector_actions"]) -> None: ... 
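For context on the regenerated descriptors: the new serialized_pb for AgentActionProto above drops the repeated memories field (tag 3) and appends the bytes J\x04\x08\x03\x10\x04, which decode to a reserved_range entry covering tag 3, so that tag can never be reassigned to a different field. The AgentInfoProto regeneration below keeps its previously reserved tag 2 and likewise adds a reserved range for the removed memories tag 6. A minimal sketch of what this means for callers of the regenerated Python module follows; it assumes the mlagents-envs package built from this branch is importable and is purely illustrative, not part of the patch:

    # Sketch: exercise the regenerated AgentActionProto after the memories field removal.
    from mlagents.envs.communicator_objects.agent_action_pb2 import AgentActionProto

    # The remaining fields are unchanged and can be set as before.
    action = AgentActionProto(vector_actions=[0.1, -0.2], text_actions="", value=0.5)
    print(action)

    try:
        # Tag 3 is reserved; the generated class no longer has a field by this name.
        AgentActionProto(memories=[1.0, 2.0])
    except ValueError as err:
        print("memories was removed:", err)

Reserving the old tags rather than simply deleting the fields keeps the wire format unambiguous: no future field can be assigned tag 3 (or tag 6 on AgentInfoProto) with a different meaning, so previously serialized messages cannot be misinterpreted.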
diff --git a/ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.py b/ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.py index 8818a369fd..ceebf9d7f8 100644 --- a/ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.py +++ b/ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.py @@ -21,7 +21,7 @@ name='mlagents/envs/communicator_objects/agent_info.proto', package='communicator_objects', syntax='proto3', - serialized_pb=_b('\n3mlagents/envs/communicator_objects/agent_info.proto\x12\x14\x63ommunicator_objects\x1a?mlagents/envs/communicator_objects/compressed_observation.proto\x1a;mlagents/envs/communicator_objects/custom_observation.proto\"\x98\x03\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x10\n\x08memories\x18\x06 \x03(\x02\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x12H\n\x12\x63ustom_observation\x18\x0c \x01(\x0b\x32,.communicator_objects.CustomObservationProto\x12Q\n\x17\x63ompressed_observations\x18\r \x03(\x0b\x32\x30.communicator_objects.CompressedObservationProtoJ\x04\x08\x02\x10\x03\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3') + serialized_pb=_b('\n3mlagents/envs/communicator_objects/agent_info.proto\x12\x14\x63ommunicator_objects\x1a?mlagents/envs/communicator_objects/compressed_observation.proto\x1a;mlagents/envs/communicator_objects/custom_observation.proto\"\x8c\x03\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x12H\n\x12\x63ustom_observation\x18\x0c \x01(\x0b\x32,.communicator_objects.CustomObservationProto\x12Q\n\x17\x63ompressed_observations\x18\r \x03(\x0b\x32\x30.communicator_objects.CompressedObservationProtoJ\x04\x08\x02\x10\x03J\x04\x08\x06\x10\x07\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3') , dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_compressed__observation__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2.DESCRIPTOR,]) @@ -64,56 +64,49 @@ is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='memories', full_name='communicator_objects.AgentInfoProto.memories', index=4, - number=6, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='reward', full_name='communicator_objects.AgentInfoProto.reward', index=5, + name='reward', full_name='communicator_objects.AgentInfoProto.reward', index=4, number=7, type=2, cpp_type=6, label=1, has_default_value=False, default_value=float(0), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='done', 
full_name='communicator_objects.AgentInfoProto.done', index=6, + name='done', full_name='communicator_objects.AgentInfoProto.done', index=5, number=8, type=8, cpp_type=7, label=1, has_default_value=False, default_value=False, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='max_step_reached', full_name='communicator_objects.AgentInfoProto.max_step_reached', index=7, + name='max_step_reached', full_name='communicator_objects.AgentInfoProto.max_step_reached', index=6, number=9, type=8, cpp_type=7, label=1, has_default_value=False, default_value=False, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='id', full_name='communicator_objects.AgentInfoProto.id', index=8, + name='id', full_name='communicator_objects.AgentInfoProto.id', index=7, number=10, type=5, cpp_type=1, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='action_mask', full_name='communicator_objects.AgentInfoProto.action_mask', index=9, + name='action_mask', full_name='communicator_objects.AgentInfoProto.action_mask', index=8, number=11, type=8, cpp_type=7, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='custom_observation', full_name='communicator_objects.AgentInfoProto.custom_observation', index=10, + name='custom_observation', full_name='communicator_objects.AgentInfoProto.custom_observation', index=9, number=12, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='compressed_observations', full_name='communicator_objects.AgentInfoProto.compressed_observations', index=11, + name='compressed_observations', full_name='communicator_objects.AgentInfoProto.compressed_observations', index=10, number=13, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, @@ -132,7 +125,7 @@ oneofs=[ ], serialized_start=204, - serialized_end=612, + serialized_end=600, ) _AGENTINFOPROTO.fields_by_name['custom_observation'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2._CUSTOMOBSERVATIONPROTO diff --git a/ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.pyi b/ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.pyi index 4aaa82c3a7..efca605704 100644 --- a/ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.pyi +++ b/ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.pyi @@ -44,7 +44,6 @@ class AgentInfoProto(google___protobuf___message___Message): text_observation = ... # type: typing___Text stored_vector_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float] stored_text_actions = ... # type: typing___Text - memories = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float] reward = ... # type: builtin___float done = ... 
# type: builtin___bool max_step_reached = ... # type: builtin___bool @@ -63,7 +62,6 @@ class AgentInfoProto(google___protobuf___message___Message): text_observation : typing___Optional[typing___Text] = None, stored_vector_actions : typing___Optional[typing___Iterable[builtin___float]] = None, stored_text_actions : typing___Optional[typing___Text] = None, - memories : typing___Optional[typing___Iterable[builtin___float]] = None, reward : typing___Optional[builtin___float] = None, done : typing___Optional[builtin___bool] = None, max_step_reached : typing___Optional[builtin___bool] = None, @@ -78,7 +76,7 @@ class AgentInfoProto(google___protobuf___message___Message): def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): def HasField(self, field_name: typing_extensions___Literal[u"custom_observation"]) -> builtin___bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",u"compressed_observations",u"custom_observation",u"done",u"id",u"max_step_reached",u"memories",u"reward",u"stacked_vector_observation",u"stored_text_actions",u"stored_vector_actions",u"text_observation"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",u"compressed_observations",u"custom_observation",u"done",u"id",u"max_step_reached",u"reward",u"stacked_vector_observation",u"stored_text_actions",u"stored_vector_actions",u"text_observation"]) -> None: ... else: def HasField(self, field_name: typing_extensions___Literal[u"custom_observation",b"custom_observation"]) -> builtin___bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",b"action_mask",u"compressed_observations",b"compressed_observations",u"custom_observation",b"custom_observation",u"done",b"done",u"id",b"id",u"max_step_reached",b"max_step_reached",u"memories",b"memories",u"reward",b"reward",u"stacked_vector_observation",b"stacked_vector_observation",u"stored_text_actions",b"stored_text_actions",u"stored_vector_actions",b"stored_vector_actions",u"text_observation",b"text_observation"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",b"action_mask",u"compressed_observations",b"compressed_observations",u"custom_observation",b"custom_observation",u"done",b"done",u"id",b"id",u"max_step_reached",b"max_step_reached",u"reward",b"reward",u"stacked_vector_observation",b"stacked_vector_observation",u"stored_text_actions",b"stored_text_actions",u"stored_vector_actions",b"stored_vector_actions",u"text_observation",b"text_observation"]) -> None: ... diff --git a/ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py b/ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py index 5e18fa5204..dc1f03e387 100644 --- a/ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py +++ b/ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: mlagents/envs/communicator_objects/unity_to_external.proto @@ -7,7 +8,6 @@ from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -20,6 +20,7 @@ name='mlagents/envs/communicator_objects/unity_to_external.proto', package='communicator_objects', syntax='proto3', + serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'), serialized_pb=_b('\n:mlagents/envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/unity_message.proto2v\n\x14UnityToExternalProto\x12^\n\x08\x45xchange\x12\'.communicator_objects.UnityMessageProto\x1a\'.communicator_objects.UnityMessageProto\"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3') , dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,]) @@ -29,15 +30,14 @@ _sym_db.RegisterFileDescriptor(DESCRIPTOR) -DESCRIPTOR.has_options = True -DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\252\002\034MLAgents.CommunicatorObjects')) +DESCRIPTOR._options = None _UNITYTOEXTERNALPROTO = _descriptor.ServiceDescriptor( name='UnityToExternalProto', full_name='communicator_objects.UnityToExternalProto', file=DESCRIPTOR, index=0, - options=None, + serialized_options=None, serialized_start=140, serialized_end=258, methods=[ @@ -48,7 +48,7 @@ containing_service=None, input_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGEPROTO, output_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGEPROTO, - options=None, + serialized_options=None, ), ]) _sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNALPROTO) From 5f01e4c388f515e8a9be05a6a7bcadac0ae75bea Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 24 Oct 2019 15:05:13 -0700 Subject: [PATCH 10/17] Addressing comments --- .../EditModeTestInternalBrainTensorApplier.cs | 8 +-- ...ditModeTestInternalBrainTensorGenerator.cs | 12 ++-- .../Scripts/InferenceBrain/ApplierImpl.cs | 31 ++++----- .../Scripts/InferenceBrain/GeneratorImpl.cs | 64 +++++-------------- .../Scripts/InferenceBrain/ModelRunner.cs | 10 +-- .../Scripts/InferenceBrain/TensorApplier.cs | 20 +++--- .../Scripts/InferenceBrain/TensorGenerator.cs | 21 +++--- .../Scripts/Policy/BarracudaPolicy.cs | 8 +-- ml-agents/mlagents/trainers/tf_policy.py | 30 ++++----- 9 files changed, 83 insertions(+), 121 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs index 232dbbcc91..1ac66d0607 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs @@ -34,7 +34,7 @@ public void Construction() { var bp = new BrainParameters(); var alloc = new TensorCachingAllocator(); - var tensorGenerator = new TensorApplier(bp, 0, alloc); + var tensorGenerator = new TensorApplier(bp, 0, alloc, null); Assert.IsNotNull(tensorGenerator); alloc.Dispose(); } @@ -50,7 +50,7 @@ public void ApplyContinuousActionOutput() var agentInfos = GetFakeAgentInfos(); var applier = new ContinuousActionOutputApplier(); - applier.Apply(inputTensor, agentInfos, null); + applier.Apply(inputTensor, 
agentInfos); var agents = agentInfos; var agent = agents[0] as TestAgent; @@ -82,7 +82,7 @@ public void ApplyDiscreteActionOutput() var agentInfos = GetFakeAgentInfos(); var alloc = new TensorCachingAllocator(); var applier = new DiscreteActionOutputApplier(new[] { 2, 3 }, 0, alloc); - applier.Apply(inputTensor, agentInfos, null); + applier.Apply(inputTensor, agentInfos); var agents = agentInfos; var agent = agents[0] as TestAgent; @@ -110,7 +110,7 @@ public void ApplyValueEstimate() var agentInfos = GetFakeAgentInfos(); var applier = new ValueEstimateApplier(); - applier.Apply(inputTensor, agentInfos, null); + applier.Apply(inputTensor, agentInfos); var agents = agentInfos; var agent = agents[0] as TestAgent; diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs index b58b985746..a7815d9484 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs @@ -38,7 +38,7 @@ public void Construction() { var bp = new BrainParameters(); var alloc = new TensorCachingAllocator(); - var tensorGenerator = new TensorGenerator(bp, 0, alloc); + var tensorGenerator = new TensorGenerator(bp, 0, alloc, null); Assert.IsNotNull(tensorGenerator); alloc.Dispose(); } @@ -50,7 +50,7 @@ public void GenerateBatchSize() var alloc = new TensorCachingAllocator(); const int batchSize = 4; var generator = new BatchSizeGenerator(alloc); - generator.Generate(inputTensor, batchSize, null, null); + generator.Generate(inputTensor, batchSize, null); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0], batchSize); alloc.Dispose(); @@ -63,7 +63,7 @@ public void GenerateSequenceLength() var alloc = new TensorCachingAllocator(); const int batchSize = 4; var generator = new SequenceLengthGenerator(alloc); - generator.Generate(inputTensor, batchSize, null, null); + generator.Generate(inputTensor, batchSize, null); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0], 1); alloc.Dispose(); @@ -80,7 +80,7 @@ public void GenerateVectorObservation() var agentInfos = GetFakeAgentInfos(); var alloc = new TensorCachingAllocator(); var generator = new VectorObservationGenerator(alloc); - generator.Generate(inputTensor, batchSize, agentInfos, null); + generator.Generate(inputTensor, batchSize, agentInfos); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0, 0], 1); Assert.AreEqual(inputTensor.data[0, 2], 3); @@ -102,7 +102,7 @@ public void GeneratePreviousActionInput() var alloc = new TensorCachingAllocator(); var generator = new PreviousActionInputGenerator(alloc); - generator.Generate(inputTensor, batchSize, agentInfos, null); + generator.Generate(inputTensor, batchSize, agentInfos); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0, 0], 1); Assert.AreEqual(inputTensor.data[0, 1], 2); @@ -123,7 +123,7 @@ public void GenerateActionMaskInput() var agentInfos = GetFakeAgentInfos(); var alloc = new TensorCachingAllocator(); var generator = new ActionMaskInputGenerator(alloc); - generator.Generate(inputTensor, batchSize, agentInfos, null); + generator.Generate(inputTensor, batchSize, agentInfos); Assert.IsNotNull(inputTensor.data); Assert.AreEqual(inputTensor.data[0, 0], 1); Assert.AreEqual(inputTensor.data[0, 4], 1); diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs 
b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs index 54ab54cd95..a0450bb449 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs @@ -13,10 +13,7 @@ namespace MLAgents.InferenceBrain /// public class ContinuousActionOutputApplier : TensorApplier.IApplier { - public void Apply(TensorProxy tensorProxy, - IEnumerable agents, - Dictionary> memories) + public void Apply(TensorProxy tensorProxy, IEnumerable agents) { var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1]; var agentIndex = 0; @@ -50,9 +47,7 @@ public DiscreteActionOutputApplier(int[] actionSize, int seed, ITensorAllocator m_Allocator = allocator; } - public void Apply(TensorProxy tensorProxy, - IEnumerable agents, - Dictionary> memories) + public void Apply(TensorProxy tensorProxy, IEnumerable agents) { //var tensorDataProbabilities = tensorProxy.Data as float[,]; var agentsArray = agents as List ?? agents.ToList(); @@ -179,24 +174,28 @@ public class BarracudaMemoryOutputApplier : TensorApplier.IApplier private readonly int m_MemoriesCount; private readonly int m_MemoryIndex; - public BarracudaMemoryOutputApplier(int memoriesCount, int memoryIndex) + private Dictionary> m_Memories; + + public BarracudaMemoryOutputApplier( + int memoriesCount, + int memoryIndex, + Dictionary> memories) { m_MemoriesCount = memoriesCount; m_MemoryIndex = memoryIndex; + m_Memories = memories; } - public void Apply(TensorProxy tensorProxy, - IEnumerable agents, - Dictionary> memories) + public void Apply(TensorProxy tensorProxy, IEnumerable agents) { var agentIndex = 0; var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1]; foreach (var agent in agents) { - var memory = memories.ElementAtOrDefault(agent.Info.id).Value; + var memory = m_Memories.ElementAtOrDefault(agent.Info.id).Value; - if (memory == null || memory.Count < memorySize * m_MemoriesCount) + if (memory == null || m_Memories.Count < memorySize * m_MemoriesCount) { memory = new List(); memory.AddRange(Enumerable.Repeat(0f, memorySize * m_MemoriesCount)); @@ -207,7 +206,7 @@ public void Apply(TensorProxy tensorProxy, memory[memorySize * m_MemoryIndex + j] = tensorProxy.data[agentIndex, j]; } - memories[agent.Info.id] = memory; + m_Memories[agent.Info.id] = memory; agentIndex++; } @@ -221,9 +220,7 @@ public void Apply(TensorProxy tensorProxy, /// public class ValueEstimateApplier : TensorApplier.IApplier { - public void Apply(TensorProxy tensorProxy, - IEnumerable agents, - Dictionary> memories) + public void Apply(TensorProxy tensorProxy, IEnumerable agents) { var agentIndex = 0; foreach (var agent in agents) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs index cf21dc8b0a..c464a770e4 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs @@ -20,11 +20,7 @@ public BiDimensionalOutputGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); } @@ -43,11 +39,7 @@ public BatchSizeGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate( - TensorProxy tensorProxy, - int 
batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { tensorProxy.data?.Dispose(); tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1)); @@ -70,11 +62,7 @@ public SequenceLengthGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { tensorProxy.shape = new long[0]; tensorProxy.data?.Dispose(); @@ -97,11 +85,7 @@ public VectorObservationGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); var vecObsSizeT = tensorProxy.shape[tensorProxy.shape.Length - 1]; @@ -126,17 +110,19 @@ public class BarracudaRecurrentInputGenerator : TensorGenerator.IGenerator private readonly int m_MemoryIndex; private readonly ITensorAllocator m_Allocator; - public BarracudaRecurrentInputGenerator(int memoryIndex, ITensorAllocator allocator) + private Dictionary> m_Memories; + + public BarracudaRecurrentInputGenerator( + int memoryIndex, + ITensorAllocator allocator, + Dictionary> memories) { m_MemoryIndex = memoryIndex; m_Allocator = allocator; + m_Memories = memories; } - public void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); @@ -145,7 +131,7 @@ public void Generate( foreach (var agent in agents) { var agentInfo = agent.Info; - var memory = memories.ElementAtOrDefault(agentInfo.id).Value; + var memory = m_Memories.ElementAtOrDefault(agentInfo.id).Value; var offset = memorySize * m_MemoryIndex; @@ -182,11 +168,7 @@ public PreviousActionInputGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); @@ -221,11 +203,7 @@ public ActionMaskInputGenerator(ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); @@ -261,11 +239,7 @@ public RandomNormalInputGenerator(int seed, ITensorAllocator allocator) m_Allocator = allocator; } - public void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); TensorUtils.FillTensorWithRandomNormal(tensorProxy, m_RandomNormal); @@ -291,11 +265,7 @@ public VisualObservationInputGenerator( m_Allocator = allocator; } - public void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories) + public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) { 
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator); var agentIndex = 0; diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs index 51a74f5e05..e21f0cef74 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs @@ -67,8 +67,10 @@ public ModelRunner( m_InferenceInputs = BarracudaModelParamLoader.GetInputTensors(barracudaModel); m_OutputNames = BarracudaModelParamLoader.GetOutputNames(barracudaModel); - m_TensorGenerator = new TensorGenerator(brainParameters, seed, m_TensorAllocator, barracudaModel); - m_TensorApplier = new TensorApplier(brainParameters, seed, m_TensorAllocator, barracudaModel); + m_TensorGenerator = new TensorGenerator( + brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel); + m_TensorApplier = new TensorApplier( + brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel); } private static Dictionary PrepareBarracudaInputs(IEnumerable infInputs) @@ -126,7 +128,7 @@ public void DecideBatch() Profiler.BeginSample($"MLAgents.{m_Model.name}.GenerateTensors"); // Prepare the input tensors to be feed into the engine - m_TensorGenerator.GenerateTensors(m_InferenceInputs, currentBatchSize, m_Agents, m_Memories); + m_TensorGenerator.GenerateTensors(m_InferenceInputs, currentBatchSize, m_Agents); Profiler.EndSample(); Profiler.BeginSample($"MLAgents.{m_Model.name}.PrepareBarracudaInputs"); @@ -144,7 +146,7 @@ public void DecideBatch() Profiler.BeginSample($"MLAgents.{m_Model.name}.ApplyTensors"); // Update the outputs - m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_Agents, m_Memories); + m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_Agents); Profiler.EndSample(); Profiler.EndSample(); diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs index 00a2be112c..379f6d0adc 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs @@ -30,12 +30,7 @@ public interface IApplier /// /// List of Agents that will receive the values of the Tensor. 
/// - /// - /// The memories of all the agents - /// - void Apply(TensorProxy tensorProxy, - IEnumerable agents, - Dictionary> memories); + void Apply(TensorProxy tensorProxy, IEnumerable agents); } private readonly Dictionary m_Dict = new Dictionary(); @@ -49,7 +44,11 @@ void Apply(TensorProxy tensorProxy, /// Tensor allocator /// public TensorApplier( - BrainParameters bp, int seed, ITensorAllocator allocator, object barracudaModel = null) + BrainParameters bp, + int seed, + ITensorAllocator allocator, + Dictionary> memories, + object barracudaModel = null) { m_Dict[TensorNames.ValueEstimateOutput] = new ValueEstimateApplier(); if (bp.vectorActionSpaceType == SpaceType.Continuous) @@ -61,7 +60,6 @@ public TensorApplier( m_Dict[TensorNames.ActionOutput] = new DiscreteActionOutputApplier(bp.vectorActionSize, seed, allocator); } - // m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier(); if (barracudaModel != null) { @@ -70,7 +68,7 @@ public TensorApplier( for (var i = 0; i < model?.memories.Length; i++) { m_Dict[model.memories[i].output] = - new BarracudaMemoryOutputApplier(model.memories.Length, i); + new BarracudaMemoryOutputApplier(model.memories.Length, i, memories); } } } @@ -83,7 +81,7 @@ public TensorApplier( /// One of the tensor does not have an /// associated applier. public void ApplyTensors( - IEnumerable tensors, IEnumerable agents, Dictionary> memories) + IEnumerable tensors, IEnumerable agents) { foreach (var tensor in tensors) { @@ -92,7 +90,7 @@ public void ApplyTensors( throw new UnityAgentsException( $"Unknown tensorProxy expected as output : {tensor.name}"); } - m_Dict[tensor.name].Apply(tensor, agents, memories); + m_Dict[tensor.name].Apply(tensor, agents); } } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs index cd3853bbce..ae49f10a86 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs @@ -28,11 +28,7 @@ public interface IGenerator /// List of Agents containing the /// information that will be used to populate the tensor's data void Generate( - TensorProxy tensorProxy, - int batchSize, - IEnumerable agents, - Dictionary> memories - ); + TensorProxy tensorProxy, int batchSize, IEnumerable agents); } private readonly Dictionary m_Dict = new Dictionary(); @@ -46,7 +42,11 @@ Dictionary> memories /// Tensor allocator /// public TensorGenerator( - BrainParameters bp, int seed, ITensorAllocator allocator, object barracudaModel = null) + BrainParameters bp, + int seed, + ITensorAllocator allocator, + Dictionary> memories, + object barracudaModel = null) { // Generator for Inputs m_Dict[TensorNames.BatchSizePlaceholder] = @@ -64,7 +64,7 @@ public TensorGenerator( for (var i = 0; i < model?.memories.Length; i++) { m_Dict[model.memories[i].input] = - new BarracudaRecurrentInputGenerator(i, allocator); + new BarracudaRecurrentInputGenerator(i, allocator, memories); } } @@ -104,10 +104,7 @@ public void InitializeVisualObservations(Agent agent, ITensorAllocator allocator /// One of the tensor does not have an /// associated generator. 
public void GenerateTensors( - IEnumerable tensors, - int currentBatchSize, - IEnumerable agents, - Dictionary> memories) + IEnumerable tensors, int currentBatchSize, IEnumerable agents) { foreach (var tensor in tensors) { @@ -116,7 +113,7 @@ public void GenerateTensors( throw new UnityAgentsException( $"Unknown tensorProxy expected as input : {tensor.name}"); } - m_Dict[tensor.name].Generate(tensor, currentBatchSize, agents, memories); + m_Dict[tensor.name].Generate(tensor, currentBatchSize, agents); } } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs b/UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs index f1fe5852e6..97535f1708 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs @@ -19,7 +19,7 @@ public enum InferenceDevice public class BarracudaPolicy : IPolicy { - protected ModelRunner m_BatchedDecisionMaker; + protected ModelRunner m_ModelRunner; /// /// Sensor shapes for the associated Agents. All Agents must have the same shapes for their sensors. @@ -35,7 +35,7 @@ public BarracudaPolicy( var aca = GameObject.FindObjectOfType(); aca.LazyInitialization(); var modelRunner = aca.GetOrCreateModelRunner(model, brainParameters, inferenceDevice); - m_BatchedDecisionMaker = modelRunner; + m_ModelRunner = modelRunner; } /// @@ -44,13 +44,13 @@ public void RequestDecision(Agent agent) #if DEBUG ValidateAgentSensorShapes(agent); #endif - m_BatchedDecisionMaker?.PutObservations(agent); + m_ModelRunner?.PutObservations(agent); } /// public void DecideAction() { - m_BatchedDecisionMaker?.DecideBatch(); + m_ModelRunner?.DecideBatch(); } /// diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index 152f6a8552..73d9caa78c 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Dict +from typing import Any, Dict, List import numpy as np import tensorflow as tf @@ -56,7 +56,7 @@ def __init__(self, seed, brain, trainer_parameters): self.seed = seed self.brain = brain self.use_recurrent = trainer_parameters["use_recurrent"] - self.memory_dict = {} + self.memory_dict: Dict[int, np.ndarray] = {} self.normalize = trainer_parameters.get("normalize", False) self.use_continuous_act = brain.vector_action_space_type == "continuous" self.model_path = trainer_parameters["model_path"] @@ -177,23 +177,21 @@ def make_empty_memory(self, num_agents): """ return np.zeros((num_agents, self.m_size)) - def save_memories(self, agent_ids, memory_matrix): - if not isinstance(memory_matrix, np.ndarray): - return - for index, id in enumerate(agent_ids): - self.memory_dict[id] = memory_matrix[index, :] - - def retrieve_memories(self, agent_ids): - memory_matrix = np.zeros((len(agent_ids), self.m_size)) - for index, id in enumerate(agent_ids): - if id in self.memory_dict.keys(): - memory_matrix[index, :] = self.memory_dict[id] + def save_memories(self, agent_ids: List[int], memory_matrix: np.ndarray) -> None: + for index, agent_id in enumerate(agent_ids): + self.memory_dict[agent_id] = memory_matrix[index, :] + + def retrieve_memories(self, agent_ids: List[int]) -> np.ndarray: + memory_matrix = np.zeros((len(agent_ids), self.m_size), dtype=np.float) + for index, agent_id in enumerate(agent_ids): + if agent_id in self.memory_dict: + memory_matrix[index, :] = self.memory_dict[agent_id] return memory_matrix def remove_memories(self, agent_ids): - for id in 
agent_ids: - if id in self.memory_dict.keys(): - self.memory_dict.pop(id) + for agent_id in agent_ids: + if agent_id in self.memory_dict: + self.memory_dict.pop(agent_id) def get_current_step(self): """ From d9e72b6eaac6ffe78eb92936b605029e56657937 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 24 Oct 2019 15:24:55 -0700 Subject: [PATCH 11/17] Passing by reference does not work. Do not merge --- .../Editor/Tests/EditModeTestInternalBrainTensorApplier.cs | 3 ++- .../Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs | 3 ++- .../Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs | 4 ++-- .../Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs | 5 ++++- .../Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs | 4 ++-- .../Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs | 4 ++-- .../ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs | 4 ++-- 7 files changed, 16 insertions(+), 11 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs index 1ac66d0607..c35e3dadf5 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs @@ -34,7 +34,8 @@ public void Construction() { var bp = new BrainParameters(); var alloc = new TensorCachingAllocator(); - var tensorGenerator = new TensorApplier(bp, 0, alloc, null); + var mem = new Dictionary>(); + var tensorGenerator = new TensorApplier(bp, 0, alloc, ref mem); Assert.IsNotNull(tensorGenerator); alloc.Dispose(); } diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs index a7815d9484..6761ce5acd 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs @@ -38,7 +38,8 @@ public void Construction() { var bp = new BrainParameters(); var alloc = new TensorCachingAllocator(); - var tensorGenerator = new TensorGenerator(bp, 0, alloc, null); + var mem = new Dictionary>(); + var tensorGenerator = new TensorGenerator(bp, 0, alloc, ref mem); Assert.IsNotNull(tensorGenerator); alloc.Dispose(); } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs index a0450bb449..2fc981f293 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs @@ -179,7 +179,7 @@ public class BarracudaMemoryOutputApplier : TensorApplier.IApplier public BarracudaMemoryOutputApplier( int memoriesCount, int memoryIndex, - Dictionary> memories) + ref Dictionary> memories) { m_MemoriesCount = memoriesCount; m_MemoryIndex = memoryIndex; @@ -207,7 +207,7 @@ public void Apply(TensorProxy tensorProxy, IEnumerable agents) } m_Memories[agent.Info.id] = memory; - + UnityEngine.Debug.Log(m_Memories[agent.Info.id].Count + " " + agent.Info.id); agentIndex++; } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs index c464a770e4..c1d2f6ed37 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs @@ -115,11 +115,12 
@@ public class BarracudaRecurrentInputGenerator : TensorGenerator.IGenerator public BarracudaRecurrentInputGenerator( int memoryIndex, ITensorAllocator allocator, - Dictionary> memories) + ref Dictionary> memories) { m_MemoryIndex = memoryIndex; m_Allocator = allocator; m_Memories = memories; + } public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable agents) @@ -135,6 +136,7 @@ public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable var offset = memorySize * m_MemoryIndex; + UnityEngine.Debug.Log(memory + " " + agent.Info.id); if (memory == null) { agentIndex++; @@ -146,6 +148,7 @@ public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable { break; } + tensorProxy.data[agentIndex, j] = memory[j + offset]; } agentIndex++; diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs index e21f0cef74..c9978d1297 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs @@ -68,9 +68,9 @@ public ModelRunner( m_InferenceInputs = BarracudaModelParamLoader.GetInputTensors(barracudaModel); m_OutputNames = BarracudaModelParamLoader.GetOutputNames(barracudaModel); m_TensorGenerator = new TensorGenerator( - brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel); + brainParameters, seed, m_TensorAllocator, ref m_Memories, barracudaModel); m_TensorApplier = new TensorApplier( - brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel); + brainParameters, seed, m_TensorAllocator, ref m_Memories, barracudaModel); } private static Dictionary PrepareBarracudaInputs(IEnumerable infInputs) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs index 379f6d0adc..adfc2ecac6 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs @@ -47,7 +47,7 @@ public TensorApplier( BrainParameters bp, int seed, ITensorAllocator allocator, - Dictionary> memories, + ref Dictionary> memories, object barracudaModel = null) { m_Dict[TensorNames.ValueEstimateOutput] = new ValueEstimateApplier(); @@ -68,7 +68,7 @@ public TensorApplier( for (var i = 0; i < model?.memories.Length; i++) { m_Dict[model.memories[i].output] = - new BarracudaMemoryOutputApplier(model.memories.Length, i, memories); + new BarracudaMemoryOutputApplier(model.memories.Length, i, ref memories); } } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs index ae49f10a86..0cbb22e6e6 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs @@ -45,7 +45,7 @@ public TensorGenerator( BrainParameters bp, int seed, ITensorAllocator allocator, - Dictionary> memories, + ref Dictionary> memories, object barracudaModel = null) { // Generator for Inputs @@ -64,7 +64,7 @@ public TensorGenerator( for (var i = 0; i < model?.memories.Length; i++) { m_Dict[model.memories[i].input] = - new BarracudaRecurrentInputGenerator(i, allocator, memories); + new BarracudaRecurrentInputGenerator(i, allocator, ref memories); } } From 876e30f9822bd62547e7dbe01bed5ed059dd3a65 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 24 Oct 2019 16:13:55 -0700 Subject: [PATCH 
12/17] Fixing huge bug in Inference --- .../Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs | 5 ++--- .../Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs | 5 +---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs index 2fc981f293..39ec4d6c7e 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs @@ -193,9 +193,9 @@ public void Apply(TensorProxy tensorProxy, IEnumerable agents) foreach (var agent in agents) { - var memory = m_Memories.ElementAtOrDefault(agent.Info.id).Value; + var memory = m_Memories.ContainsKey(agent.Info.id) ? m_Memories[agent.Info.id] : null; - if (memory == null || m_Memories.Count < memorySize * m_MemoriesCount) + if (memory == null || memory.Count < memorySize * m_MemoriesCount) { memory = new List(); memory.AddRange(Enumerable.Repeat(0f, memorySize * m_MemoriesCount)); @@ -207,7 +207,6 @@ public void Apply(TensorProxy tensorProxy, IEnumerable agents) } m_Memories[agent.Info.id] = memory; - UnityEngine.Debug.Log(m_Memories[agent.Info.id].Count + " " + agent.Info.id); agentIndex++; } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs index c1d2f6ed37..f584c6d9c6 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs @@ -131,12 +131,9 @@ public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable var agentIndex = 0; foreach (var agent in agents) { - var agentInfo = agent.Info; - var memory = m_Memories.ElementAtOrDefault(agentInfo.id).Value; - + var memory = m_Memories.ContainsKey(agent.Info.id) ? m_Memories[agent.Info.id] : null; var offset = memorySize * m_MemoryIndex; - UnityEngine.Debug.Log(memory + " " + agent.Info.id); if (memory == null) { agentIndex++; From 6595e7630abecbf2c7f092d9c5c61c2025a71736 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 24 Oct 2019 16:26:50 -0700 Subject: [PATCH 13/17] Applying patches --- .../Grpc/CommunicatorObjects/UnityToExternalGrpc.cs | 8 ++++---- .../envs/communicator_objects/unity_to_external_pb2.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityToExternalGrpc.cs b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityToExternalGrpc.cs index b10d2baa9b..2ab1cbe0a4 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityToExternalGrpc.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityToExternalGrpc.cs @@ -3,7 +3,7 @@ // Generated by the protocol buffer compiler. DO NOT EDIT! 
// source: mlagents/envs/communicator_objects/unity_to_external.proto // -#pragma warning disable 1591 +#pragma warning disable 0414, 1591 #region Designer generated code using grpc = global::Grpc.Core; @@ -13,14 +13,14 @@ public static partial class UnityToExternalProto { static readonly string __ServiceName = "communicator_objects.UnityToExternalProto"; - static readonly grpc::Marshaller __Marshaller_UnityMessageProto = grpc::Marshallers.Create((arg) => global::Google.Protobuf.MessageExtensions.ToByteArray(arg), global::MLAgents.CommunicatorObjects.UnityMessageProto.Parser.ParseFrom); + static readonly grpc::Marshaller __Marshaller_communicator_objects_UnityMessageProto = grpc::Marshallers.Create((arg) => global::Google.Protobuf.MessageExtensions.ToByteArray(arg), global::MLAgents.CommunicatorObjects.UnityMessageProto.Parser.ParseFrom); static readonly grpc::Method __Method_Exchange = new grpc::Method( grpc::MethodType.Unary, __ServiceName, "Exchange", - __Marshaller_UnityMessageProto, - __Marshaller_UnityMessageProto); + __Marshaller_communicator_objects_UnityMessageProto, + __Marshaller_communicator_objects_UnityMessageProto); /// Service descriptor public static global::Google.Protobuf.Reflection.ServiceDescriptor Descriptor diff --git a/ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py b/ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py index dc1f03e387..5e18fa5204 100644 --- a/ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py +++ b/ml-agents-envs/mlagents/envs/communicator_objects/unity_to_external_pb2.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: mlagents/envs/communicator_objects/unity_to_external.proto @@ -8,6 +7,7 @@ from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database +from google.protobuf import descriptor_pb2 # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -20,7 +20,6 @@ name='mlagents/envs/communicator_objects/unity_to_external.proto', package='communicator_objects', syntax='proto3', - serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'), serialized_pb=_b('\n:mlagents/envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/unity_message.proto2v\n\x14UnityToExternalProto\x12^\n\x08\x45xchange\x12\'.communicator_objects.UnityMessageProto\x1a\'.communicator_objects.UnityMessageProto\"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3') , dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,]) @@ -30,14 +29,15 @@ _sym_db.RegisterFileDescriptor(DESCRIPTOR) -DESCRIPTOR._options = None +DESCRIPTOR.has_options = True +DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\252\002\034MLAgents.CommunicatorObjects')) _UNITYTOEXTERNALPROTO = _descriptor.ServiceDescriptor( name='UnityToExternalProto', full_name='communicator_objects.UnityToExternalProto', file=DESCRIPTOR, index=0, - serialized_options=None, + options=None, serialized_start=140, serialized_end=258, methods=[ @@ -48,7 +48,7 @@ containing_service=None, input_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGEPROTO, output_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGEPROTO, - serialized_options=None, + options=None, 
), ]) _sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNALPROTO) From b1b90f6560997971a393b5131b635844d9df5b2b Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 24 Oct 2019 16:58:26 -0700 Subject: [PATCH 14/17] fixing tests --- ml-agents-envs/mlagents/envs/mock_communicator.py | 1 - ml-agents/mlagents/trainers/tests/test_policy.py | 1 + ml-agents/mlagents/trainers/tf_policy.py | 8 ++++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ml-agents-envs/mlagents/envs/mock_communicator.py b/ml-agents-envs/mlagents/envs/mock_communicator.py index d3cc1e8f88..c360ea94ef 100755 --- a/ml-agents-envs/mlagents/envs/mock_communicator.py +++ b/ml-agents-envs/mlagents/envs/mock_communicator.py @@ -84,7 +84,6 @@ def _get_agent_infos(self): stored_vector_actions=vector_action, stored_text_actions="", text_observation="", - memories=[], done=(i == 2), max_step_reached=False, id=i, diff --git a/ml-agents/mlagents/trainers/tests/test_policy.py b/ml-agents/mlagents/trainers/tests/test_policy.py index 2b6a1fee88..1a3ed31756 100644 --- a/ml-agents/mlagents/trainers/tests/test_policy.py +++ b/ml-agents/mlagents/trainers/tests/test_policy.py @@ -27,6 +27,7 @@ def test_take_action_returns_nones_on_missing_values(): test_seed = 3 policy = TFPolicy(test_seed, basic_mock_brain(), basic_params()) policy.evaluate = MagicMock(return_value={}) + policy.save_memories = MagicMock() brain_info_with_agents = BrainInfo( [], [], [], agents=["an-agent-id"], local_done=[False] ) diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index 73d9caa78c..cb497e6522 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional import numpy as np import tensorflow as tf @@ -177,7 +177,11 @@ def make_empty_memory(self, num_agents): """ return np.zeros((num_agents, self.m_size)) - def save_memories(self, agent_ids: List[int], memory_matrix: np.ndarray) -> None: + def save_memories( + self, agent_ids: List[int], memory_matrix: Optional[np.ndarray] + ) -> None: + if memory_matrix is None: + return for index, agent_id in enumerate(agent_ids): self.memory_dict[agent_id] = memory_matrix[index, :] From b6bf28f98c0aa66a569468b055c04a9aa229499d Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 25 Oct 2019 10:18:37 -0700 Subject: [PATCH 15/17] Addressing comments --- .../Tests/EditModeTestInternalBrainTensorApplier.cs | 2 +- .../Tests/EditModeTestInternalBrainTensorGenerator.cs | 2 +- UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs | 5 +++++ .../ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs | 8 ++++---- .../ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs | 7 +++---- .../ML-Agents/Scripts/InferenceBrain/ModelRunner.cs | 4 ++-- .../ML-Agents/Scripts/InferenceBrain/TensorApplier.cs | 4 ++-- .../ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs | 6 ++---- ml-agents/mlagents/trainers/tf_policy.py | 6 +++--- 9 files changed, 23 insertions(+), 21 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs index c35e3dadf5..e721ab0b11 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs @@ -35,7 +35,7 @@ public void Construction() var bp = new BrainParameters(); var alloc = 
new TensorCachingAllocator(); var mem = new Dictionary>(); - var tensorGenerator = new TensorApplier(bp, 0, alloc, ref mem); + var tensorGenerator = new TensorApplier(bp, 0, alloc, mem); Assert.IsNotNull(tensorGenerator); alloc.Dispose(); } diff --git a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs index 6761ce5acd..5aaefdbf82 100644 --- a/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs @@ -39,7 +39,7 @@ public void Construction() var bp = new BrainParameters(); var alloc = new TensorCachingAllocator(); var mem = new Dictionary>(); - var tensorGenerator = new TensorGenerator(bp, 0, alloc, ref mem); + var tensorGenerator = new TensorGenerator(bp, 0, alloc, mem); Assert.IsNotNull(tensorGenerator); alloc.Dispose(); } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs b/UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs index a706223c10..3f81fa23ce 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs @@ -148,6 +148,11 @@ public interface ICommunicator /// Agent info. void PutObservations(string brainKey, Agent agent); + /// + /// Signals the ICommunicator that the Agents are now ready to receive their action + /// and that if the communicator has not yet received an action for one of the Agents + /// it needs to get one at this point. + /// void DecideBatch(); /// diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs index 39ec4d6c7e..582f5af51f 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs @@ -179,7 +179,7 @@ public class BarracudaMemoryOutputApplier : TensorApplier.IApplier public BarracudaMemoryOutputApplier( int memoriesCount, int memoryIndex, - ref Dictionary> memories) + Dictionary> memories) { m_MemoriesCount = memoriesCount; m_MemoryIndex = memoryIndex; @@ -193,9 +193,9 @@ public void Apply(TensorProxy tensorProxy, IEnumerable agents) foreach (var agent in agents) { - var memory = m_Memories.ContainsKey(agent.Info.id) ? m_Memories[agent.Info.id] : null; - - if (memory == null || memory.Count < memorySize * m_MemoriesCount) + List memory = null; + if (!m_Memories.TryGetValue(agent.Info.id, out memory) + || memory.Count < memorySize * m_MemoriesCount) { memory = new List(); memory.AddRange(Enumerable.Repeat(0f, memorySize * m_MemoriesCount)); diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs index f584c6d9c6..b89fc8b359 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs @@ -115,7 +115,7 @@ public class BarracudaRecurrentInputGenerator : TensorGenerator.IGenerator public BarracudaRecurrentInputGenerator( int memoryIndex, ITensorAllocator allocator, - ref Dictionary> memories) + Dictionary> memories) { m_MemoryIndex = memoryIndex; m_Allocator = allocator; @@ -131,10 +131,9 @@ public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable var agentIndex = 0; foreach (var agent in agents) { - var memory = m_Memories.ContainsKey(agent.Info.id) ? 
m_Memories[agent.Info.id] : null; var offset = memorySize * m_MemoryIndex; - - if (memory == null) + List memory = null; + if (!m_Memories.TryGetValue(agent.Info.id, out memory)) { agentIndex++; continue; diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs index c9978d1297..e21f0cef74 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs @@ -68,9 +68,9 @@ public ModelRunner( m_InferenceInputs = BarracudaModelParamLoader.GetInputTensors(barracudaModel); m_OutputNames = BarracudaModelParamLoader.GetOutputNames(barracudaModel); m_TensorGenerator = new TensorGenerator( - brainParameters, seed, m_TensorAllocator, ref m_Memories, barracudaModel); + brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel); m_TensorApplier = new TensorApplier( - brainParameters, seed, m_TensorAllocator, ref m_Memories, barracudaModel); + brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel); } private static Dictionary PrepareBarracudaInputs(IEnumerable infInputs) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs index adfc2ecac6..379f6d0adc 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs @@ -47,7 +47,7 @@ public TensorApplier( BrainParameters bp, int seed, ITensorAllocator allocator, - ref Dictionary> memories, + Dictionary> memories, object barracudaModel = null) { m_Dict[TensorNames.ValueEstimateOutput] = new ValueEstimateApplier(); @@ -68,7 +68,7 @@ public TensorApplier( for (var i = 0; i < model?.memories.Length; i++) { m_Dict[model.memories[i].output] = - new BarracudaMemoryOutputApplier(model.memories.Length, i, ref memories); + new BarracudaMemoryOutputApplier(model.memories.Length, i, memories); } } } diff --git a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs index 0cbb22e6e6..38050e8c96 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs @@ -45,7 +45,7 @@ public TensorGenerator( BrainParameters bp, int seed, ITensorAllocator allocator, - ref Dictionary> memories, + Dictionary> memories, object barracudaModel = null) { // Generator for Inputs @@ -55,8 +55,6 @@ public TensorGenerator( new SequenceLengthGenerator(allocator); m_Dict[TensorNames.VectorObservationPlacholder] = new VectorObservationGenerator(allocator); - // m_Dict[TensorNames.RecurrentInPlaceholder] = - // new RecurrentInputGenerator(allocator); if (barracudaModel != null) { @@ -64,7 +62,7 @@ public TensorGenerator( for (var i = 0; i < model?.memories.Length; i++) { m_Dict[model.memories[i].input] = - new BarracudaRecurrentInputGenerator(i, allocator, ref memories); + new BarracudaRecurrentInputGenerator(i, allocator, memories); } } diff --git a/ml-agents/mlagents/trainers/tf_policy.py b/ml-agents/mlagents/trainers/tf_policy.py index cb497e6522..e3a64aaa72 100644 --- a/ml-agents/mlagents/trainers/tf_policy.py +++ b/ml-agents/mlagents/trainers/tf_policy.py @@ -126,9 +126,9 @@ def get_action(self, brain_info: BrainInfo) -> ActionInfo: self.remove_memories( [ - brain_info.agents[i] - for i in range(len(brain_info.agents)) - if 
brain_info.local_done[i] + agent + for agent, done in zip(brain_info.agents, brain_info.local_done) + if done ] ) run_out = self.evaluate(brain_info) From d794531544ae9dc29d889175a5e29dcba1db323b Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 25 Oct 2019 10:19:41 -0700 Subject: [PATCH 16/17] Renaming variable to reflect type --- UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs b/UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs index dfc3a2cb99..4f92c8ba21 100644 --- a/UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs +++ b/UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs @@ -11,7 +11,7 @@ public class RemotePolicy : IPolicy { private string m_BehaviorName; - protected ICommunicator m_BatchedDecisionMaker; + protected ICommunicator m_Communicator; /// /// Sensor shapes for the associated Agents. All Agents must have the same shapes for their sensors. @@ -26,7 +26,7 @@ public RemotePolicy( m_BehaviorName = behaviorName; var aca = GameObject.FindObjectOfType(); aca.LazyInitialization(); - m_BatchedDecisionMaker = aca.Communicator; + m_Communicator = aca.Communicator; aca.Communicator.SubscribeBrain(m_BehaviorName, brainParameters); } @@ -36,13 +36,13 @@ public void RequestDecision(Agent agent) #if DEBUG ValidateAgentSensorShapes(agent); #endif - m_BatchedDecisionMaker?.PutObservations(m_BehaviorName, agent); + m_Communicator?.PutObservations(m_BehaviorName, agent); } /// public void DecideAction() { - m_BatchedDecisionMaker?.DecideBatch(); + m_Communicator?.DecideBatch(); } /// From 78e4a314666de7604b0c1ec2fd9cc8fe13154b57 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 25 Oct 2019 10:46:24 -0700 Subject: [PATCH 17/17] test
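
Note (illustrative, not part of the patch series): the pattern these commits converge on is a single Dictionary<int, List<float>> of per-agent memories, created once by ModelRunner and shared by value with TensorGenerator and TensorApplier, with TryGetValue lookups replacing the earlier ElementAtOrDefault calls. Below is a minimal, self-contained C# sketch of that pattern; the class and member names here are hypothetical and chosen only to show the store/retrieve shape, not the actual ML-Agents API.

    using System.Collections.Generic;
    using System.Linq;

    // Hypothetical stand-in for the shared per-agent memory dictionary keyed by agent id.
    public class AgentMemoryStore
    {
        private readonly Dictionary<int, List<float>> m_Memories =
            new Dictionary<int, List<float>>();

        // Applier side: store an agent's memory, zero-initializing it if missing or too small.
        public void Save(int agentId, int memorySize, IEnumerable<float> values)
        {
            List<float> memory;
            if (!m_Memories.TryGetValue(agentId, out memory) || memory.Count < memorySize)
            {
                memory = Enumerable.Repeat(0f, memorySize).ToList();
            }
            var i = 0;
            foreach (var v in values)
            {
                if (i >= memorySize)
                {
                    break;
                }
                memory[i++] = v;
            }
            m_Memories[agentId] = memory;
        }

        // Generator side: agents with no stored memory return null and are skipped
        // (their slice of the recurrent input tensor is left at zero).
        public IReadOnlyList<float> Retrieve(int agentId)
        {
            List<float> memory;
            return m_Memories.TryGetValue(agentId, out memory) ? memory : null;
        }
    }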