From 377a25c455acbb9a0e5af31cccf9df3916d06c9f Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES
Date: Mon, 30 Mar 2020 14:18:31 -0700
Subject: [PATCH] Hotfixes for Release 0.15.1 (#3698)

* [bug-fix] Increase height of wall in CrawlerStatic (#3650)
* [bug-fix] Improve performance for PPO with continuous actions (#3662)
* Corrected a typo in a name of a function (#3670)
OnEpsiodeBegin was corrected to OnEpisodeBegin in Migrating.md document
* Add Academy.AutomaticSteppingEnabled to migration (#3666)
* Fix editor port in Dockerfile (#3674)
* Hotfix memory leak on Python (#3664)
* Hotfix memory leak on Python
* Fixing
* Fixing a bug in the heuristic policy. A decision should not be requested when the agent is done
* [bug-fix] Make Python able to deal with 0-step episodes (#3671)
* adding some comments
Co-authored-by: Ervin T
* Remove vis_encode_type from list of required (#3677)
* Update changelog (#3678)
* Shorten timeout duration for environment close (#3679)
The timeout duration for closing an environment was set to the same duration as the timeout when waiting for a response from the still-running environment. This led to long waits for the error response when the communication version wasn't matching. This change forces a timeout duration of 0 when handling errors.
* Bumping the versions
* handle multiple dones in a single step (#3700)
* handle multiple dones in a single step
* [tests] Make end-to-end tests more stable (#3697)
* [bug-fix] Fix entropy computation for GaussianDistribution (#3684)
* Fix how we set logging levels (#3703)
* cleanup logging
* comments and cleanup
* pylint, gym
* [skip-ci] Update changelog for logging fix. (#3707)
* [skip ci] Update README
* [skip ci] Fixed a typo

Co-authored-by: Ervin T
Co-authored-by: Adam Streck
Co-authored-by: Chris Elion
Co-authored-by: Jonathan Harper
---
 .pylintrc                                     |  2 +
 Dockerfile                                    |  6 ++-
 .../Crawler/Prefabs/FixedPlatform.prefab      | 13 +++--
 README.md                                     |  3 +-
 com.unity.ml-agents/CHANGELOG.md              | 11 +++++
 com.unity.ml-agents/Runtime/Academy.cs        |  2 +-
 com.unity.ml-agents/Runtime/Agent.cs          |  3 +-
 .../Runtime/Communicator/RpcCommunicator.cs   | 17 +++++--
 .../Runtime/Policies/HeuristicPolicy.cs       |  5 +-
 com.unity.ml-agents/package.json              |  2 +-
 docs/Migrating.md                             |  4 +-
 gym-unity/gym_unity/__init__.py               |  2 +-
 gym-unity/gym_unity/envs/__init__.py          | 32 ++++++++-----
 gym-unity/gym_unity/tests/test_gym.py         | 48 +++++++++++++++++++
 ml-agents-envs/mlagents_envs/__init__.py      |  2 +-
 ml-agents-envs/mlagents_envs/environment.py   | 23 ++++++---
 ml-agents-envs/mlagents_envs/logging_util.py  | 46 ++++++++++++++++++
 .../side_channel/outgoing_message.py          |  4 +-
 .../side_channel/side_channel.py              |  4 +-
 ml-agents/mlagents/logging_util.py            | 10 ----
 ml-agents/mlagents/model_serialization.py     |  5 +-
 ml-agents/mlagents/trainers/__init__.py       |  2 +-
 .../mlagents/trainers/agent_processor.py      | 31 +++++++-----
 .../components/reward_signals/__init__.py     |  5 +-
 ml-agents/mlagents/trainers/curriculum.py     |  4 +-
 ml-agents/mlagents/trainers/distributions.py  | 38 ++++++++++-----
 ml-agents/mlagents/trainers/env_manager.py    |  5 +-
 ml-agents/mlagents/trainers/ghost/trainer.py  |  5 +-
 ml-agents/mlagents/trainers/learn.py          | 19 ++++----
 .../mlagents/trainers/meta_curriculum.py      |  4 +-
 .../mlagents/trainers/policy/nn_policy.py     |  1 +
 .../mlagents/trainers/policy/tf_policy.py     | 23 +++------
 ml-agents/mlagents/trainers/ppo/trainer.py    |  4 +-
 ml-agents/mlagents/trainers/sac/optimizer.py  |  5 +-
 ml-agents/mlagents/trainers/sac/trainer.py    |  6 +--
 ml-agents/mlagents/trainers/stats.py          |  7 ++-
 .../trainers/subprocess_env_manager.py        |  5 +-
 .../trainers/tests/simple_test_envs.py        |  2 +-
 .../trainers/tests/test_agent_processor.py    |  9 ++++
 .../trainers/tests/test_distributions.py      | 10 +++-
 .../mlagents/trainers/tests/test_simple_rl.py | 23 ++++-----
 .../mlagents/trainers/trainer/trainer.py      |  5 +-
 .../mlagents/trainers/trainer_controller.py   |  4 +-
 ml-agents/mlagents/trainers/trainer_util.py   |  5 +-
 setup.cfg                                     |  1 +
 45 files changed, 320 insertions(+), 147 deletions(-)
 create mode 100644 ml-agents-envs/mlagents_envs/logging_util.py
 delete mode 100644 ml-agents/mlagents/logging_util.py

diff --git a/.pylintrc b/.pylintrc
index 40f89e8708..1da1dad0d4 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -44,3 +44,5 @@ disable =
 
         # Appears to be https://github.com/PyCQA/pylint/issues/2981
         W0201,
+        # Using the global statement
+        W0603,
diff --git a/Dockerfile b/Dockerfile
index 9262f6db3a..ee3fba449a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -132,7 +132,9 @@ COPY ml-agents /ml-agents
 WORKDIR /ml-agents
 RUN pip install -e .
 
-# port 5005 is the port used in in Editor training.
-EXPOSE 5005
+# Port 5004 is the port used in Editor training.
+# Environments will start from port 5005,
+# so allow enough ports for several environments.
+EXPOSE 5004-5050
 
 ENTRYPOINT ["mlagents-learn"]
diff --git a/Project/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab b/Project/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab
index 8d7c55b6e1..8e6da3ace8 100644
--- a/Project/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab
+++ b/Project/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab
@@ -1690,8 +1690,8 @@ MonoBehaviour:
   m_InferenceDevice: 0
   m_BehaviorType: 0
   m_BehaviorName: CrawlerStatic
-  m_TeamID: 0
-  m_useChildSensors: 1
+  TeamId: 0
+  m_UseChildSensors: 1
 --- !u!114 &114230237520033992
 MonoBehaviour:
   m_ObjectHideFlags: 0
@@ -1704,6 +1704,9 @@ MonoBehaviour:
   m_Script: {fileID: 11500000, guid: 2f37c30a5e8d04117947188818902ef3, type: 3}
   m_Name:
   m_EditorClassIdentifier:
+  agentParameters:
+    maxStep: 0
+  hasUpgradedFromAgentParameters: 1
   maxStep: 5000
   target: {fileID: 4749909135913778}
   ground: {fileID: 4856650706546504}
@@ -1759,7 +1762,7 @@ MonoBehaviour:
   m_Name:
   m_EditorClassIdentifier:
   DecisionPeriod: 5
-  RepeatAction: 0
+  TakeActionsBetweenDecisions: 0
   offsetStep: 0
 --- !u!1 &1492926997393242
 GameObject:
@@ -2959,8 +2962,8 @@ Transform:
   m_PrefabAsset: {fileID: 0}
   m_GameObject: {fileID: 1995322274649904}
   m_LocalRotation: {x: 0, y: -0, z: -0, w: 1}
-  m_LocalPosition: {x: -0, y: 0.5, z: 0}
-  m_LocalScale: {x: 0.01, y: 0.01, z: 0.01}
+  m_LocalPosition: {x: -0, y: 1.5, z: 0}
+  m_LocalScale: {x: 0.01, y: 0.03, z: 0.01}
   m_Children: []
   m_Father: {fileID: 4924174722017668}
   m_RootOrder: 1
diff --git a/README.md b/README.md
index 85c355d9b7..11aa0511d4 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ developer communities.
 * Train using concurrent Unity environment instances
 
 ## Releases & Documentation
-**Our latest, stable release is 0.15.0. Click
+**Our latest, stable release is 0.15.1. Click
 [here](docs/Readme.md)
 to get started with the latest release of ML-Agents.**
 
@@ -61,6 +61,7 @@ details of the changes between versions.
 
 | **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
 |:-------:|:------:|:-------------:|:-------:|:------------:|
+| **0.15.0** | March 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip) |
 | **0.14.1** | February 26, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.1.zip) |
 | **0.14.0** | February 13, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
 | **0.13.1** | January 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index 61c530b825..b25d43a182 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -5,6 +5,17 @@
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
+## [0.15.1-preview] - 2020-03-30
+### Bug Fixes
+ - Raise the wall in CrawlerStatic scene to prevent Agent from falling off. (#3650)
+ - Fixed an issue where specifying `vis_encode_type` was required only for SAC. (#3677)
+ - Fixed the reported entropy values for continuous actions (#3684)
+ - Fixed an issue where switching models using `SetModel()` during training would use an excessive amount of memory. (#3664)
+ - Environment subprocesses now close immediately on timeout or wrong API version. (#3679)
+ - Fixed an issue in the gym wrapper that would raise an exception if an Agent called EndEpisode multiple times in the same step. (#3700)
+ - Fixed an issue where logging output was not visible; logging levels are now set consistently (#3703).
+
+
 ## [0.15.0-preview] - 2020-03-18
 ### Major Changes
 - `Agent.CollectObservations` now takes a VectorSensor argument. (#3352, #3389)
diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
index 82d851e72b..7375e45eb9 100644
--- a/com.unity.ml-agents/Runtime/Academy.cs
+++ b/com.unity.ml-agents/Runtime/Academy.cs
@@ -64,7 +64,7 @@ public class Academy : IDisposable
         /// Unity package version of com.unity.ml-agents.
         /// This must match the version string in package.json and is checked in a unit test.
/// - internal const string k_PackageVersion = "0.15.0-preview"; + internal const string k_PackageVersion = "0.15.1-preview"; const int k_EditorTrainingPort = 5004; diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs index 838cd2cbf2..4e703f65ec 100644 --- a/com.unity.ml-agents/Runtime/Agent.cs +++ b/com.unity.ml-agents/Runtime/Agent.cs @@ -315,6 +315,7 @@ protected virtual void OnDisable() void NotifyAgentDone(DoneReason doneReason) { + m_Info.episodeId = m_EpisodeId; m_Info.reward = m_Reward; m_Info.done = true; m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached; @@ -376,7 +377,7 @@ public void SetModel( // If everything is the same, don't make any changes. return; } - + NotifyAgentDone(DoneReason.Disabled); m_PolicyFactory.model = model; m_PolicyFactory.inferenceDevice = inferenceDevice; m_PolicyFactory.behaviorName = behaviorName; diff --git a/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs b/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs index edbf1d9e64..15637b2582 100644 --- a/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs +++ b/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs @@ -458,13 +458,20 @@ UnityRLInitializationOutputProto GetTempUnityRlInitializationOutput() { if (m_CurrentUnityRlOutput.AgentInfos.ContainsKey(behaviorName)) { - if (output == null) + if (m_CurrentUnityRlOutput.AgentInfos[behaviorName].CalculateSize() > 0) { - output = new UnityRLInitializationOutputProto(); - } + // Only send the BrainParameters if there is a non empty list of + // AgentInfos ready to be sent. + // This is to ensure that The Python side will always have a first + // observation when receiving the BrainParameters + if (output == null) + { + output = new UnityRLInitializationOutputProto(); + } - var brainParameters = m_UnsentBrainKeys[behaviorName]; - output.BrainParameters.Add(brainParameters.ToProto(behaviorName, true)); + var brainParameters = m_UnsentBrainKeys[behaviorName]; + output.BrainParameters.Add(brainParameters.ToProto(behaviorName, true)); + } } } diff --git a/com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs b/com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs index 7806732a32..84c57ad73e 100644 --- a/com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs +++ b/com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs @@ -29,7 +29,10 @@ public HeuristicPolicy(Func heuristic) public void RequestDecision(AgentInfo info, List sensors) { StepSensors(sensors); - m_LastDecision = m_Heuristic.Invoke(); + if (!info.done) + { + m_LastDecision = m_Heuristic.Invoke(); + } } /// diff --git a/com.unity.ml-agents/package.json b/com.unity.ml-agents/package.json index 7e4b1d4e74..faf047b163 100755 --- a/com.unity.ml-agents/package.json +++ b/com.unity.ml-agents/package.json @@ -1,7 +1,7 @@ { "name": "com.unity.ml-agents", "displayName": "ML Agents", - "version": "0.15.0-preview", + "version": "0.15.1-preview", "unity": "2018.4", "description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.", "dependencies": { diff --git a/docs/Migrating.md b/docs/Migrating.md index c9a99efbba..654fd3ba7c 100644 --- a/docs/Migrating.md +++ b/docs/Migrating.md @@ -34,6 +34,7 @@ The versions can be found in * The interface for SideChannels was changed: * In C#, `OnMessageReceived` now takes a `IncomingMessage` argument, and `QueueMessageToSend` takes an `OutgoingMessage` argument. 
   * In python, `on_message_received` now takes a `IncomingMessage` argument, and `queue_message_to_send` takes an `OutgoingMessage` argument.
+  * Automatic stepping for Academy is now controlled from the AutomaticSteppingEnabled property.
 
 ### Steps to Migrate
 * Add the `using MLAgents.Sensors;` in addition to `using MLAgents;` on top of your Agent's script.
@@ -45,11 +46,12 @@ The versions can be found in
 * We strongly recommend replacing the following methods with their new equivalent as they will be removed in a later release:
   * `InitializeAgent()` to `Initialize()`
   * `AgentAction()` to `OnActionReceived()`
-  * `AgentReset()` to `OnEpsiodeBegin()`
+  * `AgentReset()` to `OnEpisodeBegin()`
   * `Done()` to `EndEpisode()`
   * `GiveModel()` to `SetModel()`
 * Replace `IFloatProperties` variables with `FloatPropertiesChannel` variables.
 * If you implemented custom `SideChannels`, update the signatures of your methods, and add your data to the `OutgoingMessage` or read it from the `IncomingMessage`.
+* Replace calls to Academy.EnableAutomaticStepping()/DisableAutomaticStepping() with Academy.AutomaticSteppingEnabled = true/false.
 
 ## Migrating from 0.13 to 0.14
diff --git a/gym-unity/gym_unity/__init__.py b/gym-unity/gym_unity/__init__.py
index 9da2f8fcca..903e77ce1b 100644
--- a/gym-unity/gym_unity/__init__.py
+++ b/gym-unity/gym_unity/__init__.py
@@ -1 +1 @@
-__version__ = "0.15.0"
+__version__ = "0.15.1"
diff --git a/gym-unity/gym_unity/envs/__init__.py b/gym-unity/gym_unity/envs/__init__.py
index 0066c71664..5077758d96 100644
--- a/gym-unity/gym_unity/envs/__init__.py
+++ b/gym-unity/gym_unity/envs/__init__.py
@@ -1,4 +1,3 @@
-import logging
 import itertools
 import numpy as np
 from typing import Any, Dict, List, Optional, Tuple, Union
@@ -8,6 +7,7 @@
 from mlagents_envs.environment import UnityEnvironment
 from mlagents_envs.base_env import BatchedStepResult
+from mlagents_envs import logging_util
 
 
 class UnityGymException(error.Error):
@@ -18,9 +18,8 @@ class UnityGymException(error.Error):
     pass
 
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("gym_unity")
-
+logger = logging_util.get_logger(__name__)
+logging_util.set_log_level(logging_util.INFO)
 
 GymSingleStepResult = Tuple[np.ndarray, float, bool, Dict]
 GymMultiStepResult = Tuple[List[np.ndarray], List[float], List[bool], Dict]
@@ -364,9 +363,8 @@ def _check_agents(self, n_agents: int) -> None:
 
     def _sanitize_info(self, step_result: BatchedStepResult) -> BatchedStepResult:
         n_extra_agents = step_result.n_agents() - self._n_agents
-        if n_extra_agents < 0 or n_extra_agents > self._n_agents:
+        if n_extra_agents < 0:
             # In this case, some Agents did not request a decision when expected
-            # or too many requested a decision
             raise UnityGymException(
                 "The number of agents in the scene does not match the expected number."
             )
@@ -386,6 +384,10 @@ def _sanitize_info(self, step_result: BatchedStepResult) -> BatchedStepResult:
         # only cares about the ordering.
         for index, agent_id in enumerate(step_result.agent_id):
             if not self._previous_step_result.contains_agent(agent_id):
+                if step_result.done[index]:
+                    # If the Agent is already done (e.g. it ended its episode twice in one step)
+                    # Don't try to register it here.
+                    continue
                 # Register this agent, and get the reward of the previous agent that
                 # was in its index, so that we can return it to the gym.
last_reward = self.agent_mapper.register_new_agent_id(agent_id) @@ -528,8 +530,12 @@ def mark_agent_done(self, agent_id: int, reward: float) -> None: """ Declare the agent done with the corresponding final reward. """ - gym_index = self._agent_id_to_gym_index.pop(agent_id) - self._done_agents_index_to_last_reward[gym_index] = reward + if agent_id in self._agent_id_to_gym_index: + gym_index = self._agent_id_to_gym_index.pop(agent_id) + self._done_agents_index_to_last_reward[gym_index] = reward + else: + # Agent was never registered in the first place (e.g. EndEpisode called multiple times) + pass def register_new_agent_id(self, agent_id: int) -> float: """ @@ -581,9 +587,13 @@ def set_initial_agents(self, agent_ids: List[int]) -> None: self._gym_id_order = list(agent_ids) def mark_agent_done(self, agent_id: int, reward: float) -> None: - gym_index = self._gym_id_order.index(agent_id) - self._done_agents_index_to_last_reward[gym_index] = reward - self._gym_id_order[gym_index] = -1 + try: + gym_index = self._gym_id_order.index(agent_id) + self._done_agents_index_to_last_reward[gym_index] = reward + self._gym_id_order[gym_index] = -1 + except ValueError: + # Agent was never registered in the first place (e.g. EndEpisode called multiple times) + pass def register_new_agent_id(self, agent_id: int) -> float: original_index = self._gym_id_order.index(-1) diff --git a/gym-unity/gym_unity/tests/test_gym.py b/gym-unity/gym_unity/tests/test_gym.py index 3ee6e67913..1e691397e0 100644 --- a/gym-unity/gym_unity/tests/test_gym.py +++ b/gym-unity/gym_unity/tests/test_gym.py @@ -129,6 +129,50 @@ def test_sanitize_action_one_agent_done(mock_env): assert expected_agent_id == agent_id +@mock.patch("gym_unity.envs.UnityEnvironment") +def test_sanitize_action_new_agent_done(mock_env): + mock_spec = create_mock_group_spec( + vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3] + ) + mock_step = create_mock_vector_step_result(num_agents=3) + mock_step.agent_id = np.array(range(5)) + setup_mock_unityenvironment(mock_env, mock_spec, mock_step) + env = UnityEnv(" ", use_visual=False, multiagent=True) + + received_step_result = create_mock_vector_step_result(num_agents=7) + received_step_result.agent_id = np.array(range(7)) + # agent #3 (id = 2) is Done + # so is the "new" agent (id = 5) + done = [False] * 7 + done[2] = True + done[5] = True + received_step_result.done = np.array(done) + sanitized_result = env._sanitize_info(received_step_result) + for expected_agent_id, agent_id in zip([0, 1, 6, 3, 4], sanitized_result.agent_id): + assert expected_agent_id == agent_id + + +@mock.patch("gym_unity.envs.UnityEnvironment") +def test_sanitize_action_single_agent_multiple_done(mock_env): + mock_spec = create_mock_group_spec( + vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3] + ) + mock_step = create_mock_vector_step_result(num_agents=1) + mock_step.agent_id = np.array(range(1)) + setup_mock_unityenvironment(mock_env, mock_spec, mock_step) + env = UnityEnv(" ", use_visual=False, multiagent=False) + + received_step_result = create_mock_vector_step_result(num_agents=3) + received_step_result.agent_id = np.array(range(3)) + # original agent (id = 0) is Done + # so is the "new" agent (id = 1) + done = [True, True, False] + received_step_result.done = np.array(done) + sanitized_result = env._sanitize_info(received_step_result) + for expected_agent_id, agent_id in zip([2], sanitized_result.agent_id): + assert expected_agent_id == agent_id + + # Helper methods @@ -200,6 +244,10 @@ def 
test_agent_id_index_mapper(mapper_cls): mapper.mark_agent_done(1001, 42.0) mapper.mark_agent_done(1004, 1337.0) + # Make sure we can handle an unknown agent id being marked done. + # This can happen when an agent ends an episode on the same step it starts. + mapper.mark_agent_done(9999, -1.0) + # Now add new agents, and get the rewards of the agent they replaced. old_reward1 = mapper.register_new_agent_id(2001) old_reward2 = mapper.register_new_agent_id(2002) diff --git a/ml-agents-envs/mlagents_envs/__init__.py b/ml-agents-envs/mlagents_envs/__init__.py index 9da2f8fcca..903e77ce1b 100644 --- a/ml-agents-envs/mlagents_envs/__init__.py +++ b/ml-agents-envs/mlagents_envs/__init__.py @@ -1 +1 @@ -__version__ = "0.15.0" +__version__ = "0.15.1" diff --git a/ml-agents-envs/mlagents_envs/environment.py b/ml-agents-envs/mlagents_envs/environment.py index 00f421cd1c..8cdb48d45b 100644 --- a/ml-agents-envs/mlagents_envs/environment.py +++ b/ml-agents-envs/mlagents_envs/environment.py @@ -1,13 +1,14 @@ import atexit import glob import uuid -import logging import numpy as np import os import subprocess from typing import Dict, List, Optional, Any import mlagents_envs + +from mlagents_envs.logging_util import get_logger from mlagents_envs.side_channel.side_channel import SideChannel, IncomingMessage from mlagents_envs.base_env import ( @@ -47,7 +48,7 @@ import struct -logger = logging.getLogger("mlagents_envs") +logger = get_logger(__name__) class UnityEnvironment(BaseEnv): @@ -142,12 +143,12 @@ def __init__( aca_output = self.send_academy_parameters(rl_init_parameters_in) aca_params = aca_output.rl_initialization_output except UnityTimeOutException: - self._close() + self._close(0) raise unity_communicator_version = aca_params.communication_version if unity_communicator_version != UnityEnvironment.API_VERSION: - self._close() + self._close(0) raise UnityEnvironmentException( f"The communication API version is not compatible between Unity and python. " f"Python API: {UnityEnvironment.API_VERSION}, Unity API: {unity_communicator_version}.\n " @@ -228,7 +229,7 @@ def validate_environment_path(env_path: str) -> Optional[str]: def executable_launcher(self, file_name, docker_training, no_graphics, args): launch_string = self.validate_environment_path(file_name) if launch_string is None: - self._close() + self._close(0) raise UnityEnvironmentException( f"Couldn't launch the {file_name} environment. Provided filename does not match any environments." ) @@ -433,13 +434,21 @@ def close(self): else: raise UnityEnvironmentException("No Unity environment is loaded.") - def _close(self): + def _close(self, timeout: Optional[int] = None) -> None: + """ + Close the communicator and environment subprocess (if necessary). + + :int timeout: [Optional] Number of seconds to wait for the environment to shut down before + force-killing it. Defaults to `self.timeout_wait`. + """ + if timeout is None: + timeout = self.timeout_wait self._loaded = False self.communicator.close() if self.proc1 is not None: # Wait a bit for the process to shutdown, but kill it if it takes too long try: - self.proc1.wait(timeout=self.timeout_wait) + self.proc1.wait(timeout=timeout) signal_name = self.returncode_to_signal_name(self.proc1.returncode) signal_name = f" ({signal_name})" if signal_name else "" return_info = f"Environment shut down with return code {self.proc1.returncode}{signal_name}." 
diff --git a/ml-agents-envs/mlagents_envs/logging_util.py b/ml-agents-envs/mlagents_envs/logging_util.py new file mode 100644 index 0000000000..b768fc28ca --- /dev/null +++ b/ml-agents-envs/mlagents_envs/logging_util.py @@ -0,0 +1,46 @@ +import logging # noqa I251 + +CRITICAL = logging.CRITICAL +FATAL = logging.FATAL +ERROR = logging.ERROR +WARNING = logging.WARNING +INFO = logging.INFO +DEBUG = logging.DEBUG +NOTSET = logging.NOTSET + +_loggers = set() +_log_level = NOTSET +DATE_FORMAT = "%Y-%m-%d %H:%M:%S" +LOG_FORMAT = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" + + +def get_logger(name: str) -> logging.Logger: + """ + Create a logger with the specified name. The logger will use the log level + specified by set_log_level() + """ + logger = logging.getLogger(name=name) + + # If we've already set the log level, make sure new loggers use it + if _log_level != NOTSET: + logger.setLevel(_log_level) + + # Keep track of this logger so that we can change the log level later + _loggers.add(logger) + return logger + + +def set_log_level(log_level: int) -> None: + """ + Set the ML-Agents logging level. This will also configure the logging format (if it hasn't already been set). + """ + global _log_level + _log_level = log_level + + # Configure the log format. + # In theory, this would be sufficient, but if another library calls logging.basicConfig + # first, it doesn't have any effect. + logging.basicConfig(level=_log_level, format=LOG_FORMAT, datefmt=DATE_FORMAT) + + for logger in _loggers: + logger.setLevel(log_level) diff --git a/ml-agents-envs/mlagents_envs/side_channel/outgoing_message.py b/ml-agents-envs/mlagents_envs/side_channel/outgoing_message.py index 835349cbd2..83bbe3446c 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/outgoing_message.py +++ b/ml-agents-envs/mlagents_envs/side_channel/outgoing_message.py @@ -1,9 +1,9 @@ from typing import List import struct -import logging +from mlagents_envs.logging_util import get_logger -logger = logging.getLogger(__name__) +logger = get_logger(__name__) class OutgoingMessage: diff --git a/ml-agents-envs/mlagents_envs/side_channel/side_channel.py b/ml-agents-envs/mlagents_envs/side_channel/side_channel.py index 52f2a106f1..469cb51eab 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/side_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/side_channel.py @@ -1,11 +1,11 @@ from abc import ABC, abstractmethod from typing import List import uuid -import logging from mlagents_envs.side_channel import IncomingMessage, OutgoingMessage +from mlagents_envs.logging_util import get_logger -logger = logging.getLogger(__name__) +logger = get_logger(__name__) class SideChannel(ABC): diff --git a/ml-agents/mlagents/logging_util.py b/ml-agents/mlagents/logging_util.py deleted file mode 100644 index a9478023d7..0000000000 --- a/ml-agents/mlagents/logging_util.py +++ /dev/null @@ -1,10 +0,0 @@ -import logging - - -def create_logger(name, log_level): - date_format = "%Y-%m-%d %H:%M:%S" - log_format = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" - - logging.basicConfig(level=log_level, format=log_format, datefmt=date_format) - logger = logging.getLogger(name=name) - return logger diff --git a/ml-agents/mlagents/model_serialization.py b/ml-agents/mlagents/model_serialization.py index fdd322ab0a..ee1bd0c85e 100644 --- a/ml-agents/mlagents/model_serialization.py +++ b/ml-agents/mlagents/model_serialization.py @@ -1,6 +1,5 @@ from distutils.util import strtobool import os -import logging from typing import 
Any, List, Set, NamedTuple from distutils.version import LooseVersion @@ -19,14 +18,16 @@ from tensorflow.python.platform import gfile from tensorflow.python.framework import graph_util + +from mlagents_envs.logging_util import get_logger from mlagents.trainers import tensorflow_to_barracuda as tf2bc if LooseVersion(tf.__version__) < LooseVersion("1.12.0"): # ONNX is only tested on 1.12.0 and later ONNX_EXPORT_ENABLED = False +logger = get_logger(__name__) -logger = logging.getLogger("mlagents.trainers") POSSIBLE_INPUT_NODES = frozenset( [ diff --git a/ml-agents/mlagents/trainers/__init__.py b/ml-agents/mlagents/trainers/__init__.py index 9da2f8fcca..903e77ce1b 100644 --- a/ml-agents/mlagents/trainers/__init__.py +++ b/ml-agents/mlagents/trainers/__init__.py @@ -1 +1 @@ -__version__ = "0.15.0" +__version__ = "0.15.1" diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index 97ddf74b85..50a3a4796b 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -1,5 +1,5 @@ import sys -from typing import List, Dict, Deque, TypeVar, Generic, Tuple, Set +from typing import List, Dict, Deque, TypeVar, Generic, Tuple, Any from collections import defaultdict, Counter, deque from mlagents_envs.base_env import BatchedStepResult, StepResult @@ -66,7 +66,6 @@ def add_experiences( for _entropy in take_action_outputs["entropy"]: self.stats_reporter.add_stat("Policy/Entropy", _entropy) - terminated_agents: Set[str] = set() # Make unique agent_ids that are global across workers action_global_agent_ids = [ get_global_agent_id(worker_id, ag_id) for ag_id in previous_action.agent_ids @@ -85,6 +84,7 @@ def add_experiences( stored_take_action_outputs = self.last_take_action_outputs.get( global_id, None ) + if stored_agent_step is not None and stored_take_action_outputs is not None: # We know the step is from the same worker, so use the local agent id. obs = stored_agent_step.obs @@ -143,6 +143,8 @@ def add_experiences( traj_queue.put(trajectory) self.experience_buffers[global_id] = [] if curr_agent_step.done: + # Record episode length for agents which have had at least + # 1 step. Done after reset ignored. self.stats_reporter.add_stat( "Environment/Cumulative Reward", self.episode_rewards.get(global_id, 0), @@ -151,7 +153,6 @@ def add_experiences( "Environment/Episode Length", self.episode_steps.get(global_id, 0), ) - terminated_agents.add(global_id) elif not curr_agent_step.done: self.episode_steps[global_id] += 1 @@ -160,9 +161,9 @@ def add_experiences( curr_agent_step, batched_step_result.agent_id_to_index[_id], ) - - for terminated_id in terminated_agents: - self._clean_agent_data(terminated_id) + # Delete all done agents, regardless of if they had a 0-length episode. + if curr_agent_step.done: + self._clean_agent_data(global_id) for _gid in action_global_agent_ids: # If the ID doesn't have a last step result, the agent just reset, @@ -177,14 +178,22 @@ def _clean_agent_data(self, global_id: str) -> None: """ Removes the data for an Agent. 
""" - del self.experience_buffers[global_id] - del self.last_take_action_outputs[global_id] - del self.last_step_result[global_id] - del self.episode_steps[global_id] - del self.episode_rewards[global_id] + self._safe_delete(self.experience_buffers, global_id) + self._safe_delete(self.last_take_action_outputs, global_id) + self._safe_delete(self.last_step_result, global_id) + self._safe_delete(self.episode_steps, global_id) + self._safe_delete(self.episode_rewards, global_id) self.policy.remove_previous_action([global_id]) self.policy.remove_memories([global_id]) + def _safe_delete(self, my_dictionary: Dict[Any, Any], key: Any) -> None: + """ + Safe removes data from a dictionary. If not found, + don't delete. + """ + if key in my_dictionary: + del my_dictionary[key] + def publish_trajectory_queue( self, trajectory_queue: "AgentManagerQueue[Trajectory]" ) -> None: diff --git a/ml-agents/mlagents/trainers/components/reward_signals/__init__.py b/ml-agents/mlagents/trainers/components/reward_signals/__init__.py index 72c719e64f..9c721b4d64 100644 --- a/ml-agents/mlagents/trainers/components/reward_signals/__init__.py +++ b/ml-agents/mlagents/trainers/components/reward_signals/__init__.py @@ -1,4 +1,3 @@ -import logging from typing import Any, Dict, List from collections import namedtuple import numpy as np @@ -6,11 +5,13 @@ from mlagents.tf_utils import tf +from mlagents_envs.logging_util import get_logger from mlagents.trainers.exception import UnityTrainerException from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.buffer import AgentBuffer -logger = logging.getLogger("mlagents.trainers") + +logger = get_logger(__name__) RewardSignalResult = namedtuple( "RewardSignalResult", ["scaled_reward", "unscaled_reward"] diff --git a/ml-agents/mlagents/trainers/curriculum.py b/ml-agents/mlagents/trainers/curriculum.py index 0eba472265..12eaa7a5e7 100644 --- a/ml-agents/mlagents/trainers/curriculum.py +++ b/ml-agents/mlagents/trainers/curriculum.py @@ -4,9 +4,9 @@ from .exception import CurriculumConfigError, CurriculumLoadingError -import logging +from mlagents_envs.logging_util import get_logger -logger = logging.getLogger("mlagents.trainers") +logger = get_logger(__name__) class Curriculum: diff --git a/ml-agents/mlagents/trainers/distributions.py b/ml-agents/mlagents/trainers/distributions.py index a608f45932..294bad11cb 100644 --- a/ml-agents/mlagents/trainers/distributions.py +++ b/ml-agents/mlagents/trainers/distributions.py @@ -64,6 +64,7 @@ def __init__( act_size: List[int], reparameterize: bool = False, tanh_squash: bool = False, + condition_sigma: bool = True, log_sigma_min: float = -20, log_sigma_max: float = 2, ): @@ -79,7 +80,11 @@ def __init__( :param log_sigma_max: Maximum log standard deviation to clip by. 
""" encoded = self._create_mu_log_sigma( - logits, act_size, log_sigma_min, log_sigma_max + logits, + act_size, + log_sigma_min, + log_sigma_max, + condition_sigma=condition_sigma, ) self._sampled_policy = self._create_sampled_policy(encoded) if not reparameterize: @@ -101,6 +106,7 @@ def _create_mu_log_sigma( act_size: List[int], log_sigma_min: float, log_sigma_max: float, + condition_sigma: bool, ) -> "GaussianDistribution.MuSigmaTensors": mu = tf.layers.dense( @@ -112,14 +118,22 @@ def _create_mu_log_sigma( reuse=tf.AUTO_REUSE, ) - # Policy-dependent log_sigma_sq - log_sigma = tf.layers.dense( - logits, - act_size[0], - activation=None, - name="log_std", - kernel_initializer=ModelUtils.scaled_init(0.01), - ) + if condition_sigma: + # Policy-dependent log_sigma_sq + log_sigma = tf.layers.dense( + logits, + act_size[0], + activation=None, + name="log_std", + kernel_initializer=ModelUtils.scaled_init(0.01), + ) + else: + log_sigma = tf.get_variable( + "log_std", + [act_size[0]], + dtype=tf.float32, + initializer=tf.zeros_initializer(), + ) log_sigma = tf.clip_by_value(log_sigma, log_sigma_min, log_sigma_max) sigma = tf.exp(log_sigma) return self.MuSigmaTensors(mu, log_sigma, sigma) @@ -146,7 +160,7 @@ def _create_entropy( self, encoded: "GaussianDistribution.MuSigmaTensors" ) -> tf.Tensor: single_dim_entropy = 0.5 * tf.reduce_mean( - tf.log(2 * np.pi * np.e) + tf.square(encoded.log_sigma) + tf.log(2 * np.pi * np.e) + 2 * encoded.log_sigma ) # Make entropy the right shape return tf.ones_like(tf.reshape(encoded.mu[:, 0], [-1])) * single_dim_entropy @@ -155,8 +169,8 @@ def _do_squash_correction_for_tanh(self, probs, squashed_policy): """ Adjust probabilities for squashed sample before output """ - probs -= tf.log(1 - squashed_policy ** 2 + EPSILON) - return probs + adjusted_probs = probs - tf.log(1 - squashed_policy ** 2 + EPSILON) + return adjusted_probs @property def total_log_probs(self) -> tf.Tensor: diff --git a/ml-agents/mlagents/trainers/env_manager.py b/ml-agents/mlagents/trainers/env_manager.py index 0a20a48203..23288e12b4 100644 --- a/ml-agents/mlagents/trainers/env_manager.py +++ b/ml-agents/mlagents/trainers/env_manager.py @@ -1,16 +1,17 @@ from abc import ABC, abstractmethod -import logging from typing import List, Dict, NamedTuple, Iterable from mlagents_envs.base_env import BatchedStepResult, AgentGroupSpec, AgentGroup from mlagents.trainers.brain import BrainParameters from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue from mlagents.trainers.action_info import ActionInfo +from mlagents_envs.logging_util import get_logger AllStepResult = Dict[AgentGroup, BatchedStepResult] AllGroupSpec = Dict[AgentGroup, AgentGroupSpec] -logger = logging.getLogger("mlagents.trainers") + +logger = get_logger(__name__) class EnvironmentStep(NamedTuple): diff --git a/ml-agents/mlagents/trainers/ghost/trainer.py b/ml-agents/mlagents/trainers/ghost/trainer.py index 7abf65d1cb..c952e42fbe 100644 --- a/ml-agents/mlagents/trainers/ghost/trainer.py +++ b/ml-agents/mlagents/trainers/ghost/trainer.py @@ -4,8 +4,8 @@ from typing import Deque, Dict, List, Any, cast import numpy as np -import logging +from mlagents_envs.logging_util import get_logger from mlagents.trainers.brain import BrainParameters from mlagents.trainers.policy import Policy from mlagents.trainers.policy.tf_policy import TFPolicy @@ -14,7 +14,8 @@ from mlagents.trainers.trajectory import Trajectory from mlagents.trainers.agent_processor import 
AgentManagerQueue -logger = logging.getLogger("mlagents.trainers") + +logger = get_logger(__name__) class GhostTrainer(Trainer): diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index fdc159997a..b793f433e9 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -1,5 +1,4 @@ # # Unity ML-Agents Toolkit -import logging import argparse import os @@ -31,7 +30,9 @@ from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig from mlagents_envs.exception import UnityEnvironmentException from mlagents_envs.timers import hierarchical_timer, get_timer_tree -from mlagents.logging_util import create_logger +from mlagents_envs import logging_util + +logger = logging_util.get_logger(__name__) def _create_parser(): @@ -338,7 +339,7 @@ def write_timing_tree(summaries_dir: str, run_id: str) -> None: with open(timing_path, "w") as f: json.dump(get_timer_tree(), f, indent=4) except FileNotFoundError: - logging.warning( + logger.warning( f"Unable to save to {timing_path}. Make sure the directory exists" ) @@ -395,7 +396,7 @@ def prepare_for_docker_run(docker_target_name, env_path): shutil.copyfile(src_f, dst_f) os.chmod(dst_f, 0o775) # Make executable except Exception as e: - logging.getLogger("mlagents.trainers").info(e) + logger.info(e) env_path = "/ml-agents/{env_path}".format(env_path=env_path) return env_path @@ -471,16 +472,16 @@ def run_cli(options: RunOptions) -> None: print(get_version_string()) if options.debug: - log_level = logging.DEBUG + log_level = logging_util.DEBUG else: - log_level = logging.INFO + log_level = logging_util.INFO # disable noisy warnings from tensorflow tf_utils.set_warnings_enabled(False) - trainer_logger = create_logger("mlagents.trainers", log_level) + logging_util.set_log_level(log_level) - trainer_logger.debug("Configuration for this run:") - trainer_logger.debug(json.dumps(options._asdict(), indent=4)) + logger.debug("Configuration for this run:") + logger.debug(json.dumps(options._asdict(), indent=4)) run_seed = options.seed if options.cpu: diff --git a/ml-agents/mlagents/trainers/meta_curriculum.py b/ml-agents/mlagents/trainers/meta_curriculum.py index 60ea9cc5c5..699890359f 100644 --- a/ml-agents/mlagents/trainers/meta_curriculum.py +++ b/ml-agents/mlagents/trainers/meta_curriculum.py @@ -3,9 +3,9 @@ from typing import Dict, Set from mlagents.trainers.curriculum import Curriculum -import logging +from mlagents_envs.logging_util import get_logger -logger = logging.getLogger("mlagents.trainers") +logger = get_logger(__name__) class MetaCurriculum: diff --git a/ml-agents/mlagents/trainers/policy/nn_policy.py b/ml-agents/mlagents/trainers/policy/nn_policy.py index 528afa6a8b..a064b9d192 100644 --- a/ml-agents/mlagents/trainers/policy/nn_policy.py +++ b/ml-agents/mlagents/trainers/policy/nn_policy.py @@ -202,6 +202,7 @@ def _create_cc_actor( self.act_size, reparameterize=reparameterize, tanh_squash=tanh_squash, + condition_sigma=condition_sigma_on_obs, ) if tanh_squash: diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index a92fb34a38..9d0c02a57a 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -1,10 +1,10 @@ -import logging from typing import Any, Dict, List, Optional import abc import numpy as np from mlagents.tf_utils import tf from mlagents import tf_utils from mlagents_envs.exception import UnityException +from mlagents_envs.logging_util 
import get_logger from mlagents.trainers.policy import Policy from mlagents.trainers.action_info import ActionInfo from mlagents.trainers.trajectory import SplitObservations @@ -13,7 +13,7 @@ from mlagents.trainers.models import ModelUtils -logger = logging.getLogger("mlagents.trainers") +logger = get_logger(__name__) class UnityPolicyException(UnityException): @@ -174,17 +174,6 @@ def get_action( if batched_step_result.n_agents() == 0: return ActionInfo.empty() - agents_done = [ - agent - for agent, done in zip( - batched_step_result.agent_id, batched_step_result.done - ) - if done - ] - - self.remove_memories(agents_done) - self.remove_previous_action(agents_done) - global_agent_ids = [ get_global_agent_id(worker_id, int(agent_id)) for agent_id in batched_step_result.agent_id @@ -379,9 +368,11 @@ def _initialize_tensorflow_references(self): def create_input_placeholders(self): with self.graph.as_default(): - self.global_step, self.increment_step_op, self.steps_to_increment = ( - ModelUtils.create_global_steps() - ) + ( + self.global_step, + self.increment_step_op, + self.steps_to_increment, + ) = ModelUtils.create_global_steps() self.visual_in = ModelUtils.create_visual_input_placeholders( self.brain.camera_resolutions ) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 308ee975e3..ecbb30a383 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -2,11 +2,11 @@ # ## ML-Agent Learning (PPO) # Contains an implementation of PPO as described in: https://arxiv.org/abs/1707.06347 -import logging from collections import defaultdict import numpy as np +from mlagents_envs.logging_util import get_logger from mlagents.trainers.policy.nn_policy import NNPolicy from mlagents.trainers.trainer.rl_trainer import RLTrainer from mlagents.trainers.brain import BrainParameters @@ -16,7 +16,7 @@ from mlagents.trainers.exception import UnityTrainerException -logger = logging.getLogger("mlagents.trainers") +logger = get_logger(__name__) class PPOTrainer(RLTrainer): diff --git a/ml-agents/mlagents/trainers/sac/optimizer.py b/ml-agents/mlagents/trainers/sac/optimizer.py index 963595fb28..72494f4fe9 100644 --- a/ml-agents/mlagents/trainers/sac/optimizer.py +++ b/ml-agents/mlagents/trainers/sac/optimizer.py @@ -1,9 +1,9 @@ -import logging import numpy as np from typing import Dict, List, Optional, Any, Mapping from mlagents.tf_utils import tf +from mlagents_envs.logging_util import get_logger from mlagents.trainers.sac.network import SACPolicyNetwork, SACTargetNetwork from mlagents.trainers.models import LearningRateSchedule, EncoderType, ModelUtils from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer @@ -13,7 +13,7 @@ EPSILON = 1e-6 # Small value to avoid divide by zero -logger = logging.getLogger("mlagents.trainers") +logger = get_logger(__name__) POLICY_SCOPE = "" TARGET_SCOPE = "target_network" @@ -155,7 +155,6 @@ def __init__(self, policy: TFPolicy, trainer_params: Dict[str, Any]): "q1_loss": self.q1_loss, "q2_loss": self.q2_loss, "entropy_coef": self.ent_coef, - "entropy": self.policy.entropy, "update_batch": self.update_batch_policy, "update_value": self.update_batch_value, "update_entropy": self.update_batch_entropy, diff --git a/ml-agents/mlagents/trainers/sac/trainer.py b/ml-agents/mlagents/trainers/sac/trainer.py index 9ab025de28..f0a58779e5 100644 --- a/ml-agents/mlagents/trainers/sac/trainer.py +++ b/ml-agents/mlagents/trainers/sac/trainer.py @@ -2,7 +2,6 @@ # Contains an 
implementation of SAC as described in https://arxiv.org/abs/1801.01290 # and implemented in https://github.com/hill-a/stable-baselines -import logging from collections import defaultdict from typing import Dict import os @@ -10,6 +9,7 @@ import numpy as np +from mlagents_envs.logging_util import get_logger from mlagents_envs.timers import timed from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.policy.nn_policy import NNPolicy @@ -20,7 +20,8 @@ from mlagents.trainers.exception import UnityTrainerException -logger = logging.getLogger("mlagents.trainers") +logger = get_logger(__name__) + BUFFER_TRUNCATE_PERCENT = 0.8 @@ -73,7 +74,6 @@ def __init__( "memory_size", "model_path", "reward_signals", - "vis_encode_type", ] self._check_param_keys() diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 70a417e3a3..53499c0e5c 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -4,9 +4,12 @@ import abc import csv import os - -from mlagents.tf_utils import tf +from mlagents_envs.logging_util import get_logger from mlagents_envs.timers import set_gauge +from mlagents.tf_utils import tf + + +logger = get_logger(__name__) class StatsSummary(NamedTuple): diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index 9164ec475f..f24841b0b1 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -1,4 +1,3 @@ -import logging from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set import cloudpickle @@ -8,6 +7,7 @@ from multiprocessing.connection import Connection from queue import Empty as EmptyQueueException from mlagents_envs.base_env import BaseEnv, AgentGroup +from mlagents_envs.logging_util import get_logger from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult from mlagents_envs.timers import ( TimerNode, @@ -26,7 +26,8 @@ from mlagents_envs.side_channel.side_channel import SideChannel from mlagents.trainers.brain_conversion_utils import group_spec_to_brain_parameters -logger = logging.getLogger("mlagents.trainers") + +logger = get_logger(__name__) class EnvironmentCommand(NamedTuple): diff --git a/ml-agents/mlagents/trainers/tests/simple_test_envs.py b/ml-agents/mlagents/trainers/tests/simple_test_envs.py index 66a0abc3e0..d0817b0e60 100644 --- a/ml-agents/mlagents/trainers/tests/simple_test_envs.py +++ b/ml-agents/mlagents/trainers/tests/simple_test_envs.py @@ -13,7 +13,7 @@ VIS_OBS_SIZE = (20, 20, 3) STEP_SIZE = 0.1 -TIME_PENALTY = 0.001 +TIME_PENALTY = 0.01 MIN_STEPS = int(1.0 / STEP_SIZE) + 1 SUCCESS_REWARD = 1.0 + MIN_STEPS * TIME_PENALTY diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py index 0a3083bf2b..4ca4146427 100644 --- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py +++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py @@ -152,6 +152,15 @@ def test_agent_deletion(): assert len(processor.last_take_action_outputs.keys()) == 0 assert len(processor.episode_steps.keys()) == 0 assert len(processor.episode_rewards.keys()) == 0 + assert len(processor.last_step_result.keys()) == 0 + + # check that steps with immediate dones don't add to dicts + processor.add_experiences(mock_done_step, 0, ActionInfo.empty()) + assert len(processor.experience_buffers.keys()) == 0 + assert 
len(processor.last_take_action_outputs.keys()) == 0 + assert len(processor.episode_steps.keys()) == 0 + assert len(processor.episode_rewards.keys()) == 0 + assert len(processor.last_step_result.keys()) == 0 def test_end_episode(): diff --git a/ml-agents/mlagents/trainers/tests/test_distributions.py b/ml-agents/mlagents/trainers/tests/test_distributions.py index 751894bd3f..c27047fd69 100644 --- a/ml-agents/mlagents/trainers/tests/test_distributions.py +++ b/ml-agents/mlagents/trainers/tests/test_distributions.py @@ -53,7 +53,7 @@ def dummy_config(): def test_gaussian_distribution(): with tf.Graph().as_default(): - logits = tf.Variable(initial_value=[[0, 0]], trainable=True, dtype=tf.float32) + logits = tf.Variable(initial_value=[[1, 1]], trainable=True, dtype=tf.float32) distribution = GaussianDistribution( logits, act_size=VECTOR_ACTION_SPACE, @@ -71,6 +71,14 @@ def test_gaussian_distribution(): assert out.shape[1] == VECTOR_ACTION_SPACE[0] output = sess.run([distribution.total_log_probs]) assert output[0].shape[0] == 1 + # Test entropy is correct + log_std_tensor = tf.get_default_graph().get_tensor_by_name( + "log_std/BiasAdd:0" + ) + feed_dict = {log_std_tensor: [[1.0, 1.0]]} + entropy = sess.run([distribution.entropy], feed_dict=feed_dict) + # Entropy with log_std of 1.0 should be 2.42 + assert pytest.approx(entropy[0], 0.01) == 2.42 def test_tanh_distribution(): diff --git a/ml-agents/mlagents/trainers/tests/test_simple_rl.py b/ml-agents/mlagents/trainers/tests/test_simple_rl.py index e8ba8e9957..a4280fcf17 100644 --- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py +++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py @@ -29,7 +29,7 @@ lambd: 0.95 learning_rate: 5.0e-3 learning_rate_schedule: constant - max_steps: 2000 + max_steps: 3000 memory_size: 16 normalize: false num_epoch: 3 @@ -89,6 +89,9 @@ def generate_config( # Custom reward processors shuld be built within the test function and passed to _check_environment_trains # Default is average over the last 5 final rewards def default_reward_processor(rewards, last_n_rewards=5): + rewards_to_use = rewards[-last_n_rewards:] + # For debugging tests + print("Last {} rewards:".format(last_n_rewards), rewards_to_use) return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean() @@ -120,7 +123,7 @@ def _check_environment_trains( trainer_config, reward_processor=default_reward_processor, meta_curriculum=None, - success_threshold=0.99, + success_threshold=0.9, env_manager=None, ): # Create controller and begin training. 
@@ -164,7 +167,6 @@ def _check_environment_trains( if ( success_threshold is not None ): # For tests where we are just checking setup and not reward - processed_rewards = [ reward_processor(rewards) for rewards in env.final_rewards.values() ] @@ -220,13 +222,14 @@ def test_visual_advanced_ppo(vis_encode_type, num_visual): def test_recurrent_ppo(use_discrete): env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete) override_vals = { - "max_steps": 3000, + "max_steps": 5000, "batch_size": 64, "buffer_size": 128, + "learning_rate": 1e-3, "use_recurrent": True, } config = generate_config(PPO_CONFIG, override_vals) - _check_environment_trains(env, config) + _check_environment_trains(env, config, success_threshold=0.9) @pytest.mark.parametrize("use_discrete", [True, False]) @@ -274,14 +277,6 @@ def test_visual_advanced_sac(vis_encode_type, num_visual): _check_environment_trains(env, config, success_threshold=0.5) -@pytest.mark.parametrize("use_discrete", [True, False]) -def test_recurrent_sac(use_discrete): - env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete) - override_vals = {"batch_size": 32, "use_recurrent": True, "max_steps": 2000} - config = generate_config(SAC_CONFIG, override_vals) - _check_environment_trains(env, config) - - @pytest.mark.parametrize("use_discrete", [True, False]) def test_simple_ghost(use_discrete): env = Simple1DEnvironment( @@ -319,7 +314,7 @@ def test_simple_ghost_fails(use_discrete): processed_rewards = [ default_reward_processor(rewards) for rewards in env.final_rewards.values() ] - success_threshold = 0.99 + success_threshold = 0.9 assert any(reward > success_threshold for reward in processed_rewards) and any( reward < success_threshold for reward in processed_rewards ) diff --git a/ml-agents/mlagents/trainers/trainer/trainer.py b/ml-agents/mlagents/trainers/trainer/trainer.py index a6c18cae94..9fd7ac0c14 100644 --- a/ml-agents/mlagents/trainers/trainer/trainer.py +++ b/ml-agents/mlagents/trainers/trainer/trainer.py @@ -1,5 +1,4 @@ # # Unity ML-Agents Toolkit -import logging from typing import Dict, List, Deque, Any import time import abc @@ -10,6 +9,7 @@ from collections import deque from mlagents_envs.timers import set_gauge +from mlagents_envs.logging_util import get_logger from mlagents.model_serialization import export_policy_model, SerializationSettings from mlagents.trainers.policy.tf_policy import TFPolicy from mlagents.trainers.stats import StatsReporter @@ -20,7 +20,8 @@ from mlagents.trainers.exception import UnityTrainerException from mlagents_envs.timers import hierarchical_timer -logger = logging.getLogger("mlagents.trainers") + +logger = get_logger(__name__) class Trainer(abc.ABC): diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 9a5b50b9b3..0d83161398 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -4,13 +4,13 @@ import os import sys -import logging from typing import Dict, Optional, Set from collections import defaultdict import numpy as np from mlagents.tf_utils import tf +from mlagents_envs.logging_util import get_logger from mlagents.trainers.env_manager import EnvManager from mlagents_envs.exception import ( UnityEnvironmentException, @@ -55,7 +55,7 @@ def __init__( self.trainer_factory = trainer_factory self.model_path = model_path self.summaries_dir = summaries_dir - self.logger = logging.getLogger("mlagents.trainers") + self.logger = get_logger(__name__) self.run_id = 
run_id self.save_freq = save_freq self.train_model = train diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index a112da05c5..87849cbdcb 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -1,8 +1,8 @@ import os import yaml from typing import Any, Dict, TextIO -import logging +from mlagents_envs.logging_util import get_logger from mlagents.trainers.meta_curriculum import MetaCurriculum from mlagents.trainers.exception import TrainerConfigError from mlagents.trainers.trainer import Trainer @@ -11,7 +11,8 @@ from mlagents.trainers.sac.trainer import SACTrainer from mlagents.trainers.ghost.trainer import GhostTrainer -logger = logging.getLogger("mlagents.trainers") + +logger = get_logger(__name__) class TrainerFactory: diff --git a/setup.cfg b/setup.cfg index e728ceb30e..ecd551bdd7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,3 +20,4 @@ ignore = I200, banned-modules = tensorflow = use mlagents.tf_utils instead (it handles tf2 compat). + logging = use mlagents_envs.logging_util instead
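
For reference, a minimal usage sketch of the logging helpers this patch introduces in mlagents_envs/logging_util.py (get_logger / set_log_level, following the pattern used in learn.py above):

    from mlagents_envs.logging_util import get_logger, set_log_level, DEBUG

    logger = get_logger(__name__)  # registers the logger so later level changes apply to it
    set_log_level(DEBUG)           # configures the log format and updates all registered loggers
    logger.debug("Debug output is now visible.")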