From 953b08cc8d28e11cc0afc6f740425253439d5561 Mon Sep 17 00:00:00 2001 From: brccabral Date: Fri, 25 Dec 2020 10:58:32 -0800 Subject: [PATCH 01/21] remove Builds folder from git --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b3aab2b943..73878cd4e1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ /summaries # Output Artifacts /results +# Output Builds +/Builds # Training environments /envs From 38afbdecd30dc0d46031a4be198a3ec9b359891c Mon Sep 17 00:00:00 2001 From: brccabral Date: Sat, 2 Jan 2021 21:18:33 -0800 Subject: [PATCH 02/21] log number of rewards in cmd summary --- ml-agents/mlagents/trainers/stats.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 1074d61520..5b7607ee30 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -128,6 +128,7 @@ def write_stats( log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") + log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: From b9a18a08287eed02d60640ab2e34d99c1ed53096 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sat, 2 Jan 2021 22:12:06 -0800 Subject: [PATCH 03/21] added sum to StatsSummary --- ml-agents/mlagents/trainers/stats.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 5b7607ee30..b5289e34dc 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -40,11 +40,12 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: class StatsSummary(NamedTuple): mean: float std: float + sum: float num: int @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0) + return StatsSummary(0.0, 0.0, 0.0, 0) class StatsPropertyType(Enum): @@ -129,6 +130,7 @@ def write_stats( log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") + log_info.append(f"Sum of Reward: {stats_summary.sum:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: @@ -280,14 +282,15 @@ def write_stats(self, step: int) -> None: def get_stats_summaries(self, key: str) -> StatsSummary: """ - Get the mean, std, and count of a particular statistic, since last write. + Get the mean, std, sum, and count of a particular statistic, since last write. :param key: The type of statistic, e.g. Environment/Reward. - :returns: A StatsSummary NamedTuple containing (mean, std, count). + :returns: A StatsSummary NamedTuple containing (mean, std, sum, count). 
""" if len(StatsReporter.stats_dict[self.category][key]) > 0: return StatsSummary( mean=np.mean(StatsReporter.stats_dict[self.category][key]), std=np.std(StatsReporter.stats_dict[self.category][key]), + sum=np.sum(StatsReporter.stats_dict[self.category][key]), num=len(StatsReporter.stats_dict[self.category][key]), ) return StatsSummary.empty() From 9c3f05963a6fc3a0dfcae743ab057bbb63b6a371 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 00:08:01 -0800 Subject: [PATCH 04/21] added SUM in Unity StatAggregationMethod --- com.unity.ml-agents/Runtime/StatsRecorder.cs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/com.unity.ml-agents/Runtime/StatsRecorder.cs b/com.unity.ml-agents/Runtime/StatsRecorder.cs index 96e4e6a29d..86b6245bd9 100644 --- a/com.unity.ml-agents/Runtime/StatsRecorder.cs +++ b/com.unity.ml-agents/Runtime/StatsRecorder.cs @@ -17,7 +17,12 @@ public enum StatAggregationMethod /// To avoid conflicts when training with multiple concurrent environments, only /// stats from worker index 0 will be tracked. /// - MostRecent = 1 + MostRecent = 1, + + /// + /// Values within the summary period are summed up before reporting. + /// + Sum = 2 } /// From b7328ba04a1b2984f56ae4bb4beea5f1dce4f1c0 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 00:18:02 -0800 Subject: [PATCH 05/21] added support for SUM as StatsAggregationMethod in python mlagents --- .../side_channel/stats_side_channel.py | 5 +++ .../mlagents/trainers/agent_processor.py | 10 ++++- ml-agents/mlagents/trainers/stats.py | 43 +++++++++++++++---- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py index 4d8a481f69..5934770dd7 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py @@ -14,6 +14,9 @@ class StatsAggregationMethod(Enum): # Only the most recent value is reported. MOST_RECENT = 1 + # Values within the summary period are summed up before reporting. + SUM = 2 + StatList = List[Tuple[float, StatsAggregationMethod]] EnvironmentStats = Mapping[str, StatList] @@ -35,6 +38,7 @@ def __init__(self) -> None: def on_message_received(self, msg: IncomingMessage) -> None: """ Receive the message from the environment, and save it for later retrieval. + :param msg: :return: """ @@ -47,6 +51,7 @@ def on_message_received(self, msg: IncomingMessage) -> None: def get_and_reset_stats(self) -> EnvironmentStats: """ Returns the current stats, and resets the internal storage of the stats. + :return: """ s = self.stats diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index e56d332f63..3650d54552 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -40,6 +40,7 @@ def __init__( ): """ Create an AgentProcessor. + :param trainer: Trainer instance connected to this AgentProcessor. Trainer is given trajectory when it is finished. :param policy: Policy instance associated with this AgentProcessor. @@ -70,6 +71,7 @@ def add_experiences( ) -> None: """ Adds experiences to each agent's experience history. + :param decision_steps: current DecisionSteps. :param terminal_steps: current TerminalSteps. :param previous_action: The outputs of the Policy's get_action method. 
@@ -209,6 +211,7 @@ def publish_trajectory_queue( """ Adds a trajectory queue to the list of queues to publish to when this AgentProcessor assembles a Trajectory + :param trajectory_queue: Trajectory queue to publish to. """ self.trajectory_queues.append(trajectory_queue) @@ -250,6 +253,7 @@ def __init__(self, behavior_id: str, maxlen: int = 0): def maxlen(self): """ The maximum length of the queue. + :return: Maximum length of the queue. """ return self._maxlen @@ -258,6 +262,7 @@ def maxlen(self): def behavior_id(self): """ The Behavior ID of this queue. + :return: Behavior ID associated with the queue. """ return self._behavior_id @@ -318,7 +323,8 @@ def record_environment_stats( """ Pass stats from the environment to the StatsReporter. Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used. - The worker_id is used to determin whether StatsReporter.set_stat should be used. + The worker_id is used to determine whether StatsReporter.set_stat should be used. + :param env_stats: :param worker_id: :return: @@ -332,3 +338,5 @@ def record_environment_stats( # only stats from the first environment are recorded. if worker_id == 0: self.stats_reporter.set_stat(stat_name, val) + elif agg_type == StatsAggregationMethod.SUM: + self.stats_reporter.add_stat(stat_name, val, agg_type) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index b5289e34dc..005a3dcb30 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -7,6 +7,8 @@ import time from threading import RLock +from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod + from mlagents_envs.logging_util import get_logger from mlagents_envs.timers import set_gauge from torch.utils.tensorboard import SummaryWriter @@ -20,8 +22,9 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: """ Takes a parameter dictionary and converts it to a human-readable string. Recurses if there are multiple levels of dict. Used to print out hyperparameters. - param: param_dict: A Dictionary of key, value parameters. - return: A string version of this dictionary. + + :param param_dict: A Dictionary of key, value parameters. + :return: A string version of this dictionary. """ if not isinstance(param_dict, dict): return str(param_dict) @@ -42,10 +45,11 @@ class StatsSummary(NamedTuple): std: float sum: float num: int + aggregation: StatsAggregationMethod @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0.0, 0) + return StatsSummary(0.0, 0.0, 0.0, 0, StatsAggregationMethod.AVERAGE) class StatsPropertyType(Enum): @@ -72,6 +76,7 @@ def add_property( Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters, a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible with all types of properties. For instance, a TB writer doesn't need a max step. + :param category: The category that the property belongs to. :param type: The type of property. :param value: The property itself. @@ -159,10 +164,11 @@ class TensorboardWriter(StatsWriter): def __init__(self, base_dir: str, clear_past_data: bool = False): """ A StatsWriter that writes to a Tensorboard summary. + :param base_dir: The directory within which to place all the summaries. Tensorboard files will be written to a {base_dir}/{category} directory. :param clear_past_data: Whether or not to clean up existing Tensorboard files associated with the base_dir and - category. 
+ category. """ self.summary_writers: Dict[str, SummaryWriter] = {} self.base_dir: str = base_dir @@ -173,7 +179,10 @@ def write_stats( ) -> None: self._maybe_create_summary_writer(category) for key, value in values.items(): - self.summary_writers[category].add_scalar(f"{key}", value.mean, step) + if value.aggregation == StatsAggregationMethod.SUM: + self.summary_writers[category].add_scalar(f"{key}", value.sum, step) + else: + self.summary_writers[category].add_scalar(f"{key}", value.mean, step) self.summary_writers[category].flush() def _maybe_create_summary_writer(self, category: str) -> None: @@ -217,6 +226,9 @@ class StatsReporter: writers: List[StatsWriter] = [] stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list)) lock = RLock() + stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict( + lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE) + ) def __init__(self, category: str): """ @@ -237,26 +249,36 @@ def add_property(self, property_type: StatsPropertyType, value: Any) -> None: Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters, a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible with all types of properties. For instance, a TB writer doesn't need a max step. - :param key: The type of property. + + :param property_type: The type of property. :param value: The property itself. """ with StatsReporter.lock: for writer in StatsReporter.writers: writer.add_property(self.category, property_type, value) - def add_stat(self, key: str, value: float) -> None: + def add_stat( + self, + key: str, + value: float, + aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE, + ) -> None: """ Add a float value stat to the StatsReporter. + :param key: The type of statistic, e.g. Environment/Reward. :param value: the value of the statistic. + :param aggregation: the aggregation method for the statistic, default StatsAggregationMethod.AVERAGE. """ with StatsReporter.lock: StatsReporter.stats_dict[self.category][key].append(value) + StatsReporter.stats_aggregation[self.category][key] = aggregation def set_stat(self, key: str, value: float) -> None: """ Sets a stat value to a float. This is for values that we don't want to average, and just want the latest. + :param key: The type of statistic, e.g. Environment/Reward. :param value: the value of the statistic. """ @@ -268,6 +290,7 @@ def write_stats(self, step: int) -> None: Write out all stored statistics that fall under the category specified. The currently stored values will be averaged, written out as a single value, and the buffer cleared. + :param step: Training step which to write these stats as. """ with StatsReporter.lock: @@ -282,9 +305,10 @@ def write_stats(self, step: int) -> None: def get_stats_summaries(self, key: str) -> StatsSummary: """ - Get the mean, std, sum, and count of a particular statistic, since last write. + Get the mean, std, sum, count and aggregation method of a particular statistic, since last write. + :param key: The type of statistic, e.g. Environment/Reward. - :returns: A StatsSummary NamedTuple containing (mean, std, sum, count). + :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). 
""" if len(StatsReporter.stats_dict[self.category][key]) > 0: return StatsSummary( @@ -292,5 +316,6 @@ def get_stats_summaries(self, key: str) -> StatsSummary: std=np.std(StatsReporter.stats_dict[self.category][key]), sum=np.sum(StatsReporter.stats_dict[self.category][key]), num=len(StatsReporter.stats_dict[self.category][key]), + aggregation=StatsReporter.stats_aggregation[self.category][key], ) return StatsSummary.empty() From f178316594147b2b5391481fe714fb898ce53cea Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 00:19:41 -0800 Subject: [PATCH 06/21] example to use SUM as aggregation --- .../Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs index b93deebd47..e453ad902e 100644 --- a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs +++ b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs @@ -18,6 +18,7 @@ public class HallwayAgent : Agent Renderer m_GroundRenderer; HallwaySettings m_HallwaySettings; int m_Selection; + StatsRecorder statsRecorder; public override void Initialize() { @@ -25,6 +26,7 @@ public override void Initialize() m_AgentRb = GetComponent(); m_GroundRenderer = ground.GetComponent(); m_GroundMaterial = m_GroundRenderer.material; + statsRecorder = Academy.Instance.StatsRecorder; } public override void CollectObservations(VectorSensor sensor) @@ -83,11 +85,13 @@ void OnCollisionEnter(Collision col) { SetReward(1f); StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f)); + statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum); } else { SetReward(-0.1f); StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.failMaterial, 0.5f)); + statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum); } EndEpisode(); } From e7c45cda622601e8189642b293bcbb53aa7078eb Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 00:33:34 -0800 Subject: [PATCH 07/21] fixed field order with default values for StatsSummary --- ml-agents/mlagents/trainers/stats.py | 8 ++++---- ml-agents/mlagents/trainers/tests/test_stats.py | 14 ++++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 005a3dcb30..41d6481652 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -43,13 +43,13 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: class StatsSummary(NamedTuple): mean: float std: float - sum: float num: int - aggregation: StatsAggregationMethod + sum: float = 0 + aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0.0, 0, StatsAggregationMethod.AVERAGE) + return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE) class StatsPropertyType(Enum): @@ -314,8 +314,8 @@ def get_stats_summaries(self, key: str) -> StatsSummary: return StatsSummary( mean=np.mean(StatsReporter.stats_dict[self.category][key]), std=np.std(StatsReporter.stats_dict[self.category][key]), - sum=np.sum(StatsReporter.stats_dict[self.category][key]), num=len(StatsReporter.stats_dict[self.category][key]), + sum=np.sum(StatsReporter.stats_dict[self.category][key]), aggregation=StatsReporter.stats_aggregation[self.category][key], ) return StatsSummary.empty() diff --git 
a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index 7a81ac684b..70179eb76f 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -129,7 +129,7 @@ def test_console_writer(self): with self.assertLogs("mlagents.trainers", level="INFO") as cm: category = "category1" console_writer = ConsoleWriter() - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1, sum=1) console_writer.write_stats( category, { @@ -138,11 +138,11 @@ def test_console_writer(self): }, 10, ) - statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1) + statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1, sum=0.0) console_writer.write_stats( category, { - "Environment/Cumulative Reward": statssummary1, + "Environment/Cumulative Reward": statssummary2, "Is Training": statssummary2, }, 10, @@ -153,7 +153,8 @@ def test_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0] + "Mean Reward: 0.500. Std of Reward: 0.707. Num of Reward: 2.000. Sum of Reward: 1.000. Training.", + cm.output[0], ) self.assertIn("Not Training.", cm.output[1]) @@ -165,7 +166,7 @@ def test_selfplay_console_writer(self): category = "category1" console_writer = ConsoleWriter() console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True) - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1, sum=1) console_writer.write_stats( category, { @@ -177,5 +178,6 @@ def test_selfplay_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0] + "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. Sum of Reward: 1.000. 
Training.", + cm.output[0], ) From b6c9a2ea9b281489364e85ed036f6438dc329ad6 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 01:05:26 -0800 Subject: [PATCH 08/21] simplified StatsSummary --- ml-agents/mlagents/trainers/ppo/trainer.py | 2 +- ml-agents/mlagents/trainers/stats.py | 44 ++++++++++--------- .../trainers/tests/check_env_trains.py | 4 +- .../trainers/tests/test_agent_processor.py | 4 +- .../mlagents/trainers/tests/test_stats.py | 18 ++++---- .../reward_providers/gail_reward_provider.py | 6 +-- 6 files changed, 40 insertions(+), 38 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 62999878a5..313364a6af 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -169,7 +169,7 @@ def _update_policy(self): advantages = self.update_buffer["advantages"].get_batch() self.update_buffer["advantages"].set( - (advantages - advantages.mean()) / (advantages.std() + 1e-10) + (advantages - advantages.stats_value()) / (advantages.std() + 1e-10) ) num_epoch = self.hyperparameters.num_epoch batch_update_stats = defaultdict(list) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 41d6481652..3e1650e5f6 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -41,15 +41,13 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: class StatsSummary(NamedTuple): - mean: float + stats_value: float std: float num: int - sum: float = 0 - aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE) + return StatsSummary(0.0, 0.0, 0) class StatsPropertyType(Enum): @@ -78,7 +76,7 @@ def add_property( with all types of properties. For instance, a TB writer doesn't need a max step. :param category: The category that the property belongs to. - :param type: The type of property. + :param property_type: The type of property. :param value: The property itself. 
""" pass @@ -102,7 +100,7 @@ def write_stats( for val, stats_summary in values.items(): set_gauge( GaugeWriter.sanitize_string(f"{category}.{val}.mean"), - float(stats_summary.mean), + float(stats_summary.stats_value), ) @@ -120,7 +118,7 @@ def write_stats( is_training = "Not Training" if "Is Training" in values: stats_summary = values["Is Training"] - if stats_summary.mean > 0.0: + if stats_summary.stats_value > 0.0: is_training = "Training" elapsed_time = time.time() - self.training_start_time @@ -132,15 +130,14 @@ def write_stats( if self.rank is not None: log_info.append(f"Rank: {self.rank}") - log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") + log_info.append(f"Mean Reward: {stats_summary.stats_value:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") - log_info.append(f"Sum of Reward: {stats_summary.sum:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: elo_stats = values["Self-play/ELO"] - log_info.append(f"ELO: {elo_stats.mean:0.3f}") + log_info.append(f"ELO: {elo_stats.stats_value:0.3f}") else: log_info.append("No episode was completed since last summary") log_info.append(is_training) @@ -179,10 +176,7 @@ def write_stats( ) -> None: self._maybe_create_summary_writer(category) for key, value in values.items(): - if value.aggregation == StatsAggregationMethod.SUM: - self.summary_writers[category].add_scalar(f"{key}", value.sum, step) - else: - self.summary_writers[category].add_scalar(f"{key}", value.mean, step) + self.summary_writers[category].add_scalar(f"{key}", value.stats_value, step) self.summary_writers[category].flush() def _maybe_create_summary_writer(self, category: str) -> None: @@ -311,11 +305,19 @@ def get_stats_summaries(self, key: str) -> StatsSummary: :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). 
""" if len(StatsReporter.stats_dict[self.category][key]) > 0: - return StatsSummary( - mean=np.mean(StatsReporter.stats_dict[self.category][key]), - std=np.std(StatsReporter.stats_dict[self.category][key]), - num=len(StatsReporter.stats_dict[self.category][key]), - sum=np.sum(StatsReporter.stats_dict[self.category][key]), - aggregation=StatsReporter.stats_aggregation[self.category][key], - ) + if ( + StatsReporter.stats_aggregation[self.category][key] + == StatsAggregationMethod.SUM + ): + return StatsSummary( + stats_value=np.sum(StatsReporter.stats_dict[self.category][key]), + std=np.std(StatsReporter.stats_dict[self.category][key]), + num=len(StatsReporter.stats_dict[self.category][key]), + ) + else: + return StatsSummary( + stats_value=np.mean(StatsReporter.stats_dict[self.category][key]), + std=np.std(StatsReporter.stats_dict[self.category][key]), + num=len(StatsReporter.stats_dict[self.category][key]), + ) return StatsSummary.empty() diff --git a/ml-agents/mlagents/trainers/tests/check_env_trains.py b/ml-agents/mlagents/trainers/tests/check_env_trains.py index 20630d0b32..5bcd59801f 100644 --- a/ml-agents/mlagents/trainers/tests/check_env_trains.py +++ b/ml-agents/mlagents/trainers/tests/check_env_trains.py @@ -28,8 +28,8 @@ def write_stats( ) -> None: for val, stats_summary in values.items(): if val == "Environment/Cumulative Reward": - print(step, val, stats_summary.mean) - self._last_reward_summary[category] = stats_summary.mean + print(step, val, stats_summary.stats_value) + self._last_reward_summary[category] = stats_summary.stats_value # The reward processor is passed as an argument to _check_environment_trains. diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py index efa2549f20..fad5d0d61c 100644 --- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py +++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py @@ -272,8 +272,8 @@ def test_agent_manager_stats(): manager.record_environment_stats(env_stats, worker_id=0) expected_stats = { - "averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2), - "most_recent": StatsSummary(mean=4.0, std=0.0, num=1), + "averaged": StatsSummary(stats_value=2.0, std=mock.ANY, num=2), + "most_recent": StatsSummary(stats_value=4.0, std=0.0, num=1), } stats_reporter.write_stats(123) writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123) diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index 70179eb76f..be4bb2858e 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -36,8 +36,8 @@ def test_stat_reporter_add_summary_write(): assert statssummary1.num == 10 assert statssummary2.num == 10 - assert statssummary1.mean == 4.5 - assert statssummary2.mean == 4.5 + assert statssummary1.stats_value == 4.5 + assert statssummary2.stats_value == 4.5 assert statssummary1.std == pytest.approx(2.9, abs=0.1) assert statssummary2.std == pytest.approx(2.9, abs=0.1) @@ -74,7 +74,7 @@ def test_tensorboard_writer(mock_summary): category = "category1" with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir: tb_writer = TensorboardWriter(base_dir, clear_past_data=False) - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) # Test that the filewriter has been created and the directory has been created. 
@@ -97,7 +97,7 @@ def test_tensorboard_writer(mock_summary): def test_tensorboard_writer_clear(tmp_path): tb_writer = TensorboardWriter(tmp_path, clear_past_data=False) - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) # TB has some sort of timeout before making a new file time.sleep(1.0) @@ -129,7 +129,7 @@ def test_console_writer(self): with self.assertLogs("mlagents.trainers", level="INFO") as cm: category = "category1" console_writer = ConsoleWriter() - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1, sum=1) + statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) console_writer.write_stats( category, { @@ -138,7 +138,7 @@ def test_console_writer(self): }, 10, ) - statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1, sum=0.0) + statssummary2 = StatsSummary(stats_value=0.0, std=0.0, num=1) console_writer.write_stats( category, { @@ -153,7 +153,7 @@ def test_console_writer(self): ) self.assertIn( - "Mean Reward: 0.500. Std of Reward: 0.707. Num of Reward: 2.000. Sum of Reward: 1.000. Training.", + "Mean Reward: 0.500. Std of Reward: 0.707. Num of Reward: 2.000. Training.", cm.output[0], ) self.assertIn("Not Training.", cm.output[1]) @@ -166,7 +166,7 @@ def test_selfplay_console_writer(self): category = "category1" console_writer = ConsoleWriter() console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True) - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1, sum=1) + statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) console_writer.write_stats( category, { @@ -178,6 +178,6 @@ def test_selfplay_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. Sum of Reward: 1.000. Training.", + "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. 
Training.", cm.output[0], ) diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py index 1514b6693a..15fd7d7467 100644 --- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py +++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py @@ -161,12 +161,12 @@ def compute_loss( expert_estimate, expert_mu = self.compute_estimate( expert_batch, use_vail_noise=True ) - stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item() - stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item() + stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.stats_value().item() + stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.stats_value().item() discriminator_loss = -( torch.log(expert_estimate + self.EPSILON) + torch.log(1.0 - policy_estimate + self.EPSILON) - ).mean() + ).stats_value() stats_dict["Losses/GAIL Loss"] = discriminator_loss.item() total_loss += discriminator_loss if self._settings.use_vail: From 9206b46f68dbe2b1ca0e4d75c0c85617090fd727 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 10:47:34 -0800 Subject: [PATCH 09/21] add default value for custom stats --- .../Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs index e453ad902e..ae02b47d1d 100644 --- a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs +++ b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs @@ -160,5 +160,7 @@ public override void OnEpisodeBegin() symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position; symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position; } + statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum); + statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum); } } From 4a4451413df0fbd973d5517027e79f33be7c2243 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 10:48:41 -0800 Subject: [PATCH 10/21] fixed tests --- ml-agents/mlagents/trainers/ppo/trainer.py | 10 +++++++++- ml-agents/mlagents/trainers/stats.py | 2 +- ml-agents/mlagents/trainers/tests/test_learn.py | 16 ++++++++++++---- .../mlagents/trainers/tests/test_rl_trainer.py | 3 ++- ml-agents/mlagents/trainers/tests/test_stats.py | 2 +- .../reward_providers/gail_reward_provider.py | 7 ++++--- .../mlagents/trainers/trainer/rl_trainer.py | 7 +++++++ 7 files changed, 36 insertions(+), 11 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 313364a6af..f875ee0fbd 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -35,6 +35,7 @@ def __init__( ): """ Responsible for collecting experiences and training PPO model. + :param behavior_name: The name of the behavior associated with trainer config :param reward_buff_cap: Max reward history to track in the reward buffer :param trainer_settings: The parameters for the trainer. @@ -61,6 +62,7 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: """ Takes a trajectory and processes it, putting it into the update buffer. Processing involves calculating value and advantage targets for model updating step. 
+ :param trajectory: The Trajectory tuple containing the steps to be processed. """ super()._process_trajectory(trajectory) @@ -141,6 +143,7 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: def _is_ready_update(self): """ Returns whether or not the trainer has enough elements to run update model + :return: A boolean corresponding to whether or not update_model() can be run """ size_of_buffer = self.update_buffer.num_experiences @@ -169,7 +172,7 @@ def _update_policy(self): advantages = self.update_buffer["advantages"].get_batch() self.update_buffer["advantages"].set( - (advantages - advantages.stats_value()) / (advantages.std() + 1e-10) + (advantages - advantages.mean()) / (advantages.std() + 1e-10) ) num_epoch = self.hyperparameters.num_epoch batch_update_stats = defaultdict(list) @@ -199,6 +202,7 @@ def create_torch_policy( ) -> TorchPolicy: """ Creates a policy with a PyTorch backend and PPO hyperparameters + :param parsed_behavior_id: :param behavior_spec: specifications for policy construction :return policy @@ -222,6 +226,7 @@ def add_policy( ) -> None: """ Adds policy to trainer. + :param parsed_behavior_id: Behavior identifiers that the policy should belong to. :param policy: Policy to associate with name_behavior_id. """ @@ -249,6 +254,7 @@ def add_policy( def get_policy(self, name_behavior_id: str) -> Policy: """ Gets policy from trainer associated with name_behavior_id + :param name_behavior_id: full identifier of policy """ @@ -258,6 +264,7 @@ def get_policy(self, name_behavior_id: str) -> Policy: def discount_rewards(r, gamma=0.99, value_next=0.0): """ Computes discounted sum of future rewards for use in updating value estimate. + :param r: List of rewards. :param gamma: Discount factor. :param value_next: T+1 value estimate for returns calculation. @@ -274,6 +281,7 @@ def discount_rewards(r, gamma=0.99, value_next=0.0): def get_gae(rewards, value_estimates, value_next=0.0, gamma=0.99, lambd=0.95): """ Computes generalized advantage estimate for use in updating policy. + :param rewards: list of rewards for time-steps t to T. :param value_next: Value estimate for time-step T+1. :param value_estimates: list of value estimates for time-steps t to T. 
diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 3e1650e5f6..b3c240f61a 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -221,7 +221,7 @@ class StatsReporter: stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list)) lock = RLock() stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict( - lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE) + lambda: defaultdict(lambda: {"", StatsAggregationMethod.AVERAGE}) ) def __init__(self, category: str): diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 81e1e5847d..54783b41ff 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -8,6 +8,7 @@ from mlagents_envs.exception import UnityEnvironmentException from mlagents.trainers.stats import StatsReporter from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager +import os.path def basic_options(extra_args=None): @@ -75,17 +76,24 @@ def test_run_training( learn.run_training(0, options) mock_init.assert_called_once_with( trainer_factory_mock.return_value, - "results/ppo", + os.path.join("results", "ppo"), "ppo", "mock_param_manager", True, 0, ) handle_dir_mock.assert_called_once_with( - "results/ppo", False, False, "results/notuselessrun" + os.path.join("results", "ppo"), + False, + False, + os.path.join("results", "notuselessrun"), + ) + write_timing_tree_mock.assert_called_once_with( + os.path.join("results", "ppo", "run_logs") + ) + write_run_options_mock.assert_called_once_with( + os.path.join("results", "ppo"), options ) - write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs") - write_run_options_mock.assert_called_once_with("results/ppo", options) StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py diff --git a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py index 9a1c583dd4..ae0b64b5be 100644 --- a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py +++ b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py @@ -9,6 +9,7 @@ from mlagents.trainers.settings import TrainerSettings from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes from mlagents_envs.base_env import ActionSpec +import os.path # Add concrete implementations of abstract methods @@ -171,7 +172,7 @@ def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary): trainer.brain_name, ModelCheckpoint( step, - f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}", + f"{trainer.model_saver.model_path}{os.path.sep}{trainer.brain_name}-{step}.{export_ext}", None, mock.ANY, ), diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index be4bb2858e..b42949b759 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -153,7 +153,7 @@ def test_console_writer(self): ) self.assertIn( - "Mean Reward: 0.500. Std of Reward: 0.707. Num of Reward: 2.000. Training.", + "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. 
Training.", cm.output[0], ) self.assertIn("Not Training.", cm.output[1]) diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py index 15fd7d7467..41af1711fb 100644 --- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py +++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py @@ -128,6 +128,7 @@ def compute_estimate( """ Given a mini_batch, computes the estimate (How much the discriminator believes the data was sampled from the demonstration data). + :param mini_batch: The AgentBuffer of data :param use_vail_noise: Only when using VAIL : If true, will sample the code, if false, will return the mean of the code. @@ -161,12 +162,12 @@ def compute_loss( expert_estimate, expert_mu = self.compute_estimate( expert_batch, use_vail_noise=True ) - stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.stats_value().item() - stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.stats_value().item() + stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item() + stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item() discriminator_loss = -( torch.log(expert_estimate + self.EPSILON) + torch.log(1.0 - policy_estimate + self.EPSILON) - ).stats_value() + ).mean() stats_dict["Losses/GAIL Loss"] = discriminator_loss.item() total_loss += discriminator_loss if self._settings.use_vail: diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py index 93fa60d551..3e676bbede 100644 --- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py +++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py @@ -100,6 +100,7 @@ def _clear_update_buffer(self) -> None: def _is_ready_update(self): """ Returns whether or not the trainer has enough elements to run update model + :return: A boolean corresponding to wether or not update_model() can be run """ return False @@ -186,6 +187,7 @@ def save_model(self) -> None: def _update_policy(self) -> bool: """ Uses demonstration_buffer to update model. + :return: Whether or not the policy was updated. """ pass @@ -193,6 +195,7 @@ def _update_policy(self) -> bool: def _increment_step(self, n_steps: int, name_behavior_id: str) -> None: """ Increment the step count of the trainer + :param n_steps: number of steps to increment the step count by """ self.step += n_steps @@ -207,6 +210,7 @@ def _increment_step(self, n_steps: int, name_behavior_id: str) -> None: def _get_next_interval_step(self, interval: int) -> int: """ Get the next step count that should result in an action. + :param interval: The interval between actions. """ return self.step + (interval - self.step % interval) @@ -222,6 +226,7 @@ def _write_summary(self, step: int) -> None: def _process_trajectory(self, trajectory: Trajectory) -> None: """ Takes a trajectory and processes it, putting it into the update buffer. + :param trajectory: The Trajectory tuple containing the steps to be processed. """ self._maybe_write_summary(self.get_step + len(trajectory.steps)) @@ -232,6 +237,7 @@ def _maybe_write_summary(self, step_after_process: int) -> None: """ If processing the trajectory will make the step exceed the next summary write, write the summary. This logic ensures summaries are written on the update step and not in between. + :param step_after_process: the step count after processing the next trajectory. 
""" if self._next_summary_step == 0: # Don't write out the first one @@ -243,6 +249,7 @@ def _maybe_save_model(self, step_after_process: int) -> None: """ If processing the trajectory will make the step exceed the next model write, save the model. This logic ensures models are written on the update step and not in between. + :param step_after_process: the step count after processing the next trajectory. """ if self._next_save_step == 0: # Don't save the first one From 129906246c5ff09b974d3bd7c9acec759eb660a4 Mon Sep 17 00:00:00 2001 From: brccabral Date: Tue, 5 Jan 2021 20:07:30 -0800 Subject: [PATCH 11/21] extended test test_agent_manager_stats in test_agent_processor.py to have sum --- ml-agents/mlagents/trainers/tests/test_agent_processor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py index fad5d0d61c..2cbaf13ecd 100644 --- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py +++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py @@ -262,10 +262,12 @@ def test_agent_manager_stats(): { "averaged": [(1.0, StatsAggregationMethod.AVERAGE)], "most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)], + "summed": [(3.1, StatsAggregationMethod.SUM)], }, { "averaged": [(3.0, StatsAggregationMethod.AVERAGE)], "most_recent": [(4.0, StatsAggregationMethod.MOST_RECENT)], + "summed": [(1.1, StatsAggregationMethod.SUM)], }, ] for env_stats in all_env_stats: @@ -274,6 +276,7 @@ def test_agent_manager_stats(): expected_stats = { "averaged": StatsSummary(stats_value=2.0, std=mock.ANY, num=2), "most_recent": StatsSummary(stats_value=4.0, std=0.0, num=1), + "summed": StatsSummary(stats_value=4.2, std=mock.ANY, num=2), } stats_reporter.write_stats(123) writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123) From f667c0bc1cbd22ac69c088d40559042563cc829e Mon Sep 17 00:00:00 2001 From: brccabral Date: Wed, 6 Jan 2021 22:51:28 -0800 Subject: [PATCH 12/21] refractor StatsSummary to add sum as property --- ml-agents/mlagents/trainers/stats.py | 55 ++++++++++--------- .../trainers/tests/check_env_trains.py | 4 +- .../trainers/tests/test_agent_processor.py | 24 +++++++- .../mlagents/trainers/tests/test_stats.py | 45 ++++++++++++--- 4 files changed, 91 insertions(+), 37 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index b3c240f61a..472db4ddc1 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -41,13 +41,22 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: class StatsSummary(NamedTuple): - stats_value: float + mean: float std: float num: int + sum: float + aggregation_method: StatsAggregationMethod @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0) + return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE) + + @property + def aggregated_value(self): + if self.aggregation_method == StatsAggregationMethod.SUM: + return self.sum + else: + return self.mean class StatsPropertyType(Enum): @@ -99,8 +108,8 @@ def write_stats( ) -> None: for val, stats_summary in values.items(): set_gauge( - GaugeWriter.sanitize_string(f"{category}.{val}.mean"), - float(stats_summary.stats_value), + GaugeWriter.sanitize_string(f"{category}.{val}.aggregated_value"), + float(stats_summary.aggregated_value), ) @@ -118,7 +127,7 @@ def write_stats( is_training = "Not Training" if "Is Training" in values: stats_summary = 
values["Is Training"] - if stats_summary.stats_value > 0.0: + if stats_summary.aggregated_value > 0.0: is_training = "Training" elapsed_time = time.time() - self.training_start_time @@ -130,14 +139,14 @@ def write_stats( if self.rank is not None: log_info.append(f"Rank: {self.rank}") - log_info.append(f"Mean Reward: {stats_summary.stats_value:0.3f}") + log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: elo_stats = values["Self-play/ELO"] - log_info.append(f"ELO: {elo_stats.stats_value:0.3f}") + log_info.append(f"ELO: {elo_stats.aggregated_value:0.3f}") else: log_info.append("No episode was completed since last summary") log_info.append(is_training) @@ -176,7 +185,9 @@ def write_stats( ) -> None: self._maybe_create_summary_writer(category) for key, value in values.items(): - self.summary_writers[category].add_scalar(f"{key}", value.stats_value, step) + self.summary_writers[category].add_scalar( + f"{key}", value.aggregated_value, step + ) self.summary_writers[category].flush() def _maybe_create_summary_writer(self, category: str) -> None: @@ -304,20 +315,14 @@ def get_stats_summaries(self, key: str) -> StatsSummary: :param key: The type of statistic, e.g. Environment/Reward. :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). """ - if len(StatsReporter.stats_dict[self.category][key]) > 0: - if ( - StatsReporter.stats_aggregation[self.category][key] - == StatsAggregationMethod.SUM - ): - return StatsSummary( - stats_value=np.sum(StatsReporter.stats_dict[self.category][key]), - std=np.std(StatsReporter.stats_dict[self.category][key]), - num=len(StatsReporter.stats_dict[self.category][key]), - ) - else: - return StatsSummary( - stats_value=np.mean(StatsReporter.stats_dict[self.category][key]), - std=np.std(StatsReporter.stats_dict[self.category][key]), - num=len(StatsReporter.stats_dict[self.category][key]), - ) - return StatsSummary.empty() + stat_values = StatsReporter.stats_dict[self.category][key] + if len(stat_values) == 0: + return StatsSummary.empty() + + return StatsSummary( + mean=np.mean(stat_values), + std=np.std(stat_values), + num=len(stat_values), + sum=np.sum(stat_values), + aggregation_method=StatsReporter.stats_aggregation[self.category][key], + ) diff --git a/ml-agents/mlagents/trainers/tests/check_env_trains.py b/ml-agents/mlagents/trainers/tests/check_env_trains.py index 5bcd59801f..0742b773f5 100644 --- a/ml-agents/mlagents/trainers/tests/check_env_trains.py +++ b/ml-agents/mlagents/trainers/tests/check_env_trains.py @@ -28,8 +28,8 @@ def write_stats( ) -> None: for val, stats_summary in values.items(): if val == "Environment/Cumulative Reward": - print(step, val, stats_summary.stats_value) - self._last_reward_summary[category] = stats_summary.stats_value + print(step, val, stats_summary.aggregated_value) + self._last_reward_summary[category] = stats_summary.aggregated_value # The reward processor is passed as an argument to _check_environment_trains. 
diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py index 2cbaf13ecd..5301b535d2 100644 --- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py +++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py @@ -274,9 +274,27 @@ def test_agent_manager_stats(): manager.record_environment_stats(env_stats, worker_id=0) expected_stats = { - "averaged": StatsSummary(stats_value=2.0, std=mock.ANY, num=2), - "most_recent": StatsSummary(stats_value=4.0, std=0.0, num=1), - "summed": StatsSummary(stats_value=4.2, std=mock.ANY, num=2), + "averaged": StatsSummary( + mean=2.0, + std=mock.ANY, + num=2, + sum=4.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ), + "most_recent": StatsSummary( + mean=4.0, + std=0.0, + num=1, + sum=4.0, + aggregation_method=StatsAggregationMethod.MOST_RECENT, + ), + "summed": StatsSummary( + mean=2.1, + std=mock.ANY, + num=2, + sum=4.2, + aggregation_method=StatsAggregationMethod.SUM, + ), } stats_reporter.write_stats(123) writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123) diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index b42949b759..010ed48efd 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -12,6 +12,7 @@ GaugeWriter, ConsoleWriter, StatsPropertyType, + StatsAggregationMethod, ) @@ -36,8 +37,8 @@ def test_stat_reporter_add_summary_write(): assert statssummary1.num == 10 assert statssummary2.num == 10 - assert statssummary1.stats_value == 4.5 - assert statssummary2.stats_value == 4.5 + assert statssummary1.mean == 4.5 + assert statssummary2.mean == 4.5 assert statssummary1.std == pytest.approx(2.9, abs=0.1) assert statssummary2.std == pytest.approx(2.9, abs=0.1) @@ -74,7 +75,13 @@ def test_tensorboard_writer(mock_summary): category = "category1" with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir: tb_writer = TensorboardWriter(base_dir, clear_past_data=False) - statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) + statssummary1 = StatsSummary( + mean=1.0, + std=1.0, + num=1, + sum=1.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) # Test that the filewriter has been created and the directory has been created. 
@@ -97,7 +104,13 @@ def test_tensorboard_writer(mock_summary): def test_tensorboard_writer_clear(tmp_path): tb_writer = TensorboardWriter(tmp_path, clear_past_data=False) - statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) + statssummary1 = StatsSummary( + mean=1.0, + std=1.0, + num=1, + sum=1.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) # TB has some sort of timeout before making a new file time.sleep(1.0) @@ -129,7 +142,13 @@ def test_console_writer(self): with self.assertLogs("mlagents.trainers", level="INFO") as cm: category = "category1" console_writer = ConsoleWriter() - statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) + statssummary1 = StatsSummary( + mean=1.0, + std=1.0, + num=1, + sum=1.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) console_writer.write_stats( category, { @@ -138,7 +157,13 @@ def test_console_writer(self): }, 10, ) - statssummary2 = StatsSummary(stats_value=0.0, std=0.0, num=1) + statssummary2 = StatsSummary( + mean=0.0, + std=0.0, + num=1, + sum=0.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) console_writer.write_stats( category, { @@ -166,7 +191,13 @@ def test_selfplay_console_writer(self): category = "category1" console_writer = ConsoleWriter() console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True) - statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) + statssummary1 = StatsSummary( + mean=1.0, + std=1.0, + num=1, + sum=1.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) console_writer.write_stats( category, { From 1531ec32809a82ec62315ed623949d642cab8dbb Mon Sep 17 00:00:00 2001 From: brccabral Date: Wed, 6 Jan 2021 23:17:32 -0800 Subject: [PATCH 13/21] fixed tests --- ml-agents/mlagents/trainers/agent_processor.py | 6 +++--- ml-agents/mlagents/trainers/stats.py | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index 3650d54552..3512b9df02 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -332,11 +332,11 @@ def record_environment_stats( for stat_name, value_list in env_stats.items(): for val, agg_type in value_list: if agg_type == StatsAggregationMethod.AVERAGE: - self.stats_reporter.add_stat(stat_name, val) + self.stats_reporter.add_stat(stat_name, val, agg_type) + elif agg_type == StatsAggregationMethod.SUM: + self.stats_reporter.add_stat(stat_name, val, agg_type) elif agg_type == StatsAggregationMethod.MOST_RECENT: # In order to prevent conflicts between multiple environments, # only stats from the first environment are recorded. 
if worker_id == 0: self.stats_reporter.set_stat(stat_name, val) - elif agg_type == StatsAggregationMethod.SUM: - self.stats_reporter.add_stat(stat_name, val, agg_type) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 472db4ddc1..9f6ad2eccc 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -232,7 +232,7 @@ class StatsReporter: stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list)) lock = RLock() stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict( - lambda: defaultdict(lambda: {"", StatsAggregationMethod.AVERAGE}) + lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE) ) def __init__(self, category: str): @@ -289,6 +289,9 @@ def set_stat(self, key: str, value: float) -> None: """ with StatsReporter.lock: StatsReporter.stats_dict[self.category][key] = [value] + StatsReporter.stats_aggregation[self.category][ + key + ] = StatsAggregationMethod.MOST_RECENT def write_stats(self, step: int) -> None: """ @@ -310,7 +313,7 @@ def write_stats(self, step: int) -> None: def get_stats_summaries(self, key: str) -> StatsSummary: """ - Get the mean, std, sum, count and aggregation method of a particular statistic, since last write. + Get the mean, std, count, sum and aggregation method of a particular statistic, since last write. :param key: The type of statistic, e.g. Environment/Reward. :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). From dadf67d7f778e6a01cd0d1f8122ca0a4fbd4be61 Mon Sep 17 00:00:00 2001 From: brccabral Date: Thu, 7 Jan 2021 18:45:19 -0800 Subject: [PATCH 14/21] Unity coding standard --- .../Examples/Hallway/Scripts/HallwayAgent.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs index ae02b47d1d..aa7daf1a57 100644 --- a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs +++ b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs @@ -18,7 +18,7 @@ public class HallwayAgent : Agent Renderer m_GroundRenderer; HallwaySettings m_HallwaySettings; int m_Selection; - StatsRecorder statsRecorder; + StatsRecorder m_statsRecorder; public override void Initialize() { @@ -26,7 +26,7 @@ public override void Initialize() m_AgentRb = GetComponent(); m_GroundRenderer = ground.GetComponent(); m_GroundMaterial = m_GroundRenderer.material; - statsRecorder = Academy.Instance.StatsRecorder; + m_statsRecorder = Academy.Instance.StatsRecorder; } public override void CollectObservations(VectorSensor sensor) @@ -85,13 +85,13 @@ void OnCollisionEnter(Collision col) { SetReward(1f); StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f)); - statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum); + m_statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum); } else { SetReward(-0.1f); StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.failMaterial, 0.5f)); - statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum); + m_statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum); } EndEpisode(); } @@ -160,7 +160,7 @@ public override void OnEpisodeBegin() symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position; symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position; } - statsRecorder.Add("Goal/Correct", 0, 
StatAggregationMethod.Sum); - statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum); + m_statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum); + m_statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum); } } From b5e63974b76c13e2cff2448e4dc1956396c84821 Mon Sep 17 00:00:00 2001 From: brccabral Date: Thu, 7 Jan 2021 18:45:42 -0800 Subject: [PATCH 15/21] reverted docstring empty lines --- .../torch/components/reward_providers/gail_reward_provider.py | 1 - ml-agents/mlagents/trainers/trainer/rl_trainer.py | 1 - 2 files changed, 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py index 41af1711fb..1514b6693a 100644 --- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py +++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py @@ -128,7 +128,6 @@ def compute_estimate( """ Given a mini_batch, computes the estimate (How much the discriminator believes the data was sampled from the demonstration data). - :param mini_batch: The AgentBuffer of data :param use_vail_noise: Only when using VAIL : If true, will sample the code, if false, will return the mean of the code. diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py index 3e676bbede..0653cb2b8e 100644 --- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py +++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py @@ -100,7 +100,6 @@ def _clear_update_buffer(self) -> None: def _is_ready_update(self): """ Returns whether or not the trainer has enough elements to run update model - :return: A boolean corresponding to wether or not update_model() can be run """ return False From 913fddfd08957cf0034fe690223eb3db745f6473 Mon Sep 17 00:00:00 2001 From: brccabral Date: Thu, 7 Jan 2021 18:46:02 -0800 Subject: [PATCH 16/21] GaugeWriter fix --- ml-agents/mlagents/trainers/stats.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 9f6ad2eccc..858a5767c6 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -108,8 +108,12 @@ def write_stats( ) -> None: for val, stats_summary in values.items(): set_gauge( - GaugeWriter.sanitize_string(f"{category}.{val}.aggregated_value"), - float(stats_summary.aggregated_value), + GaugeWriter.sanitize_string(f"{category}.{val}.mean"), + float(stats_summary.mean), + ) + set_gauge( + GaugeWriter.sanitize_string(f"{category}.{val}.sum"), + float(stats_summary.sum), ) @@ -146,7 +150,7 @@ def write_stats( if self.self_play and "Self-play/ELO" in values: elo_stats = values["Self-play/ELO"] - log_info.append(f"ELO: {elo_stats.aggregated_value:0.3f}") + log_info.append(f"ELO: {elo_stats.mean:0.3f}") else: log_info.append("No episode was completed since last summary") log_info.append(is_training) @@ -316,7 +320,7 @@ def get_stats_summaries(self, key: str) -> StatsSummary: Get the mean, std, count, sum and aggregation method of a particular statistic, since last write. :param key: The type of statistic, e.g. Environment/Reward. - :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). + :returns: A StatsSummary containing summary statistics. 
""" stat_values = StatsReporter.stats_dict[self.category][key] if len(stat_values) == 0: From 8bc9892df9a6010f0dac325bb14bf569fae72bda Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 09:40:13 -0800 Subject: [PATCH 17/21] revert some whitespace --- ml-agents/mlagents/trainers/agent_processor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index 3512b9df02..748e77df78 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -71,7 +71,6 @@ def add_experiences( ) -> None: """ Adds experiences to each agent's experience history. - :param decision_steps: current DecisionSteps. :param terminal_steps: current TerminalSteps. :param previous_action: The outputs of the Policy's get_action method. @@ -114,7 +113,12 @@ def add_experiences( ) def _process_step( - self, step: Union[TerminalStep, DecisionStep], global_id: str, index: int + self, + step: Union[ + TerminalStep, DecisionStep + ], # pylint: disable=unsubscriptable-object + global_id: str, + index: int, ) -> None: terminated = isinstance(step, TerminalStep) stored_decision_step, idx = self.last_step_result.get(global_id, (None, None)) @@ -211,7 +215,6 @@ def publish_trajectory_queue( """ Adds a trajectory queue to the list of queues to publish to when this AgentProcessor assembles a Trajectory - :param trajectory_queue: Trajectory queue to publish to. """ self.trajectory_queues.append(trajectory_queue) @@ -253,7 +256,6 @@ def __init__(self, behavior_id: str, maxlen: int = 0): def maxlen(self): """ The maximum length of the queue. - :return: Maximum length of the queue. """ return self._maxlen @@ -262,7 +264,6 @@ def maxlen(self): def behavior_id(self): """ The Behavior ID of this queue. - :return: Behavior ID associated with the queue. """ return self._behavior_id From cc76bea9ea84853e174b786a0f0dfad0fb4997dd Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 09:40:52 -0800 Subject: [PATCH 18/21] undo whitespace --- ml-agents/mlagents/trainers/ppo/trainer.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index f875ee0fbd..62999878a5 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -35,7 +35,6 @@ def __init__( ): """ Responsible for collecting experiences and training PPO model. - :param behavior_name: The name of the behavior associated with trainer config :param reward_buff_cap: Max reward history to track in the reward buffer :param trainer_settings: The parameters for the trainer. @@ -62,7 +61,6 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: """ Takes a trajectory and processes it, putting it into the update buffer. Processing involves calculating value and advantage targets for model updating step. - :param trajectory: The Trajectory tuple containing the steps to be processed. 
""" super()._process_trajectory(trajectory) @@ -143,7 +141,6 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: def _is_ready_update(self): """ Returns whether or not the trainer has enough elements to run update model - :return: A boolean corresponding to whether or not update_model() can be run """ size_of_buffer = self.update_buffer.num_experiences @@ -202,7 +199,6 @@ def create_torch_policy( ) -> TorchPolicy: """ Creates a policy with a PyTorch backend and PPO hyperparameters - :param parsed_behavior_id: :param behavior_spec: specifications for policy construction :return policy @@ -226,7 +222,6 @@ def add_policy( ) -> None: """ Adds policy to trainer. - :param parsed_behavior_id: Behavior identifiers that the policy should belong to. :param policy: Policy to associate with name_behavior_id. """ @@ -254,7 +249,6 @@ def add_policy( def get_policy(self, name_behavior_id: str) -> Policy: """ Gets policy from trainer associated with name_behavior_id - :param name_behavior_id: full identifier of policy """ @@ -264,7 +258,6 @@ def get_policy(self, name_behavior_id: str) -> Policy: def discount_rewards(r, gamma=0.99, value_next=0.0): """ Computes discounted sum of future rewards for use in updating value estimate. - :param r: List of rewards. :param gamma: Discount factor. :param value_next: T+1 value estimate for returns calculation. @@ -281,7 +274,6 @@ def discount_rewards(r, gamma=0.99, value_next=0.0): def get_gae(rewards, value_estimates, value_next=0.0, gamma=0.99, lambd=0.95): """ Computes generalized advantage estimate for use in updating policy. - :param rewards: list of rewards for time-steps t to T. :param value_next: Value estimate for time-step T+1. :param value_estimates: list of value estimates for time-steps t to T. From 31f400d4fd77ca55902d0adf532e90555f637076 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 09:45:07 -0800 Subject: [PATCH 19/21] undo undesired change, undo whitespace --- ml-agents/mlagents/trainers/stats.py | 3 +-- ml-agents/mlagents/trainers/trainer/rl_trainer.py | 6 ------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 858a5767c6..c55c4a471c 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -40,7 +40,7 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: ) -class StatsSummary(NamedTuple): +class StatsSummary(NamedTuple): # pylint: disable=inherit-non-class mean: float std: float num: int @@ -145,7 +145,6 @@ def write_stats( log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") - log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py index 0653cb2b8e..93fa60d551 100644 --- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py +++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py @@ -186,7 +186,6 @@ def save_model(self) -> None: def _update_policy(self) -> bool: """ Uses demonstration_buffer to update model. - :return: Whether or not the policy was updated. 
""" pass @@ -194,7 +193,6 @@ def _update_policy(self) -> bool: def _increment_step(self, n_steps: int, name_behavior_id: str) -> None: """ Increment the step count of the trainer - :param n_steps: number of steps to increment the step count by """ self.step += n_steps @@ -209,7 +207,6 @@ def _increment_step(self, n_steps: int, name_behavior_id: str) -> None: def _get_next_interval_step(self, interval: int) -> int: """ Get the next step count that should result in an action. - :param interval: The interval between actions. """ return self.step + (interval - self.step % interval) @@ -225,7 +222,6 @@ def _write_summary(self, step: int) -> None: def _process_trajectory(self, trajectory: Trajectory) -> None: """ Takes a trajectory and processes it, putting it into the update buffer. - :param trajectory: The Trajectory tuple containing the steps to be processed. """ self._maybe_write_summary(self.get_step + len(trajectory.steps)) @@ -236,7 +232,6 @@ def _maybe_write_summary(self, step_after_process: int) -> None: """ If processing the trajectory will make the step exceed the next summary write, write the summary. This logic ensures summaries are written on the update step and not in between. - :param step_after_process: the step count after processing the next trajectory. """ if self._next_summary_step == 0: # Don't write out the first one @@ -248,7 +243,6 @@ def _maybe_save_model(self, step_after_process: int) -> None: """ If processing the trajectory will make the step exceed the next model write, save the model. This logic ensures models are written on the update step and not in between. - :param step_after_process: the step count after processing the next trajectory. """ if self._next_save_step == 0: # Don't save the first one From eaf51a824c87ec6c08d4db1150b60d15ebffe0fb Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 09:48:33 -0800 Subject: [PATCH 20/21] revert unit test logging strings --- ml-agents/mlagents/trainers/tests/test_stats.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index 010ed48efd..8a8ff19a49 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -178,8 +178,7 @@ def test_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. Training.", - cm.output[0], + "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0] ) self.assertIn("Not Training.", cm.output[1]) @@ -209,6 +208,5 @@ def test_selfplay_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. Training.", - cm.output[0], + "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0] ) From 201f0996525a7a07c8d7405ac1e54e3272439500 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 10:04:15 -0800 Subject: [PATCH 21/21] changelog --- com.unity.ml-agents/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index eccb84ac1f..4a6e59f099 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -15,6 +15,9 @@ and this project adheres to ### Minor Changes #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) +- `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. 
This
+will result in the values being summed (instead of averaged) when written to
+TensorBoard. Thanks to @brccabral for the contribution! (#4816)

 #### ml-agents / ml-agents-envs / gym-unity (Python)
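Usage note: a minimal sketch of how the new aggregation method can be recorded from an Agent script, assuming the changes in the patches above are applied. The class name GoalReporterAgent and the ReportGoal helper are illustrative assumptions and are not part of these patches; only Academy.Instance.StatsRecorder, StatsRecorder.Add and StatAggregationMethod.Sum come from the code shown in the diffs.

    using Unity.MLAgents;

    // Minimal sketch (not part of the patches): an Agent that reports goal
    // outcomes with the new Sum aggregation. Values recorded during a summary
    // period are added together before being reported, instead of averaged
    // (Average) or overwritten (MostRecent).
    public class GoalReporterAgent : Agent
    {
        StatsRecorder m_Recorder;

        public override void Initialize()
        {
            m_Recorder = Academy.Instance.StatsRecorder;
        }

        // Hypothetical helper; call it whenever an episode ends with a goal event.
        public void ReportGoal(bool correct)
        {
            var key = correct ? "Goal/Correct" : "Goal/Wrong";
            m_Recorder.Add(key, 1f, StatAggregationMethod.Sum);
        }
    }

On the trainer side, keys recorded this way keep their summed value through the summary period; the GaugeWriter change in stats.py, for example, now emits both a .mean and a .sum gauge per statistic, so the summed total is available alongside the average.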