From 953b08cc8d28e11cc0afc6f740425253439d5561 Mon Sep 17 00:00:00 2001 From: brccabral Date: Fri, 25 Dec 2020 10:58:32 -0800 Subject: [PATCH 01/21] remove Builds folder from git --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b3aab2b943..73878cd4e1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ /summaries # Output Artifacts /results +# Output Builds +/Builds # Training environments /envs From 38afbdecd30dc0d46031a4be198a3ec9b359891c Mon Sep 17 00:00:00 2001 From: brccabral Date: Sat, 2 Jan 2021 21:18:33 -0800 Subject: [PATCH 02/21] log number of rewards in cmd summary --- ml-agents/mlagents/trainers/stats.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 1074d61520..5b7607ee30 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -128,6 +128,7 @@ def write_stats( log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") + log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: From b9a18a08287eed02d60640ab2e34d99c1ed53096 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sat, 2 Jan 2021 22:12:06 -0800 Subject: [PATCH 03/21] added sum to StatsSummary --- ml-agents/mlagents/trainers/stats.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 5b7607ee30..b5289e34dc 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -40,11 +40,12 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: class StatsSummary(NamedTuple): mean: float std: float + sum: float num: int @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0) + return StatsSummary(0.0, 0.0, 0.0, 0) class StatsPropertyType(Enum): @@ -129,6 +130,7 @@ def write_stats( log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") + log_info.append(f"Sum of Reward: {stats_summary.sum:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: @@ -280,14 +282,15 @@ def write_stats(self, step: int) -> None: def get_stats_summaries(self, key: str) -> StatsSummary: """ - Get the mean, std, and count of a particular statistic, since last write. + Get the mean, std, sum, and count of a particular statistic, since last write. :param key: The type of statistic, e.g. Environment/Reward. - :returns: A StatsSummary NamedTuple containing (mean, std, count). + :returns: A StatsSummary NamedTuple containing (mean, std, sum, count). 
""" if len(StatsReporter.stats_dict[self.category][key]) > 0: return StatsSummary( mean=np.mean(StatsReporter.stats_dict[self.category][key]), std=np.std(StatsReporter.stats_dict[self.category][key]), + sum=np.sum(StatsReporter.stats_dict[self.category][key]), num=len(StatsReporter.stats_dict[self.category][key]), ) return StatsSummary.empty() From 9c3f05963a6fc3a0dfcae743ab057bbb63b6a371 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 00:08:01 -0800 Subject: [PATCH 04/21] added SUM in Unity StatAggregationMethod --- com.unity.ml-agents/Runtime/StatsRecorder.cs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/com.unity.ml-agents/Runtime/StatsRecorder.cs b/com.unity.ml-agents/Runtime/StatsRecorder.cs index 96e4e6a29d..86b6245bd9 100644 --- a/com.unity.ml-agents/Runtime/StatsRecorder.cs +++ b/com.unity.ml-agents/Runtime/StatsRecorder.cs @@ -17,7 +17,12 @@ public enum StatAggregationMethod /// To avoid conflicts when training with multiple concurrent environments, only /// stats from worker index 0 will be tracked. /// - MostRecent = 1 + MostRecent = 1, + + /// + /// Values within the summary period are summed up before reporting. + /// + Sum = 2 } /// From b7328ba04a1b2984f56ae4bb4beea5f1dce4f1c0 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 00:18:02 -0800 Subject: [PATCH 05/21] added support for SUM as StatsAggregationMethod in python mlagents --- .../side_channel/stats_side_channel.py | 5 +++ .../mlagents/trainers/agent_processor.py | 10 ++++- ml-agents/mlagents/trainers/stats.py | 43 +++++++++++++++---- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py index 4d8a481f69..5934770dd7 100644 --- a/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py +++ b/ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py @@ -14,6 +14,9 @@ class StatsAggregationMethod(Enum): # Only the most recent value is reported. MOST_RECENT = 1 + # Values within the summary period are summed up before reporting. + SUM = 2 + StatList = List[Tuple[float, StatsAggregationMethod]] EnvironmentStats = Mapping[str, StatList] @@ -35,6 +38,7 @@ def __init__(self) -> None: def on_message_received(self, msg: IncomingMessage) -> None: """ Receive the message from the environment, and save it for later retrieval. + :param msg: :return: """ @@ -47,6 +51,7 @@ def on_message_received(self, msg: IncomingMessage) -> None: def get_and_reset_stats(self) -> EnvironmentStats: """ Returns the current stats, and resets the internal storage of the stats. + :return: """ s = self.stats diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index e56d332f63..3650d54552 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -40,6 +40,7 @@ def __init__( ): """ Create an AgentProcessor. + :param trainer: Trainer instance connected to this AgentProcessor. Trainer is given trajectory when it is finished. :param policy: Policy instance associated with this AgentProcessor. @@ -70,6 +71,7 @@ def add_experiences( ) -> None: """ Adds experiences to each agent's experience history. + :param decision_steps: current DecisionSteps. :param terminal_steps: current TerminalSteps. :param previous_action: The outputs of the Policy's get_action method. 
@@ -209,6 +211,7 @@ def publish_trajectory_queue( """ Adds a trajectory queue to the list of queues to publish to when this AgentProcessor assembles a Trajectory + :param trajectory_queue: Trajectory queue to publish to. """ self.trajectory_queues.append(trajectory_queue) @@ -250,6 +253,7 @@ def __init__(self, behavior_id: str, maxlen: int = 0): def maxlen(self): """ The maximum length of the queue. + :return: Maximum length of the queue. """ return self._maxlen @@ -258,6 +262,7 @@ def maxlen(self): def behavior_id(self): """ The Behavior ID of this queue. + :return: Behavior ID associated with the queue. """ return self._behavior_id @@ -318,7 +323,8 @@ def record_environment_stats( """ Pass stats from the environment to the StatsReporter. Depending on the StatsAggregationMethod, either StatsReporter.add_stat or StatsReporter.set_stat is used. - The worker_id is used to determin whether StatsReporter.set_stat should be used. + The worker_id is used to determine whether StatsReporter.set_stat should be used. + :param env_stats: :param worker_id: :return: @@ -332,3 +338,5 @@ def record_environment_stats( # only stats from the first environment are recorded. if worker_id == 0: self.stats_reporter.set_stat(stat_name, val) + elif agg_type == StatsAggregationMethod.SUM: + self.stats_reporter.add_stat(stat_name, val, agg_type) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index b5289e34dc..005a3dcb30 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -7,6 +7,8 @@ import time from threading import RLock +from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod + from mlagents_envs.logging_util import get_logger from mlagents_envs.timers import set_gauge from torch.utils.tensorboard import SummaryWriter @@ -20,8 +22,9 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: """ Takes a parameter dictionary and converts it to a human-readable string. Recurses if there are multiple levels of dict. Used to print out hyperparameters. - param: param_dict: A Dictionary of key, value parameters. - return: A string version of this dictionary. + + :param param_dict: A Dictionary of key, value parameters. + :return: A string version of this dictionary. """ if not isinstance(param_dict, dict): return str(param_dict) @@ -42,10 +45,11 @@ class StatsSummary(NamedTuple): std: float sum: float num: int + aggregation: StatsAggregationMethod @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0.0, 0) + return StatsSummary(0.0, 0.0, 0.0, 0, StatsAggregationMethod.AVERAGE) class StatsPropertyType(Enum): @@ -72,6 +76,7 @@ def add_property( Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters, a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible with all types of properties. For instance, a TB writer doesn't need a max step. + :param category: The category that the property belongs to. :param type: The type of property. :param value: The property itself. @@ -159,10 +164,11 @@ class TensorboardWriter(StatsWriter): def __init__(self, base_dir: str, clear_past_data: bool = False): """ A StatsWriter that writes to a Tensorboard summary. + :param base_dir: The directory within which to place all the summaries. Tensorboard files will be written to a {base_dir}/{category} directory. :param clear_past_data: Whether or not to clean up existing Tensorboard files associated with the base_dir and - category. 
+ category. """ self.summary_writers: Dict[str, SummaryWriter] = {} self.base_dir: str = base_dir @@ -173,7 +179,10 @@ def write_stats( ) -> None: self._maybe_create_summary_writer(category) for key, value in values.items(): - self.summary_writers[category].add_scalar(f"{key}", value.mean, step) + if value.aggregation == StatsAggregationMethod.SUM: + self.summary_writers[category].add_scalar(f"{key}", value.sum, step) + else: + self.summary_writers[category].add_scalar(f"{key}", value.mean, step) self.summary_writers[category].flush() def _maybe_create_summary_writer(self, category: str) -> None: @@ -217,6 +226,9 @@ class StatsReporter: writers: List[StatsWriter] = [] stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list)) lock = RLock() + stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict( + lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE) + ) def __init__(self, category: str): """ @@ -237,26 +249,36 @@ def add_property(self, property_type: StatsPropertyType, value: Any) -> None: Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters, a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible with all types of properties. For instance, a TB writer doesn't need a max step. - :param key: The type of property. + + :param property_type: The type of property. :param value: The property itself. """ with StatsReporter.lock: for writer in StatsReporter.writers: writer.add_property(self.category, property_type, value) - def add_stat(self, key: str, value: float) -> None: + def add_stat( + self, + key: str, + value: float, + aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE, + ) -> None: """ Add a float value stat to the StatsReporter. + :param key: The type of statistic, e.g. Environment/Reward. :param value: the value of the statistic. + :param aggregation: the aggregation method for the statistic, default StatsAggregationMethod.AVERAGE. """ with StatsReporter.lock: StatsReporter.stats_dict[self.category][key].append(value) + StatsReporter.stats_aggregation[self.category][key] = aggregation def set_stat(self, key: str, value: float) -> None: """ Sets a stat value to a float. This is for values that we don't want to average, and just want the latest. + :param key: The type of statistic, e.g. Environment/Reward. :param value: the value of the statistic. """ @@ -268,6 +290,7 @@ def write_stats(self, step: int) -> None: Write out all stored statistics that fall under the category specified. The currently stored values will be averaged, written out as a single value, and the buffer cleared. + :param step: Training step which to write these stats as. """ with StatsReporter.lock: @@ -282,9 +305,10 @@ def write_stats(self, step: int) -> None: def get_stats_summaries(self, key: str) -> StatsSummary: """ - Get the mean, std, sum, and count of a particular statistic, since last write. + Get the mean, std, sum, count and aggregation method of a particular statistic, since last write. + :param key: The type of statistic, e.g. Environment/Reward. - :returns: A StatsSummary NamedTuple containing (mean, std, sum, count). + :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). 
""" if len(StatsReporter.stats_dict[self.category][key]) > 0: return StatsSummary( @@ -292,5 +316,6 @@ def get_stats_summaries(self, key: str) -> StatsSummary: std=np.std(StatsReporter.stats_dict[self.category][key]), sum=np.sum(StatsReporter.stats_dict[self.category][key]), num=len(StatsReporter.stats_dict[self.category][key]), + aggregation=StatsReporter.stats_aggregation[self.category][key], ) return StatsSummary.empty() From f178316594147b2b5391481fe714fb898ce53cea Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 00:19:41 -0800 Subject: [PATCH 06/21] example to use SUM as aggregation --- .../Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs index b93deebd47..e453ad902e 100644 --- a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs +++ b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs @@ -18,6 +18,7 @@ public class HallwayAgent : Agent Renderer m_GroundRenderer; HallwaySettings m_HallwaySettings; int m_Selection; + StatsRecorder statsRecorder; public override void Initialize() { @@ -25,6 +26,7 @@ public override void Initialize() m_AgentRb = GetComponent(); m_GroundRenderer = ground.GetComponent(); m_GroundMaterial = m_GroundRenderer.material; + statsRecorder = Academy.Instance.StatsRecorder; } public override void CollectObservations(VectorSensor sensor) @@ -83,11 +85,13 @@ void OnCollisionEnter(Collision col) { SetReward(1f); StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f)); + statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum); } else { SetReward(-0.1f); StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.failMaterial, 0.5f)); + statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum); } EndEpisode(); } From e7c45cda622601e8189642b293bcbb53aa7078eb Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 00:33:34 -0800 Subject: [PATCH 07/21] fixed field order with default values for StatsSummary --- ml-agents/mlagents/trainers/stats.py | 8 ++++---- ml-agents/mlagents/trainers/tests/test_stats.py | 14 ++++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 005a3dcb30..41d6481652 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -43,13 +43,13 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: class StatsSummary(NamedTuple): mean: float std: float - sum: float num: int - aggregation: StatsAggregationMethod + sum: float = 0 + aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0.0, 0, StatsAggregationMethod.AVERAGE) + return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE) class StatsPropertyType(Enum): @@ -314,8 +314,8 @@ def get_stats_summaries(self, key: str) -> StatsSummary: return StatsSummary( mean=np.mean(StatsReporter.stats_dict[self.category][key]), std=np.std(StatsReporter.stats_dict[self.category][key]), - sum=np.sum(StatsReporter.stats_dict[self.category][key]), num=len(StatsReporter.stats_dict[self.category][key]), + sum=np.sum(StatsReporter.stats_dict[self.category][key]), aggregation=StatsReporter.stats_aggregation[self.category][key], ) return StatsSummary.empty() diff --git 
a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index 7a81ac684b..70179eb76f 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -129,7 +129,7 @@ def test_console_writer(self): with self.assertLogs("mlagents.trainers", level="INFO") as cm: category = "category1" console_writer = ConsoleWriter() - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1, sum=1) console_writer.write_stats( category, { @@ -138,11 +138,11 @@ def test_console_writer(self): }, 10, ) - statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1) + statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1, sum=0.0) console_writer.write_stats( category, { - "Environment/Cumulative Reward": statssummary1, + "Environment/Cumulative Reward": statssummary2, "Is Training": statssummary2, }, 10, @@ -153,7 +153,8 @@ def test_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0] + "Mean Reward: 0.500. Std of Reward: 0.707. Num of Reward: 2.000. Sum of Reward: 1.000. Training.", + cm.output[0], ) self.assertIn("Not Training.", cm.output[1]) @@ -165,7 +166,7 @@ def test_selfplay_console_writer(self): category = "category1" console_writer = ConsoleWriter() console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True) - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1, sum=1) console_writer.write_stats( category, { @@ -177,5 +178,6 @@ def test_selfplay_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0] + "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. Sum of Reward: 1.000. 
Training.", + cm.output[0], ) From b6c9a2ea9b281489364e85ed036f6438dc329ad6 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 01:05:26 -0800 Subject: [PATCH 08/21] simplified StatsSummary --- ml-agents/mlagents/trainers/ppo/trainer.py | 2 +- ml-agents/mlagents/trainers/stats.py | 44 ++++++++++--------- .../trainers/tests/check_env_trains.py | 4 +- .../trainers/tests/test_agent_processor.py | 4 +- .../mlagents/trainers/tests/test_stats.py | 18 ++++---- .../reward_providers/gail_reward_provider.py | 6 +-- 6 files changed, 40 insertions(+), 38 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 62999878a5..313364a6af 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -169,7 +169,7 @@ def _update_policy(self): advantages = self.update_buffer["advantages"].get_batch() self.update_buffer["advantages"].set( - (advantages - advantages.mean()) / (advantages.std() + 1e-10) + (advantages - advantages.stats_value()) / (advantages.std() + 1e-10) ) num_epoch = self.hyperparameters.num_epoch batch_update_stats = defaultdict(list) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 41d6481652..3e1650e5f6 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -41,15 +41,13 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: class StatsSummary(NamedTuple): - mean: float + stats_value: float std: float num: int - sum: float = 0 - aggregation: StatsAggregationMethod = StatsAggregationMethod.AVERAGE @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE) + return StatsSummary(0.0, 0.0, 0) class StatsPropertyType(Enum): @@ -78,7 +76,7 @@ def add_property( with all types of properties. For instance, a TB writer doesn't need a max step. :param category: The category that the property belongs to. - :param type: The type of property. + :param property_type: The type of property. :param value: The property itself. 
""" pass @@ -102,7 +100,7 @@ def write_stats( for val, stats_summary in values.items(): set_gauge( GaugeWriter.sanitize_string(f"{category}.{val}.mean"), - float(stats_summary.mean), + float(stats_summary.stats_value), ) @@ -120,7 +118,7 @@ def write_stats( is_training = "Not Training" if "Is Training" in values: stats_summary = values["Is Training"] - if stats_summary.mean > 0.0: + if stats_summary.stats_value > 0.0: is_training = "Training" elapsed_time = time.time() - self.training_start_time @@ -132,15 +130,14 @@ def write_stats( if self.rank is not None: log_info.append(f"Rank: {self.rank}") - log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") + log_info.append(f"Mean Reward: {stats_summary.stats_value:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") - log_info.append(f"Sum of Reward: {stats_summary.sum:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: elo_stats = values["Self-play/ELO"] - log_info.append(f"ELO: {elo_stats.mean:0.3f}") + log_info.append(f"ELO: {elo_stats.stats_value:0.3f}") else: log_info.append("No episode was completed since last summary") log_info.append(is_training) @@ -179,10 +176,7 @@ def write_stats( ) -> None: self._maybe_create_summary_writer(category) for key, value in values.items(): - if value.aggregation == StatsAggregationMethod.SUM: - self.summary_writers[category].add_scalar(f"{key}", value.sum, step) - else: - self.summary_writers[category].add_scalar(f"{key}", value.mean, step) + self.summary_writers[category].add_scalar(f"{key}", value.stats_value, step) self.summary_writers[category].flush() def _maybe_create_summary_writer(self, category: str) -> None: @@ -311,11 +305,19 @@ def get_stats_summaries(self, key: str) -> StatsSummary: :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). 
""" if len(StatsReporter.stats_dict[self.category][key]) > 0: - return StatsSummary( - mean=np.mean(StatsReporter.stats_dict[self.category][key]), - std=np.std(StatsReporter.stats_dict[self.category][key]), - num=len(StatsReporter.stats_dict[self.category][key]), - sum=np.sum(StatsReporter.stats_dict[self.category][key]), - aggregation=StatsReporter.stats_aggregation[self.category][key], - ) + if ( + StatsReporter.stats_aggregation[self.category][key] + == StatsAggregationMethod.SUM + ): + return StatsSummary( + stats_value=np.sum(StatsReporter.stats_dict[self.category][key]), + std=np.std(StatsReporter.stats_dict[self.category][key]), + num=len(StatsReporter.stats_dict[self.category][key]), + ) + else: + return StatsSummary( + stats_value=np.mean(StatsReporter.stats_dict[self.category][key]), + std=np.std(StatsReporter.stats_dict[self.category][key]), + num=len(StatsReporter.stats_dict[self.category][key]), + ) return StatsSummary.empty() diff --git a/ml-agents/mlagents/trainers/tests/check_env_trains.py b/ml-agents/mlagents/trainers/tests/check_env_trains.py index 20630d0b32..5bcd59801f 100644 --- a/ml-agents/mlagents/trainers/tests/check_env_trains.py +++ b/ml-agents/mlagents/trainers/tests/check_env_trains.py @@ -28,8 +28,8 @@ def write_stats( ) -> None: for val, stats_summary in values.items(): if val == "Environment/Cumulative Reward": - print(step, val, stats_summary.mean) - self._last_reward_summary[category] = stats_summary.mean + print(step, val, stats_summary.stats_value) + self._last_reward_summary[category] = stats_summary.stats_value # The reward processor is passed as an argument to _check_environment_trains. diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py index efa2549f20..fad5d0d61c 100644 --- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py +++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py @@ -272,8 +272,8 @@ def test_agent_manager_stats(): manager.record_environment_stats(env_stats, worker_id=0) expected_stats = { - "averaged": StatsSummary(mean=2.0, std=mock.ANY, num=2), - "most_recent": StatsSummary(mean=4.0, std=0.0, num=1), + "averaged": StatsSummary(stats_value=2.0, std=mock.ANY, num=2), + "most_recent": StatsSummary(stats_value=4.0, std=0.0, num=1), } stats_reporter.write_stats(123) writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123) diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index 70179eb76f..be4bb2858e 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -36,8 +36,8 @@ def test_stat_reporter_add_summary_write(): assert statssummary1.num == 10 assert statssummary2.num == 10 - assert statssummary1.mean == 4.5 - assert statssummary2.mean == 4.5 + assert statssummary1.stats_value == 4.5 + assert statssummary2.stats_value == 4.5 assert statssummary1.std == pytest.approx(2.9, abs=0.1) assert statssummary2.std == pytest.approx(2.9, abs=0.1) @@ -74,7 +74,7 @@ def test_tensorboard_writer(mock_summary): category = "category1" with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir: tb_writer = TensorboardWriter(base_dir, clear_past_data=False) - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) # Test that the filewriter has been created and the directory has been created. 
@@ -97,7 +97,7 @@ def test_tensorboard_writer(mock_summary): def test_tensorboard_writer_clear(tmp_path): tb_writer = TensorboardWriter(tmp_path, clear_past_data=False) - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) # TB has some sort of timeout before making a new file time.sleep(1.0) @@ -129,7 +129,7 @@ def test_console_writer(self): with self.assertLogs("mlagents.trainers", level="INFO") as cm: category = "category1" console_writer = ConsoleWriter() - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1, sum=1) + statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) console_writer.write_stats( category, { @@ -138,7 +138,7 @@ def test_console_writer(self): }, 10, ) - statssummary2 = StatsSummary(mean=0.0, std=0.0, num=1, sum=0.0) + statssummary2 = StatsSummary(stats_value=0.0, std=0.0, num=1) console_writer.write_stats( category, { @@ -153,7 +153,7 @@ def test_console_writer(self): ) self.assertIn( - "Mean Reward: 0.500. Std of Reward: 0.707. Num of Reward: 2.000. Sum of Reward: 1.000. Training.", + "Mean Reward: 0.500. Std of Reward: 0.707. Num of Reward: 2.000. Training.", cm.output[0], ) self.assertIn("Not Training.", cm.output[1]) @@ -166,7 +166,7 @@ def test_selfplay_console_writer(self): category = "category1" console_writer = ConsoleWriter() console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True) - statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1, sum=1) + statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) console_writer.write_stats( category, { @@ -178,6 +178,6 @@ def test_selfplay_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. Sum of Reward: 1.000. Training.", + "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. 
Training.", cm.output[0], ) diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py index 1514b6693a..15fd7d7467 100644 --- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py +++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py @@ -161,12 +161,12 @@ def compute_loss( expert_estimate, expert_mu = self.compute_estimate( expert_batch, use_vail_noise=True ) - stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item() - stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item() + stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.stats_value().item() + stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.stats_value().item() discriminator_loss = -( torch.log(expert_estimate + self.EPSILON) + torch.log(1.0 - policy_estimate + self.EPSILON) - ).mean() + ).stats_value() stats_dict["Losses/GAIL Loss"] = discriminator_loss.item() total_loss += discriminator_loss if self._settings.use_vail: From 9206b46f68dbe2b1ca0e4d75c0c85617090fd727 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 10:47:34 -0800 Subject: [PATCH 09/21] add default value for custom stats --- .../Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs index e453ad902e..ae02b47d1d 100644 --- a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs +++ b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs @@ -160,5 +160,7 @@ public override void OnEpisodeBegin() symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position; symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position; } + statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum); + statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum); } } From 4a4451413df0fbd973d5517027e79f33be7c2243 Mon Sep 17 00:00:00 2001 From: brccabral Date: Sun, 3 Jan 2021 10:48:41 -0800 Subject: [PATCH 10/21] fixed tests --- ml-agents/mlagents/trainers/ppo/trainer.py | 10 +++++++++- ml-agents/mlagents/trainers/stats.py | 2 +- ml-agents/mlagents/trainers/tests/test_learn.py | 16 ++++++++++++---- .../mlagents/trainers/tests/test_rl_trainer.py | 3 ++- ml-agents/mlagents/trainers/tests/test_stats.py | 2 +- .../reward_providers/gail_reward_provider.py | 7 ++++--- .../mlagents/trainers/trainer/rl_trainer.py | 7 +++++++ 7 files changed, 36 insertions(+), 11 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index 313364a6af..f875ee0fbd 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -35,6 +35,7 @@ def __init__( ): """ Responsible for collecting experiences and training PPO model. + :param behavior_name: The name of the behavior associated with trainer config :param reward_buff_cap: Max reward history to track in the reward buffer :param trainer_settings: The parameters for the trainer. @@ -61,6 +62,7 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: """ Takes a trajectory and processes it, putting it into the update buffer. Processing involves calculating value and advantage targets for model updating step. 
+ :param trajectory: The Trajectory tuple containing the steps to be processed. """ super()._process_trajectory(trajectory) @@ -141,6 +143,7 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: def _is_ready_update(self): """ Returns whether or not the trainer has enough elements to run update model + :return: A boolean corresponding to whether or not update_model() can be run """ size_of_buffer = self.update_buffer.num_experiences @@ -169,7 +172,7 @@ def _update_policy(self): advantages = self.update_buffer["advantages"].get_batch() self.update_buffer["advantages"].set( - (advantages - advantages.stats_value()) / (advantages.std() + 1e-10) + (advantages - advantages.mean()) / (advantages.std() + 1e-10) ) num_epoch = self.hyperparameters.num_epoch batch_update_stats = defaultdict(list) @@ -199,6 +202,7 @@ def create_torch_policy( ) -> TorchPolicy: """ Creates a policy with a PyTorch backend and PPO hyperparameters + :param parsed_behavior_id: :param behavior_spec: specifications for policy construction :return policy @@ -222,6 +226,7 @@ def add_policy( ) -> None: """ Adds policy to trainer. + :param parsed_behavior_id: Behavior identifiers that the policy should belong to. :param policy: Policy to associate with name_behavior_id. """ @@ -249,6 +254,7 @@ def add_policy( def get_policy(self, name_behavior_id: str) -> Policy: """ Gets policy from trainer associated with name_behavior_id + :param name_behavior_id: full identifier of policy """ @@ -258,6 +264,7 @@ def get_policy(self, name_behavior_id: str) -> Policy: def discount_rewards(r, gamma=0.99, value_next=0.0): """ Computes discounted sum of future rewards for use in updating value estimate. + :param r: List of rewards. :param gamma: Discount factor. :param value_next: T+1 value estimate for returns calculation. @@ -274,6 +281,7 @@ def discount_rewards(r, gamma=0.99, value_next=0.0): def get_gae(rewards, value_estimates, value_next=0.0, gamma=0.99, lambd=0.95): """ Computes generalized advantage estimate for use in updating policy. + :param rewards: list of rewards for time-steps t to T. :param value_next: Value estimate for time-step T+1. :param value_estimates: list of value estimates for time-steps t to T. 
diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 3e1650e5f6..b3c240f61a 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -221,7 +221,7 @@ class StatsReporter: stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list)) lock = RLock() stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict( - lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE) + lambda: defaultdict(lambda: {"", StatsAggregationMethod.AVERAGE}) ) def __init__(self, category: str): diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index 81e1e5847d..54783b41ff 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -8,6 +8,7 @@ from mlagents_envs.exception import UnityEnvironmentException from mlagents.trainers.stats import StatsReporter from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager +import os.path def basic_options(extra_args=None): @@ -75,17 +76,24 @@ def test_run_training( learn.run_training(0, options) mock_init.assert_called_once_with( trainer_factory_mock.return_value, - "results/ppo", + os.path.join("results", "ppo"), "ppo", "mock_param_manager", True, 0, ) handle_dir_mock.assert_called_once_with( - "results/ppo", False, False, "results/notuselessrun" + os.path.join("results", "ppo"), + False, + False, + os.path.join("results", "notuselessrun"), + ) + write_timing_tree_mock.assert_called_once_with( + os.path.join("results", "ppo", "run_logs") + ) + write_run_options_mock.assert_called_once_with( + os.path.join("results", "ppo"), options ) - write_timing_tree_mock.assert_called_once_with("results/ppo/run_logs") - write_run_options_mock.assert_called_once_with("results/ppo", options) StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py diff --git a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py index 9a1c583dd4..ae0b64b5be 100644 --- a/ml-agents/mlagents/trainers/tests/test_rl_trainer.py +++ b/ml-agents/mlagents/trainers/tests/test_rl_trainer.py @@ -9,6 +9,7 @@ from mlagents.trainers.settings import TrainerSettings from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes from mlagents_envs.base_env import ActionSpec +import os.path # Add concrete implementations of abstract methods @@ -171,7 +172,7 @@ def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary): trainer.brain_name, ModelCheckpoint( step, - f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}", + f"{trainer.model_saver.model_path}{os.path.sep}{trainer.brain_name}-{step}.{export_ext}", None, mock.ANY, ), diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index be4bb2858e..b42949b759 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -153,7 +153,7 @@ def test_console_writer(self): ) self.assertIn( - "Mean Reward: 0.500. Std of Reward: 0.707. Num of Reward: 2.000. Training.", + "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. 
Training.", cm.output[0], ) self.assertIn("Not Training.", cm.output[1]) diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py index 15fd7d7467..41af1711fb 100644 --- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py +++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py @@ -128,6 +128,7 @@ def compute_estimate( """ Given a mini_batch, computes the estimate (How much the discriminator believes the data was sampled from the demonstration data). + :param mini_batch: The AgentBuffer of data :param use_vail_noise: Only when using VAIL : If true, will sample the code, if false, will return the mean of the code. @@ -161,12 +162,12 @@ def compute_loss( expert_estimate, expert_mu = self.compute_estimate( expert_batch, use_vail_noise=True ) - stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.stats_value().item() - stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.stats_value().item() + stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item() + stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item() discriminator_loss = -( torch.log(expert_estimate + self.EPSILON) + torch.log(1.0 - policy_estimate + self.EPSILON) - ).stats_value() + ).mean() stats_dict["Losses/GAIL Loss"] = discriminator_loss.item() total_loss += discriminator_loss if self._settings.use_vail: diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py index 93fa60d551..3e676bbede 100644 --- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py +++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py @@ -100,6 +100,7 @@ def _clear_update_buffer(self) -> None: def _is_ready_update(self): """ Returns whether or not the trainer has enough elements to run update model + :return: A boolean corresponding to wether or not update_model() can be run """ return False @@ -186,6 +187,7 @@ def save_model(self) -> None: def _update_policy(self) -> bool: """ Uses demonstration_buffer to update model. + :return: Whether or not the policy was updated. """ pass @@ -193,6 +195,7 @@ def _update_policy(self) -> bool: def _increment_step(self, n_steps: int, name_behavior_id: str) -> None: """ Increment the step count of the trainer + :param n_steps: number of steps to increment the step count by """ self.step += n_steps @@ -207,6 +210,7 @@ def _increment_step(self, n_steps: int, name_behavior_id: str) -> None: def _get_next_interval_step(self, interval: int) -> int: """ Get the next step count that should result in an action. + :param interval: The interval between actions. """ return self.step + (interval - self.step % interval) @@ -222,6 +226,7 @@ def _write_summary(self, step: int) -> None: def _process_trajectory(self, trajectory: Trajectory) -> None: """ Takes a trajectory and processes it, putting it into the update buffer. + :param trajectory: The Trajectory tuple containing the steps to be processed. """ self._maybe_write_summary(self.get_step + len(trajectory.steps)) @@ -232,6 +237,7 @@ def _maybe_write_summary(self, step_after_process: int) -> None: """ If processing the trajectory will make the step exceed the next summary write, write the summary. This logic ensures summaries are written on the update step and not in between. + :param step_after_process: the step count after processing the next trajectory. 
""" if self._next_summary_step == 0: # Don't write out the first one @@ -243,6 +249,7 @@ def _maybe_save_model(self, step_after_process: int) -> None: """ If processing the trajectory will make the step exceed the next model write, save the model. This logic ensures models are written on the update step and not in between. + :param step_after_process: the step count after processing the next trajectory. """ if self._next_save_step == 0: # Don't save the first one From 129906246c5ff09b974d3bd7c9acec759eb660a4 Mon Sep 17 00:00:00 2001 From: brccabral Date: Tue, 5 Jan 2021 20:07:30 -0800 Subject: [PATCH 11/21] extended test test_agent_manager_stats in test_agent_processor.py to have sum --- ml-agents/mlagents/trainers/tests/test_agent_processor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py index fad5d0d61c..2cbaf13ecd 100644 --- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py +++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py @@ -262,10 +262,12 @@ def test_agent_manager_stats(): { "averaged": [(1.0, StatsAggregationMethod.AVERAGE)], "most_recent": [(2.0, StatsAggregationMethod.MOST_RECENT)], + "summed": [(3.1, StatsAggregationMethod.SUM)], }, { "averaged": [(3.0, StatsAggregationMethod.AVERAGE)], "most_recent": [(4.0, StatsAggregationMethod.MOST_RECENT)], + "summed": [(1.1, StatsAggregationMethod.SUM)], }, ] for env_stats in all_env_stats: @@ -274,6 +276,7 @@ def test_agent_manager_stats(): expected_stats = { "averaged": StatsSummary(stats_value=2.0, std=mock.ANY, num=2), "most_recent": StatsSummary(stats_value=4.0, std=0.0, num=1), + "summed": StatsSummary(stats_value=4.2, std=mock.ANY, num=2), } stats_reporter.write_stats(123) writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123) From f667c0bc1cbd22ac69c088d40559042563cc829e Mon Sep 17 00:00:00 2001 From: brccabral Date: Wed, 6 Jan 2021 22:51:28 -0800 Subject: [PATCH 12/21] refractor StatsSummary to add sum as property --- ml-agents/mlagents/trainers/stats.py | 55 ++++++++++--------- .../trainers/tests/check_env_trains.py | 4 +- .../trainers/tests/test_agent_processor.py | 24 +++++++- .../mlagents/trainers/tests/test_stats.py | 45 ++++++++++++--- 4 files changed, 91 insertions(+), 37 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index b3c240f61a..472db4ddc1 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -41,13 +41,22 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: class StatsSummary(NamedTuple): - stats_value: float + mean: float std: float num: int + sum: float + aggregation_method: StatsAggregationMethod @staticmethod def empty() -> "StatsSummary": - return StatsSummary(0.0, 0.0, 0) + return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE) + + @property + def aggregated_value(self): + if self.aggregation_method == StatsAggregationMethod.SUM: + return self.sum + else: + return self.mean class StatsPropertyType(Enum): @@ -99,8 +108,8 @@ def write_stats( ) -> None: for val, stats_summary in values.items(): set_gauge( - GaugeWriter.sanitize_string(f"{category}.{val}.mean"), - float(stats_summary.stats_value), + GaugeWriter.sanitize_string(f"{category}.{val}.aggregated_value"), + float(stats_summary.aggregated_value), ) @@ -118,7 +127,7 @@ def write_stats( is_training = "Not Training" if "Is Training" in values: stats_summary = 
values["Is Training"] - if stats_summary.stats_value > 0.0: + if stats_summary.aggregated_value > 0.0: is_training = "Training" elapsed_time = time.time() - self.training_start_time @@ -130,14 +139,14 @@ def write_stats( if self.rank is not None: log_info.append(f"Rank: {self.rank}") - log_info.append(f"Mean Reward: {stats_summary.stats_value:0.3f}") + log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: elo_stats = values["Self-play/ELO"] - log_info.append(f"ELO: {elo_stats.stats_value:0.3f}") + log_info.append(f"ELO: {elo_stats.aggregated_value:0.3f}") else: log_info.append("No episode was completed since last summary") log_info.append(is_training) @@ -176,7 +185,9 @@ def write_stats( ) -> None: self._maybe_create_summary_writer(category) for key, value in values.items(): - self.summary_writers[category].add_scalar(f"{key}", value.stats_value, step) + self.summary_writers[category].add_scalar( + f"{key}", value.aggregated_value, step + ) self.summary_writers[category].flush() def _maybe_create_summary_writer(self, category: str) -> None: @@ -304,20 +315,14 @@ def get_stats_summaries(self, key: str) -> StatsSummary: :param key: The type of statistic, e.g. Environment/Reward. :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). """ - if len(StatsReporter.stats_dict[self.category][key]) > 0: - if ( - StatsReporter.stats_aggregation[self.category][key] - == StatsAggregationMethod.SUM - ): - return StatsSummary( - stats_value=np.sum(StatsReporter.stats_dict[self.category][key]), - std=np.std(StatsReporter.stats_dict[self.category][key]), - num=len(StatsReporter.stats_dict[self.category][key]), - ) - else: - return StatsSummary( - stats_value=np.mean(StatsReporter.stats_dict[self.category][key]), - std=np.std(StatsReporter.stats_dict[self.category][key]), - num=len(StatsReporter.stats_dict[self.category][key]), - ) - return StatsSummary.empty() + stat_values = StatsReporter.stats_dict[self.category][key] + if len(stat_values) == 0: + return StatsSummary.empty() + + return StatsSummary( + mean=np.mean(stat_values), + std=np.std(stat_values), + num=len(stat_values), + sum=np.sum(stat_values), + aggregation_method=StatsReporter.stats_aggregation[self.category][key], + ) diff --git a/ml-agents/mlagents/trainers/tests/check_env_trains.py b/ml-agents/mlagents/trainers/tests/check_env_trains.py index 5bcd59801f..0742b773f5 100644 --- a/ml-agents/mlagents/trainers/tests/check_env_trains.py +++ b/ml-agents/mlagents/trainers/tests/check_env_trains.py @@ -28,8 +28,8 @@ def write_stats( ) -> None: for val, stats_summary in values.items(): if val == "Environment/Cumulative Reward": - print(step, val, stats_summary.stats_value) - self._last_reward_summary[category] = stats_summary.stats_value + print(step, val, stats_summary.aggregated_value) + self._last_reward_summary[category] = stats_summary.aggregated_value # The reward processor is passed as an argument to _check_environment_trains. 
diff --git a/ml-agents/mlagents/trainers/tests/test_agent_processor.py b/ml-agents/mlagents/trainers/tests/test_agent_processor.py index 2cbaf13ecd..5301b535d2 100644 --- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py +++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py @@ -274,9 +274,27 @@ def test_agent_manager_stats(): manager.record_environment_stats(env_stats, worker_id=0) expected_stats = { - "averaged": StatsSummary(stats_value=2.0, std=mock.ANY, num=2), - "most_recent": StatsSummary(stats_value=4.0, std=0.0, num=1), - "summed": StatsSummary(stats_value=4.2, std=mock.ANY, num=2), + "averaged": StatsSummary( + mean=2.0, + std=mock.ANY, + num=2, + sum=4.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ), + "most_recent": StatsSummary( + mean=4.0, + std=0.0, + num=1, + sum=4.0, + aggregation_method=StatsAggregationMethod.MOST_RECENT, + ), + "summed": StatsSummary( + mean=2.1, + std=mock.ANY, + num=2, + sum=4.2, + aggregation_method=StatsAggregationMethod.SUM, + ), } stats_reporter.write_stats(123) writer.write_stats.assert_any_call("FakeCategory", expected_stats, 123) diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index b42949b759..010ed48efd 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -12,6 +12,7 @@ GaugeWriter, ConsoleWriter, StatsPropertyType, + StatsAggregationMethod, ) @@ -36,8 +37,8 @@ def test_stat_reporter_add_summary_write(): assert statssummary1.num == 10 assert statssummary2.num == 10 - assert statssummary1.stats_value == 4.5 - assert statssummary2.stats_value == 4.5 + assert statssummary1.mean == 4.5 + assert statssummary2.mean == 4.5 assert statssummary1.std == pytest.approx(2.9, abs=0.1) assert statssummary2.std == pytest.approx(2.9, abs=0.1) @@ -74,7 +75,13 @@ def test_tensorboard_writer(mock_summary): category = "category1" with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir: tb_writer = TensorboardWriter(base_dir, clear_past_data=False) - statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) + statssummary1 = StatsSummary( + mean=1.0, + std=1.0, + num=1, + sum=1.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) # Test that the filewriter has been created and the directory has been created. 
@@ -97,7 +104,13 @@ def test_tensorboard_writer(mock_summary): def test_tensorboard_writer_clear(tmp_path): tb_writer = TensorboardWriter(tmp_path, clear_past_data=False) - statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) + statssummary1 = StatsSummary( + mean=1.0, + std=1.0, + num=1, + sum=1.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) # TB has some sort of timeout before making a new file time.sleep(1.0) @@ -129,7 +142,13 @@ def test_console_writer(self): with self.assertLogs("mlagents.trainers", level="INFO") as cm: category = "category1" console_writer = ConsoleWriter() - statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) + statssummary1 = StatsSummary( + mean=1.0, + std=1.0, + num=1, + sum=1.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) console_writer.write_stats( category, { @@ -138,7 +157,13 @@ def test_console_writer(self): }, 10, ) - statssummary2 = StatsSummary(stats_value=0.0, std=0.0, num=1) + statssummary2 = StatsSummary( + mean=0.0, + std=0.0, + num=1, + sum=0.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) console_writer.write_stats( category, { @@ -166,7 +191,13 @@ def test_selfplay_console_writer(self): category = "category1" console_writer = ConsoleWriter() console_writer.add_property(category, StatsPropertyType.SELF_PLAY, True) - statssummary1 = StatsSummary(stats_value=1.0, std=1.0, num=1) + statssummary1 = StatsSummary( + mean=1.0, + std=1.0, + num=1, + sum=1.0, + aggregation_method=StatsAggregationMethod.AVERAGE, + ) console_writer.write_stats( category, { From 1531ec32809a82ec62315ed623949d642cab8dbb Mon Sep 17 00:00:00 2001 From: brccabral Date: Wed, 6 Jan 2021 23:17:32 -0800 Subject: [PATCH 13/21] fixed tests --- ml-agents/mlagents/trainers/agent_processor.py | 6 +++--- ml-agents/mlagents/trainers/stats.py | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index 3650d54552..3512b9df02 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -332,11 +332,11 @@ def record_environment_stats( for stat_name, value_list in env_stats.items(): for val, agg_type in value_list: if agg_type == StatsAggregationMethod.AVERAGE: - self.stats_reporter.add_stat(stat_name, val) + self.stats_reporter.add_stat(stat_name, val, agg_type) + elif agg_type == StatsAggregationMethod.SUM: + self.stats_reporter.add_stat(stat_name, val, agg_type) elif agg_type == StatsAggregationMethod.MOST_RECENT: # In order to prevent conflicts between multiple environments, # only stats from the first environment are recorded. 
if worker_id == 0: self.stats_reporter.set_stat(stat_name, val) - elif agg_type == StatsAggregationMethod.SUM: - self.stats_reporter.add_stat(stat_name, val, agg_type) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 472db4ddc1..9f6ad2eccc 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -232,7 +232,7 @@ class StatsReporter: stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list)) lock = RLock() stats_aggregation: Dict[str, Dict[str, StatsAggregationMethod]] = defaultdict( - lambda: defaultdict(lambda: {"", StatsAggregationMethod.AVERAGE}) + lambda: defaultdict(lambda: StatsAggregationMethod.AVERAGE) ) def __init__(self, category: str): @@ -289,6 +289,9 @@ def set_stat(self, key: str, value: float) -> None: """ with StatsReporter.lock: StatsReporter.stats_dict[self.category][key] = [value] + StatsReporter.stats_aggregation[self.category][ + key + ] = StatsAggregationMethod.MOST_RECENT def write_stats(self, step: int) -> None: """ @@ -310,7 +313,7 @@ def write_stats(self, step: int) -> None: def get_stats_summaries(self, key: str) -> StatsSummary: """ - Get the mean, std, sum, count and aggregation method of a particular statistic, since last write. + Get the mean, std, count, sum and aggregation method of a particular statistic, since last write. :param key: The type of statistic, e.g. Environment/Reward. :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). From dadf67d7f778e6a01cd0d1f8122ca0a4fbd4be61 Mon Sep 17 00:00:00 2001 From: brccabral Date: Thu, 7 Jan 2021 18:45:19 -0800 Subject: [PATCH 14/21] Unity coding standard --- .../Examples/Hallway/Scripts/HallwayAgent.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs index ae02b47d1d..aa7daf1a57 100644 --- a/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs +++ b/Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs @@ -18,7 +18,7 @@ public class HallwayAgent : Agent Renderer m_GroundRenderer; HallwaySettings m_HallwaySettings; int m_Selection; - StatsRecorder statsRecorder; + StatsRecorder m_statsRecorder; public override void Initialize() { @@ -26,7 +26,7 @@ public override void Initialize() m_AgentRb = GetComponent(); m_GroundRenderer = ground.GetComponent(); m_GroundMaterial = m_GroundRenderer.material; - statsRecorder = Academy.Instance.StatsRecorder; + m_statsRecorder = Academy.Instance.StatsRecorder; } public override void CollectObservations(VectorSensor sensor) @@ -85,13 +85,13 @@ void OnCollisionEnter(Collision col) { SetReward(1f); StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.goalScoredMaterial, 0.5f)); - statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum); + m_statsRecorder.Add("Goal/Correct", 1, StatAggregationMethod.Sum); } else { SetReward(-0.1f); StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.failMaterial, 0.5f)); - statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum); + m_statsRecorder.Add("Goal/Wrong", 1, StatAggregationMethod.Sum); } EndEpisode(); } @@ -160,7 +160,7 @@ public override void OnEpisodeBegin() symbolXGoal.transform.position = new Vector3(7f, 0.5f, 22.29f) + area.transform.position; symbolOGoal.transform.position = new Vector3(-7f, 0.5f, 22.29f) + area.transform.position; } - statsRecorder.Add("Goal/Correct", 0, 
StatAggregationMethod.Sum); - statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum); + m_statsRecorder.Add("Goal/Correct", 0, StatAggregationMethod.Sum); + m_statsRecorder.Add("Goal/Wrong", 0, StatAggregationMethod.Sum); } } From b5e63974b76c13e2cff2448e4dc1956396c84821 Mon Sep 17 00:00:00 2001 From: brccabral Date: Thu, 7 Jan 2021 18:45:42 -0800 Subject: [PATCH 15/21] reverted docstring empty lines --- .../torch/components/reward_providers/gail_reward_provider.py | 1 - ml-agents/mlagents/trainers/trainer/rl_trainer.py | 1 - 2 files changed, 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py index 41af1711fb..1514b6693a 100644 --- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py +++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py @@ -128,7 +128,6 @@ def compute_estimate( """ Given a mini_batch, computes the estimate (How much the discriminator believes the data was sampled from the demonstration data). - :param mini_batch: The AgentBuffer of data :param use_vail_noise: Only when using VAIL : If true, will sample the code, if false, will return the mean of the code. diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py index 3e676bbede..0653cb2b8e 100644 --- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py +++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py @@ -100,7 +100,6 @@ def _clear_update_buffer(self) -> None: def _is_ready_update(self): """ Returns whether or not the trainer has enough elements to run update model - :return: A boolean corresponding to wether or not update_model() can be run """ return False From 913fddfd08957cf0034fe690223eb3db745f6473 Mon Sep 17 00:00:00 2001 From: brccabral Date: Thu, 7 Jan 2021 18:46:02 -0800 Subject: [PATCH 16/21] GaugeWriter fix --- ml-agents/mlagents/trainers/stats.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 9f6ad2eccc..858a5767c6 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -108,8 +108,12 @@ def write_stats( ) -> None: for val, stats_summary in values.items(): set_gauge( - GaugeWriter.sanitize_string(f"{category}.{val}.aggregated_value"), - float(stats_summary.aggregated_value), + GaugeWriter.sanitize_string(f"{category}.{val}.mean"), + float(stats_summary.mean), + ) + set_gauge( + GaugeWriter.sanitize_string(f"{category}.{val}.sum"), + float(stats_summary.sum), ) @@ -146,7 +150,7 @@ def write_stats( if self.self_play and "Self-play/ELO" in values: elo_stats = values["Self-play/ELO"] - log_info.append(f"ELO: {elo_stats.aggregated_value:0.3f}") + log_info.append(f"ELO: {elo_stats.mean:0.3f}") else: log_info.append("No episode was completed since last summary") log_info.append(is_training) @@ -316,7 +320,7 @@ def get_stats_summaries(self, key: str) -> StatsSummary: Get the mean, std, count, sum and aggregation method of a particular statistic, since last write. :param key: The type of statistic, e.g. Environment/Reward. - :returns: A StatsSummary NamedTuple containing (mean, std, sum, count, aggregation). + :returns: A StatsSummary containing summary statistics. 
""" stat_values = StatsReporter.stats_dict[self.category][key] if len(stat_values) == 0: From 8bc9892df9a6010f0dac325bb14bf569fae72bda Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 09:40:13 -0800 Subject: [PATCH 17/21] revert some whitespace --- ml-agents/mlagents/trainers/agent_processor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/agent_processor.py b/ml-agents/mlagents/trainers/agent_processor.py index 3512b9df02..748e77df78 100644 --- a/ml-agents/mlagents/trainers/agent_processor.py +++ b/ml-agents/mlagents/trainers/agent_processor.py @@ -71,7 +71,6 @@ def add_experiences( ) -> None: """ Adds experiences to each agent's experience history. - :param decision_steps: current DecisionSteps. :param terminal_steps: current TerminalSteps. :param previous_action: The outputs of the Policy's get_action method. @@ -114,7 +113,12 @@ def add_experiences( ) def _process_step( - self, step: Union[TerminalStep, DecisionStep], global_id: str, index: int + self, + step: Union[ + TerminalStep, DecisionStep + ], # pylint: disable=unsubscriptable-object + global_id: str, + index: int, ) -> None: terminated = isinstance(step, TerminalStep) stored_decision_step, idx = self.last_step_result.get(global_id, (None, None)) @@ -211,7 +215,6 @@ def publish_trajectory_queue( """ Adds a trajectory queue to the list of queues to publish to when this AgentProcessor assembles a Trajectory - :param trajectory_queue: Trajectory queue to publish to. """ self.trajectory_queues.append(trajectory_queue) @@ -253,7 +256,6 @@ def __init__(self, behavior_id: str, maxlen: int = 0): def maxlen(self): """ The maximum length of the queue. - :return: Maximum length of the queue. """ return self._maxlen @@ -262,7 +264,6 @@ def maxlen(self): def behavior_id(self): """ The Behavior ID of this queue. - :return: Behavior ID associated with the queue. """ return self._behavior_id From cc76bea9ea84853e174b786a0f0dfad0fb4997dd Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 09:40:52 -0800 Subject: [PATCH 18/21] undo whitespace --- ml-agents/mlagents/trainers/ppo/trainer.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ml-agents/mlagents/trainers/ppo/trainer.py b/ml-agents/mlagents/trainers/ppo/trainer.py index f875ee0fbd..62999878a5 100644 --- a/ml-agents/mlagents/trainers/ppo/trainer.py +++ b/ml-agents/mlagents/trainers/ppo/trainer.py @@ -35,7 +35,6 @@ def __init__( ): """ Responsible for collecting experiences and training PPO model. - :param behavior_name: The name of the behavior associated with trainer config :param reward_buff_cap: Max reward history to track in the reward buffer :param trainer_settings: The parameters for the trainer. @@ -62,7 +61,6 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: """ Takes a trajectory and processes it, putting it into the update buffer. Processing involves calculating value and advantage targets for model updating step. - :param trajectory: The Trajectory tuple containing the steps to be processed. 
""" super()._process_trajectory(trajectory) @@ -143,7 +141,6 @@ def _process_trajectory(self, trajectory: Trajectory) -> None: def _is_ready_update(self): """ Returns whether or not the trainer has enough elements to run update model - :return: A boolean corresponding to whether or not update_model() can be run """ size_of_buffer = self.update_buffer.num_experiences @@ -202,7 +199,6 @@ def create_torch_policy( ) -> TorchPolicy: """ Creates a policy with a PyTorch backend and PPO hyperparameters - :param parsed_behavior_id: :param behavior_spec: specifications for policy construction :return policy @@ -226,7 +222,6 @@ def add_policy( ) -> None: """ Adds policy to trainer. - :param parsed_behavior_id: Behavior identifiers that the policy should belong to. :param policy: Policy to associate with name_behavior_id. """ @@ -254,7 +249,6 @@ def add_policy( def get_policy(self, name_behavior_id: str) -> Policy: """ Gets policy from trainer associated with name_behavior_id - :param name_behavior_id: full identifier of policy """ @@ -264,7 +258,6 @@ def get_policy(self, name_behavior_id: str) -> Policy: def discount_rewards(r, gamma=0.99, value_next=0.0): """ Computes discounted sum of future rewards for use in updating value estimate. - :param r: List of rewards. :param gamma: Discount factor. :param value_next: T+1 value estimate for returns calculation. @@ -281,7 +274,6 @@ def discount_rewards(r, gamma=0.99, value_next=0.0): def get_gae(rewards, value_estimates, value_next=0.0, gamma=0.99, lambd=0.95): """ Computes generalized advantage estimate for use in updating policy. - :param rewards: list of rewards for time-steps t to T. :param value_next: Value estimate for time-step T+1. :param value_estimates: list of value estimates for time-steps t to T. From 31f400d4fd77ca55902d0adf532e90555f637076 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 09:45:07 -0800 Subject: [PATCH 19/21] undo undesired change, undo whitespace --- ml-agents/mlagents/trainers/stats.py | 3 +-- ml-agents/mlagents/trainers/trainer/rl_trainer.py | 6 ------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 858a5767c6..c55c4a471c 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -40,7 +40,7 @@ def _dict_to_str(param_dict: Dict[str, Any], num_tabs: int) -> str: ) -class StatsSummary(NamedTuple): +class StatsSummary(NamedTuple): # pylint: disable=inherit-non-class mean: float std: float num: int @@ -145,7 +145,6 @@ def write_stats( log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}") log_info.append(f"Std of Reward: {stats_summary.std:0.3f}") - log_info.append(f"Num of Reward: {stats_summary.num:0.3f}") log_info.append(is_training) if self.self_play and "Self-play/ELO" in values: diff --git a/ml-agents/mlagents/trainers/trainer/rl_trainer.py b/ml-agents/mlagents/trainers/trainer/rl_trainer.py index 0653cb2b8e..93fa60d551 100644 --- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py +++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py @@ -186,7 +186,6 @@ def save_model(self) -> None: def _update_policy(self) -> bool: """ Uses demonstration_buffer to update model. - :return: Whether or not the policy was updated. 
""" pass @@ -194,7 +193,6 @@ def _update_policy(self) -> bool: def _increment_step(self, n_steps: int, name_behavior_id: str) -> None: """ Increment the step count of the trainer - :param n_steps: number of steps to increment the step count by """ self.step += n_steps @@ -209,7 +207,6 @@ def _increment_step(self, n_steps: int, name_behavior_id: str) -> None: def _get_next_interval_step(self, interval: int) -> int: """ Get the next step count that should result in an action. - :param interval: The interval between actions. """ return self.step + (interval - self.step % interval) @@ -225,7 +222,6 @@ def _write_summary(self, step: int) -> None: def _process_trajectory(self, trajectory: Trajectory) -> None: """ Takes a trajectory and processes it, putting it into the update buffer. - :param trajectory: The Trajectory tuple containing the steps to be processed. """ self._maybe_write_summary(self.get_step + len(trajectory.steps)) @@ -236,7 +232,6 @@ def _maybe_write_summary(self, step_after_process: int) -> None: """ If processing the trajectory will make the step exceed the next summary write, write the summary. This logic ensures summaries are written on the update step and not in between. - :param step_after_process: the step count after processing the next trajectory. """ if self._next_summary_step == 0: # Don't write out the first one @@ -248,7 +243,6 @@ def _maybe_save_model(self, step_after_process: int) -> None: """ If processing the trajectory will make the step exceed the next model write, save the model. This logic ensures models are written on the update step and not in between. - :param step_after_process: the step count after processing the next trajectory. """ if self._next_save_step == 0: # Don't save the first one From eaf51a824c87ec6c08d4db1150b60d15ebffe0fb Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 09:48:33 -0800 Subject: [PATCH 20/21] revert unit test logging strings --- ml-agents/mlagents/trainers/tests/test_stats.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index 010ed48efd..8a8ff19a49 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -178,8 +178,7 @@ def test_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. Training.", - cm.output[0], + "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0] ) self.assertIn("Not Training.", cm.output[1]) @@ -209,6 +208,5 @@ def test_selfplay_console_writer(self): ) self.assertIn( - "Mean Reward: 1.000. Std of Reward: 1.000. Num of Reward: 1.000. Training.", - cm.output[0], + "Mean Reward: 1.000. Std of Reward: 1.000. Training.", cm.output[0] ) From 201f0996525a7a07c8d7405ac1e54e3272439500 Mon Sep 17 00:00:00 2001 From: Chris Elion Date: Fri, 8 Jan 2021 10:04:15 -0800 Subject: [PATCH 21/21] changelog --- com.unity.ml-agents/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index eccb84ac1f..4a6e59f099 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -15,6 +15,9 @@ and this project adheres to ### Minor Changes #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) +- `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. 
This
+will result in the values being summed (instead of averaged) when written to
+TensorBoard. Thanks to @brccabral for the contribution! (#4816)

 #### ml-agents / ml-agents-envs / gym-unity (Python)
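Usage note: a minimal sketch of how the new aggregation method can be recorded from an Agent script, assuming the changes in the patches above are applied. The class name GoalReporterAgent and the ReportGoal helper are illustrative assumptions and are not part of these patches; only Academy.Instance.StatsRecorder, StatsRecorder.Add and StatAggregationMethod.Sum come from the code shown in the diffs.

    using Unity.MLAgents;

    // Minimal sketch (not part of the patches): an Agent that reports goal
    // outcomes with the new Sum aggregation. Values recorded during a summary
    // period are added together before being reported, instead of averaged
    // (Average) or overwritten (MostRecent).
    public class GoalReporterAgent : Agent
    {
        StatsRecorder m_Recorder;

        public override void Initialize()
        {
            m_Recorder = Academy.Instance.StatsRecorder;
        }

        // Hypothetical helper; call it whenever an episode ends with a goal event.
        public void ReportGoal(bool correct)
        {
            var key = correct ? "Goal/Correct" : "Goal/Wrong";
            m_Recorder.Add(key, 1f, StatAggregationMethod.Sum);
        }
    }

On the trainer side, keys recorded this way keep their summed value through the summary period; the GaugeWriter change in stats.py, for example, now emits both a .mean and a .sum gauge per statistic, so the summed total is available alongside the average.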