Develop generalization training #2232


Merged
merged 70 commits into from Jul 25, 2019

Changes from 17 commits

Commits (70)
c650e8a
Adding new command line arguments
sankalp04 Jun 21, 2019
f78d974
Allow generalization training with specified arguments of min_reward …
sankalp04 Jun 28, 2019
643e816
Added sampler_class in mlagents/envs
sankalp04 Jun 28, 2019
d90031b
Change reset parameters based on reward or progress metric
sankalp04 Jul 2, 2019
a4cf9ea
Include error checking in sampling methods
sankalp04 Jul 2, 2019
e0cd1df
Incorporate generalization checks for resetting parameters in take_step
sankalp04 Jul 2, 2019
33fe8ae
Add Sampler error to track errors in the sampler class
sankalp04 Jul 2, 2019
cf0eae1
Add LessonControllerError to track errors in LessonController
sankalp04 Jul 2, 2019
155cd4e
Get rid of dead code and clean up code
sankalp04 Jul 2, 2019
65884b3
Removed check_key and replaced with **param_dict for implicit type ch…
sankalp04 Jul 3, 2019
7b21f29
Instantiate SamplerManager in learn.py instead of trainer_controller
sankalp04 Jul 3, 2019
7a1944d
Made the code cleanup changes - mostly nit changes
sankalp04 Jul 3, 2019
6b3d1c0
Cleanup PPOTrainer call and env_reset
sankalp04 Jul 3, 2019
fee2b14
Example parameter sampling file config
sankalp04 Jul 8, 2019
626b0b7
Updated example sampler config file
sankalp04 Jul 8, 2019
a194379
Remove LessonControllerException
sankalp04 Jul 8, 2019
381ff86
Conduct training without lesson_controller
sankalp04 Jul 8, 2019
e8c1e80
Remove dead code
sankalp04 Jul 8, 2019
6b82251
Suggested stylistic and nit changes made
sankalp04 Jul 8, 2019
436987e
Use seed passed in learn.py instead of in sampler config
sankalp04 Jul 10, 2019
abd4c03
Init commit of sampler tests
sankalp04 Jul 10, 2019
bfb7bd0
Init commit to sampler_class tests
sankalp04 Jul 10, 2019
390d98e
Incorrect file in test folder
sankalp04 Jul 10, 2019
e5423bc
Add tests
sankalp04 Jul 11, 2019
7ecc6d8
Add safety check in case samplers provided with incorrect or insuffic…
sankalp04 Jul 17, 2019
0f302d7
Updated test files with tests for sampler class
sankalp04 Jul 17, 2019
2f1f814
Fixed syntax errors and removed dead code
sankalp04 Jul 17, 2019
23fbff8
Changed exception types in test and imported sampler types for checks
sankalp04 Jul 17, 2019
b9a7f2a
Modify test_learn to accomodate sampler argument
sankalp04 Jul 17, 2019
1ce970c
Fixed tests
sankalp04 Jul 17, 2019
56810c0
Fixed test case of different instantiations of sampler_manager in test
sankalp04 Jul 17, 2019
ea5f4db
Attempt at mocking the sampler_class
sankalp04 Jul 17, 2019
68bdb59
Removed unused function trailing from use of lesson_controller
sankalp04 Jul 17, 2019
4d7c79f
Merge branch 'develop' into develop-generalizationTraining
sankalp04 Jul 18, 2019
9d1acc5
Resolved conflicts, finished type annotations, nit changes and accomo…
sankalp04 Jul 19, 2019
5063818
Trainer controller argument name changed so tests are changed accordi…
sankalp04 Jul 19, 2019
11e2c78
Modified tests
sankalp04 Jul 19, 2019
165ab57
Remove extra arg in test case
sankalp04 Jul 19, 2019
d5b0838
Fixed pytest failure in checks
sankalp04 Jul 19, 2019
c3b372c
Fixed style from black
sankalp04 Jul 19, 2019
bb84831
Fixed pending style issues
sankalp04 Jul 19, 2019
c441877
Fixed spacing issues
sankalp04 Jul 19, 2019
8f1c7a8
Fixed missing type annotation
sankalp04 Jul 19, 2019
c8493c0
Check steps for none for style check
sankalp04 Jul 19, 2019
ecdc734
Revert "Check steps for none for style check"
sankalp04 Jul 19, 2019
d4c9163
Check steps for none for type checking
sankalp04 Jul 19, 2019
337ef3e
Check lesson duration for None for style check
sankalp04 Jul 19, 2019
e285039
Clean test code to instantiate tests within function
sankalp04 Jul 19, 2019
ddc2e60
Init commit to generalization docs
sankalp04 Jul 23, 2019
e8ad471
Change key to resampling-interval
sankalp04 Jul 23, 2019
e6a1029
Remove 3D ball curves and fix formatting
sankalp04 Jul 23, 2019
30e0727
Add comments and finish annotations
sankalp04 Jul 23, 2019
603c5c8
Remove attribution between tests and environments
sankalp04 Jul 23, 2019
fb74bbe
Rename lesson-length to resampling_interval
sankalp04 Jul 23, 2019
3a5f033
Modify test cases to reflect variable name changes
sankalp04 Jul 23, 2019
ce152ee
Fix bug
sankalp04 Jul 23, 2019
b5b97bd
Remove extra file imports in test_learn
sankalp04 Jul 23, 2019
f545ed1
Merge branch 'develop' into develop-generalizationTraining
sankalp04 Jul 23, 2019
0e703e2
Modify test to include sampler_manager
sankalp04 Jul 23, 2019
408b32b
Fix style changes
sankalp04 Jul 23, 2019
03de771
Fixed formatting and style part II
sankalp04 Jul 23, 2019
c47ccfd
Fixed more style
sankalp04 Jul 23, 2019
44dffd7
Including missed file in style fixes
sankalp04 Jul 23, 2019
79f2079
Finish annotations
sankalp04 Jul 23, 2019
092e370
Address comments for docs
sankalp04 Jul 23, 2019
bb8654f
Fix black's bare except error
sankalp04 Jul 23, 2019
188dec7
Fixed type annotations
sankalp04 Jul 24, 2019
3b63502
Fixed final type annotation error
sankalp04 Jul 24, 2019
d93f79e
Fix training docs
sankalp04 Jul 24, 2019
f79263b
Merge branch 'develop' into develop-generalizationTraining
sankalp04 Jul 24, 2019
19 changes: 19 additions & 0 deletions config/generalize_test.yaml
@@ -0,0 +1,19 @@
episode-length: 1000

mass:
    sampler-type: "uniform"
    min_value: 0.5
    max_value: 10
    seed: 5

gravity:
    sampler-type: "uniform"
    min_value: 7
    max_value: 12
    seed: 5

scale:
    sampler-type: "uniform"
    min_value: 0.75
    max_value: 3
    seed: 5
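
Note: this file only exercises the "uniform" sampler. For illustration, a "multirange_uniform" entry might look like the sketch below; the intervals key matches MultiRangeUniformSampler's constructor argument in this PR, but the values here are made up:

mass:
    sampler-type: "multirange_uniform"
    intervals: [[0.5, 2.0], [5.0, 10.0]]
    seed: 5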
6 changes: 6 additions & 0 deletions ml-agents-envs/mlagents/envs/exception.py
@@ -26,6 +26,12 @@ class UnityActionException(UnityException):

pass

class SamplerException(UnityException):
"""
Related to errors with the sampler actions.
"""

pass

class UnityTimeOutException(UnityException):
"""
99 changes: 99 additions & 0 deletions ml-agents-envs/mlagents/envs/sampler_class.py
@@ -0,0 +1,99 @@
import numpy as np
from functools import *
from abc import ABC, abstractmethod

from .exception import SamplerException

class SamplerException(Exception):
Contributor: BTW, this should inherit from UnityException, as with all of the other exception classes.

Contributor Author: Instance of dead code I was using for testing; removed in the next update.

pass

class Sampler(ABC):

@abstractmethod
def sample_parameter(self, *args, **kwargs):
Contributor: sample_parameter shouldn't take args and kwargs (or should maybe take a context arg if you're designing for the future).

Contributor: Type annotation for the return type: -> float:

Contributor: And the same annotation for all the implementations.

pass


class UniformSampler(Sampler):
# kwargs acts as a sink for extra unneeded args
def __init__(self, min_value, max_value, **kwargs):
self.min_value = min_value
self.max_value = max_value

def sample_parameter(self):
return np.random.uniform(self.min_value, self.max_value)


class MultiRangeUniformSampler(Sampler):
def __init__(self, intervals, **kwargs):
Contributor: I think you should add more comments on these classes. You need to define what each sampler does (uniform vs. normal, for instance) as well as the parameters' types and purpose. For example: intervals is a list of floats; this needs to be specified somewhere.

self.intervals = intervals
# Measure the length of the intervals
self.interval_lengths = list(map(lambda x: abs(x[1] - x[0]), self.intervals))
Contributor: I find list comprehensions more readable than maps and reduces (and reduce was also moved out of the builtins in Python 3). I think this works:

self.interval_lengths = [abs(x[1] - x[0]) for x in self.intervals]
self.cum_interval_length = sum(self.interval_lengths)
self.interval_weights = [x / self.cum_interval_length for x in self.interval_lengths]

and I also think you can make interval_lengths and cum_interval_length local variables.

Contributor Author: Converted them to local variables.

# Cumulative size of the intervals
self.cum_interval_length = reduce(lambda x,y: x + y, self.interval_lengths, 0)
# Assign weights to an interval proportionate to the interval size
self.interval_weights = list(map(lambda x: x/self.cum_interval_length, self.interval_lengths))


def sample_parameter(self):
cur_min, cur_max = self.intervals[np.random.choice(len(self.intervals), p=self.interval_weights)]
return np.random.uniform(cur_min, cur_max)


class GaussianSampler(Sampler):
def __init__(self, mean, var, **kwargs):
self.mean = mean
self.var = var

def sample_parameter(self):
return np.random.normal(self.mean, self.var)
Contributor: variance != standard deviation

Contributor Author: Good catch, let me fix the naming.
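
For reference, a minimal sketch of the rename being discussed; the parameter name st_dev is an assumption, the point being that np.random.normal's second argument (scale) expects a standard deviation, not a variance:

import numpy as np

class GaussianSampler(Sampler):
    # "st_dev" is a hypothetical rename of "var"; np.random.normal's
    # scale argument is a standard deviation.
    def __init__(self, mean, st_dev, **kwargs):
        self.mean = mean
        self.st_dev = st_dev

    def sample_parameter(self):
        return np.random.normal(self.mean, self.st_dev)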



# To introduce new sampling methods, just need to 'register' them to this sampler factory
Contributor: This comment should be formatted consistently with the other comments of the repo.

class SamplerFactory:
NAME_TO_CLASS = {
Contributor: Damn, I like this.

"uniform": UniformSampler,
"gaussian": GaussianSampler,
"multirange_uniform": MultiRangeUniformSampler,
}

@staticmethod
def register_sampler(name, sampler_cls):
SamplerFactory.NAME_TO_CLASS[name] = sampler_cls

@staticmethod
def init_sampler_class(name, param_dict):
if name not in SamplerFactory.NAME_TO_CLASS:
raise SamplerException(
name + " sampler is not registered in the SamplerFactory."
" Use the register_sample method to register the string"
" associated to your sampler in the SamplerFactory."
)
sampler_cls = SamplerFactory.NAME_TO_CLASS[name]
return sampler_cls(**param_dict)
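
Since registration is just a dictionary write, plugging in a custom distribution is a one-liner. A sketch under stated assumptions — TriangularSampler and its parameters are invented for illustration and are not part of this PR:

import numpy as np

class TriangularSampler(Sampler):
    # Hypothetical sampler, shown only to demonstrate registration.
    def __init__(self, low, mode, high, **kwargs):
        self.low = low
        self.mode = mode
        self.high = high

    def sample_parameter(self):
        return np.random.triangular(self.low, self.mode, self.high)

SamplerFactory.register_sampler("triangular", TriangularSampler)
sampler = SamplerFactory.init_sampler_class(
    "triangular", {"low": 0.5, "mode": 1.0, "high": 3.0}
)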


class SamplerManager:
def __init__(self, reset_param_dict):
self.reset_param_dict = reset_param_dict
self.samplers = {}
if reset_param_dict == None:
return
for param_name, cur_param_dict in self.reset_param_dict.items():
if "sampler-type" not in cur_param_dict:
raise SamplerException(
"'sampler_type' argument hasn't been supplied for the {0} parameter".format(param_name)
)
sampler_name = cur_param_dict.pop("sampler-type")
param_sampler = SamplerFactory.init_sampler_class(sampler_name, cur_param_dict)

self.samplers[param_name] = param_sampler

def sample_all(self):
Contributor: Suggested change:
-    def sample_all(self):
+    def sample_all(self) -> Dict[str, float]:

res = {}
if self.samplers == {}:
Contributor: I'm not sure this is what you want here; it's definitely more pythonic to do if not self.samplers:. But even then, you don't need this if at all, since a for loop over an empty dict simply iterates zero times.

pass
else:
for param_name, param_sampler in list(self.samplers.items()):
Contributor: Don't need the list() here (only if we were trying for some sort of Python 2+3 compatibility).

res[param_name] = param_sampler.sample_parameter()
return res
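
Folding the three review suggestions above together, sample_all could look something like this sketch (assuming Dict is imported from typing; this is not necessarily the code as merged):

from typing import Dict

def sample_all(self) -> Dict[str, float]:
    # Iterating an empty dict yields nothing, so no emptiness check is needed.
    res = {}
    for param_name, param_sampler in self.samplers.items():
        res[param_name] = param_sampler.sample_parameter()
    return res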
3 changes: 3 additions & 0 deletions ml-agents/mlagents/trainers/exception.py
@@ -23,3 +23,6 @@ class MetaCurriculumError(TrainerError):
"""
Any error related to the configuration of a metacurriculum.
"""

Contributor: Extra line?

pass

56 changes: 39 additions & 17 deletions ml-agents/mlagents/trainers/learn.py
@@ -16,7 +16,8 @@
from mlagents.trainers.exception import TrainerError
from mlagents.trainers import MetaCurriculumError, MetaCurriculum
from mlagents.envs import UnityEnvironment
from mlagents.envs.exception import UnityEnvironmentException
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.exception import UnityEnvironmentException, SamplerException
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs.subprocess_environment import SubprocessUnityEnvironment

@@ -52,6 +53,8 @@ def run_training(sub_id: int, run_seed: int, run_options, process_queue):
fast_simulation = not bool(run_options["--slow"])
no_graphics = run_options["--no-graphics"]
trainer_config_path = run_options["<trainer-config-path>"]
sampler_file_path = run_options["--sampler"] if run_options["--sampler"] != "None" else None

# Recognize and use docker volume if one is passed as an argument
if not docker_target_name:
model_path = "./models/{run_id}-{sub_id}".format(run_id=run_id, sub_id=sub_id)
@@ -73,6 +76,21 @@ def run_training(sub_id: int, run_seed: int, run_options, process_queue):
docker_target_name=docker_target_name
)

sampler = None
Contributor: nit: I think sampler_config is a better name for this; sampler implies it's an instance of a Sampler.

lesson_length = None
if sampler_file_path is not None:
sampler = load_config(sampler_file_path)
if ("episode-length") in sampler:
lesson_length = sampler["episode-length"]
del sampler["episode-length"]
else:
raise SamplerException(
"Episode Length was not specified in the sampler file."
" Please specify it with the 'episode-length' key in the sampler config file."
)
sampler_manager = SamplerManager(sampler)


trainer_config = load_config(trainer_config_path)
env_factory = create_environment_factory(
env_path,
@@ -84,6 +102,7 @@ def run_training(sub_id: int, run_seed: int, run_options, process_queue):
env = SubprocessUnityEnvironment(env_factory, num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)


# Create controller and begin training.
tc = TrainerController(
model_path,
@@ -98,6 +117,8 @@ def run_training(sub_id: int, run_seed: int, run_options, process_queue):
env.external_brains,
run_seed,
fast_simulation,
sampler_manager,
lesson_length,
)

# Signal that environment has been launched.
@@ -242,22 +263,23 @@ def main():
mlagents-learn --help

Options:
--env=<file> Name of the Unity executable [default: None].
--curriculum=<directory> Curriculum json directory for environment [default: None].
--keep-checkpoints=<n> How many model checkpoints to keep [default: 5].
--lesson=<n> Start learning from this lesson [default: 0].
--load Whether to load the model or randomly initialize [default: False].
--run-id=<path> The directory name for model and summary statistics [default: ppo].
--num-runs=<n> Number of concurrent training sessions [default: 1].
--save-freq=<n> Frequency at which to save model [default: 50000].
--seed=<n> Random seed used for training [default: -1].
--slow Whether to run the game at training speed [default: False].
--train Whether to train model, or only run inference [default: False].
--base-port=<n> Base port for environment communication [default: 5005].
--num-envs=<n> Number of parallel environments to use for training [default: 1]
--docker-target-name=<dt> Docker volume to store training-specific files [default: None].
--no-graphics Whether to run the environment in no-graphics mode [default: False].
--debug Whether to run ML-Agents in debug mode with detailed logging [default: False].
--env=<file> Name of the Unity executable [default: None].
--curriculum=<directory> Curriculum json directory for environment [default: None].
--keep-checkpoints=<n> How many model checkpoints to keep [default: 5].
--lesson=<n> Start learning from this lesson [default: 0].
--load Whether to load the model or randomly initialize [default: False].
--run-id=<path> The directory name for model and summary statistics [default: ppo].
--num-runs=<n> Number of concurrent training sessions [default: 1].
--save-freq=<n> Frequency at which to save model [default: 50000].
--seed=<n> Random seed used for training [default: -1].
--slow Whether to run the game at training speed [default: False].
--train Whether to train model, or only run inference [default: False].
--base-port=<n> Base port for environment communication [default: 5005].
--num-envs=<n> Number of parallel environments to use for training [default: 1]
--docker-target-name=<dt> Docker volume to store training-specific files [default: None].
--no-graphics Whether to run the environment in no-graphics mode [default: False].
--sampler=<directory> Reset parameter yaml directory for sampling of environment reset parameters [default: None].
Contributor: Is this a directory or a file? In the code, it looks like a file.

--debug Whether to run ML-Agents in debug mode with detailed logging [default: False].
"""

options = docopt(_USAGE)
76 changes: 56 additions & 20 deletions ml-agents/mlagents/trainers/trainer_controller.py
@@ -22,7 +22,9 @@
from mlagents.trainers.meta_curriculum import MetaCurriculum



class TrainerController(object):
# Type of reset_param_dict unspecified as typing library does not support heterogeneous dictionary types
Contributor: You should be able to use Dict[str, Any] for heterogeneous values (or Dict[str, Union[type1, type2]] if you know the possible types). But I think the proto specifies string keys and float values.

Contributor Author: Trailing comment from old code, no longer relevant here, but the type information is good to know.

def __init__(
self,
model_path: str,
@@ -37,6 +39,8 @@ def __init__(
external_brains: Dict[str, BrainParameters],
training_seed: int,
fast_simulation: bool,
sampler_manager,
Contributor (chriselion, Jul 8, 2019): Type annotation here: sampler_manager: SamplerManager,

Contributor Author: Added the type annotation.

lesson_length: Optional[int],
):
"""
:param model_path: Path to save the model.
@@ -50,6 +54,8 @@ def __init__(
:param lesson: Start learning from this lesson.
:param external_brains: dictionary of external brain names to BrainInfo objects.
:param training_seed: Seed to use for Numpy and Tensorflow random number generation.
:param sampler_manager: SamplerManager object which stores information about samplers to use for the reset parameters.
:param lesson_length: Specifies number of steps after which reset parameters are resampled.
"""

self.model_path = model_path
@@ -72,6 +78,8 @@ def __init__(
self.fast_simulation = fast_simulation
np.random.seed(self.seed)
tf.set_random_seed(self.seed)
self.sampler_manager = sampler_manager
self.lesson_length = lesson_length
Contributor: A bit uncomfortable with multiple things called lesson_length. This lesson length is different from the one used by meta_curriculum, right? Is it the same as the episode-length defined in the YAML file?

Contributor Author: This is the same as the episode-length in the yaml file; not sure episode length would be appropriate either. Any thoughts about lesson_duration instead?

Contributor: I would use something that doesn't involve the word lesson (curriculum) or episode (the time until Done()). Maybe resampling_interval or something similar? Whatever you choose, it should be the same in the yaml and in the code.


def _get_measure_vals(self):
if self.meta_curriculum:
@@ -90,6 +98,7 @@ def _get_measure_vals(self):
measure_val = np.mean(self.trainers[brain_name].reward_buffer)
brain_names_to_measure_vals[brain_name] = measure_val
return brain_names_to_measure_vals

else:
return None

@@ -167,13 +176,16 @@ def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
self.run_id,
)
elif trainer_parameters_dict[brain_name]["trainer"] == "ppo":
# Find lesson length based on the form of learning
if self.meta_curriculum:
lesson_length = self.meta_curriculum.brains_to_curriculums[
brain_name].min_lesson_length
else:
lesson_length = 0

self.trainers[brain_name] = PPOTrainer(
self.external_brains[brain_name],
self.meta_curriculum.brains_to_curriculums[
brain_name
].min_lesson_length
if self.meta_curriculum
else 0,
lesson_length,
trainer_parameters_dict[brain_name],
self.train_model,
self.load_model,
@@ -203,20 +215,30 @@ def _create_model_path(model_path):
"permissions are set correctly.".format(model_path)
)

@staticmethod
def _check_reset_params(reset_params, new_config):
Contributor: Just checking, where is this method called?

Contributor Author: Code trailing from the use of lesson_controller; not needed anymore, so it is removed in the next update.

for k in new_config:
if (k in reset_params) and (isinstance(config[k], (int, float))):
continue
elif not isinstance(new_config[k], (int, float)):
raise UnityEnvironmentException(
"The parameter '{0}'' generated by the sampler doesn't exist in this environment.".format(
k
)
)

def _reset_env(self, env: BaseUnityEnvironment):
"""Resets the environment.

Returns:
A Data structure corresponding to the initial reset state of the
environment.
"""
if self.meta_curriculum is not None:
return env.reset(
train_mode=self.fast_simulation,
config=self.meta_curriculum.get_config(),
)
else:
return env.reset(train_mode=self.fast_simulation)
sampled_reset_param = self.sampler_manager.sample_all()
new_meta_curriculum_config = (self.meta_curriculum.get_config()
if self.meta_curriculum else {})
sampled_reset_param.update(new_meta_curriculum_config)
Contributor: Does this mean meta_curriculum would override sampler_manager if provided? I'd just make sure this is clear in the documentation, or say that you have to use one or the other.

Contributor: It only overrides if they conflict on a key. One of them has to take priority; not sure which one it should be, though.

Contributor Author: meta_curriculum does take priority over generalization training, as this made the most sense.

return env.reset(train_mode = self.fast_simulation, config = sampled_reset_param)
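
The priority discussed above falls out of dict.update: keys in new_meta_curriculum_config overwrite matching keys in sampled_reset_param. A tiny sketch with made-up values:

sampled = {"gravity": 9.1, "mass": 2.0}  # from the samplers
meta = {"gravity": 9.8}                  # from meta_curriculum
sampled.update(meta)                     # meta_curriculum wins on key conflicts
print(sampled)                           # {'gravity': 9.8, 'mass': 2.0}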

def start_learning(self, env: BaseUnityEnvironment, trainer_config):
# TODO: Should be able to start learning at different lesson numbers
@@ -263,6 +285,22 @@ def start_learning(self, env: BaseUnityEnvironment, trainer_config):
self._write_training_metrics()
self._export_graph()

def end_trainer_episodes(self, env, lessons_incremented):
Contributor: Type annotations. Suggested change:
-    def end_trainer_episodes(self, env, lessons_incremented):
+    def end_trainer_episodes(self, env: BaseUnityEnvironment, lessons_incremented: Dict[str, bool]) -> (whatever _reset_env returns):

curr_info = self._reset_env(env)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()
for brain_name, changed in lessons_incremented.items():
if changed:
self.trainers[brain_name].reward_buffer.clear()
Contributor: If we're doing a generalization reset, do we need to clear the reward buffers? I don't think that will currently happen.

Contributor Author (sankalp04, Jul 19, 2019): Confirming: there is no need to clear reward buffers in the case of a generalization reset, as generalization doesn't use reward buffers to decide when to reset.

Contributor: As discussed offline, please add a comment to this effect; it's not obvious from the code.

return curr_info

def check_empty_sampler_manager(self):
"""
If self.samplers is empty, then bool of it returns false, indicating
there is no sampler manager.
"""
return not bool(self.sampler_manager.samplers)
Contributor: How about making this a method or property on SamplerManager instead?

Contributor: +1 on making check_empty_sampler_manager part of SamplerManager.

Contributor Author: Made the check part of the SamplerManager class.
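
A minimal sketch of what moving the check onto SamplerManager could look like (the method name is_empty is an assumption, not necessarily what was merged):

class SamplerManager:
    ...
    def is_empty(self) -> bool:
        # True when no samplers were configured for this run.
        return not bool(self.samplers)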


def take_step(self, env: BaseUnityEnvironment, curr_info: AllBrainInfo):
if self.meta_curriculum:
# Get the sizes of the reward buffers.
@@ -279,14 +317,12 @@ def take_step(self, env: BaseUnityEnvironment, curr_info: AllBrainInfo):

# If any lessons were incremented or the environment is
# ready to be reset
if self.meta_curriculum and any(lessons_incremented.values()):
curr_info = self._reset_env(env)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()
for brain_name, changed in lessons_incremented.items():
if changed:
self.trainers[brain_name].reward_buffer.clear()

if ( ((self.meta_curriculum) and any(lessons_incremented.values()))
Contributor: This is way too complicated. If you're actually checking what the comment says, then split it up into something like:

lessons_were_incremented = ...
ready_for_reset = ...
if lessons_were_incremented or ready_for_reset:
    # do stuff

(Also watch out for the modulo by zero.)

Contributor Author: Added a check to ensure that global_step isn't 0, for modulo safety.

or ( (not self.check_empty_sampler_manager()) and (self.global_step % self.lesson_length == 0)
and (self.global_step != 0)) ):
curr_info = self.end_trainer_episodes(env, lessons_incremented)
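
Spelling out the reviewer's suggested refactor as a sketch (the variable names follow the comment above; this is not the code as merged):

lessons_were_incremented = bool(self.meta_curriculum) and any(
    lessons_incremented.values()
)
ready_for_reset = (
    not self.check_empty_sampler_manager()
    and self.global_step != 0                       # guard against modulo by zero
    and self.global_step % self.lesson_length == 0  # time to resample
)
if lessons_were_incremented or ready_for_reset:
    curr_info = self.end_trainer_episodes(env, lessons_incremented)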


# Decide and take an action
take_action_vector = {}
take_action_memories = {}