From cff37edd5ae58396b4d937fc07f7e1dc9430e282 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Wed, 30 May 2018 14:11:33 -0700 Subject: [PATCH 01/30] First draft to specify variations in dynamics randomization A simple data structure consisting of a list of variation objects was implemented. Each variation object is an instance of the Variation class that works as a container for each of the fields used to randomize a dynamic parameter within the simulation environment. This list of variations is further tested in script test_dynamics_rand.py to verify that fields within each variation can be set and retrieved. --- rllab/dynamics_randomization/__init__.py | 1 + .../test_dynamics_rand.py | 69 +++++++++++++++ rllab/dynamics_randomization/tosser.xml | 86 +++++++++++++++++++ rllab/dynamics_randomization/variation.py | 72 ++++++++++++++++ 4 files changed, 228 insertions(+) create mode 100644 rllab/dynamics_randomization/__init__.py create mode 100644 rllab/dynamics_randomization/test_dynamics_rand.py create mode 100644 rllab/dynamics_randomization/tosser.xml create mode 100644 rllab/dynamics_randomization/variation.py diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py new file mode 100644 index 000000000..91331400b --- /dev/null +++ b/rllab/dynamics_randomization/__init__.py @@ -0,0 +1 @@ +from rllab.dynamics_randomization.variation import VariationsList diff --git a/rllab/dynamics_randomization/test_dynamics_rand.py b/rllab/dynamics_randomization/test_dynamics_rand.py new file mode 100644 index 000000000..cc422e7f3 --- /dev/null +++ b/rllab/dynamics_randomization/test_dynamics_rand.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +""" +Benchmark model mutation for dynamics randomization +""" +import os +import os.path as osp +import xml.etree.ElementTree as ET + +import numpy as np +from mujoco_py import load_model_from_xml +from mujoco_py import MjSim +from mujoco_py import MjViewer + +from rllab.dynamics_randomization import 
VariationsList + +#Execute at the root of rllab +MUJOCO_PY_PATH = os.getcwd() +TOSSER_XML = osp.join(MUJOCO_PY_PATH, "rllab/dynamics_randomization/tosser.xml") + +# Load original model text into memory +tosser = ET.parse(TOSSER_XML) + +var_list = VariationsList().\ + add_variation(".//motor[@name='a1']", "gear", "coefficient", "uniform", (0.5, 1.5)).\ + add_variation(".//motor[@name='a2']", "gear", "coefficient", "uniform", (0.5, 1.5)).\ + add_variation(".//joint[@name='wr_js']", "damping", "absolute", "uniform", (5, 15)) + +# Retrieve defaults and cache etree elems +for v in var_list.get_list(): + e = tosser.find(v.xpath) + v.elem = e + v.default = float(e.attrib[v.attrib]) + print(e) + print(v.default) + +for _ in range(1000): + # Mutate model randomly + for v in var_list.get_list(): + e = v.elem + if v.method == "coefficient": + c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) + e.attrib[v.attrib] = str(c * v.default) + elif v.method == "absolute": + c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) + e.attrib[v.attrib] = str(c) + else: + raise NotImplementedError("Unknown method") + + # Reify model + model_xml = ET.tostring(tosser.getroot()).decode("ascii") + + # Run model loop + model = load_model_from_xml(model_xml) + print(model_xml) + sim = MjSim(model) + #viewer = MjViewer(sim) + + #sim_state = sim.get_state() + + + #sim.set_state(sim_state) + + for i in range(1000): + if i < 150: + sim.data.ctrl[:] = 0.0 + else: + sim.data.ctrl[:] = -1.0 + sim.step() + #viewer.render() diff --git a/rllab/dynamics_randomization/tosser.xml b/rllab/dynamics_randomization/tosser.xml new file mode 100644 index 000000000..39cba60a4 --- /dev/null +++ b/rllab/dynamics_randomization/tosser.xml @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rllab/dynamics_randomization/variation.py 
b/rllab/dynamics_randomization/variation.py new file mode 100644 index 000000000..5653662b4 --- /dev/null +++ b/rllab/dynamics_randomization/variation.py @@ -0,0 +1,72 @@ +class Variation: + def __init__(self, xpath, attrib, method, distribution, var_range): + """ + Parameters + - xpath: path expression to identify a node within the XML file + of the MuJoCo environment. + - attrib: name of the dynamic parameter to randomize within the + node defined in xpath. + - method: if equal to "absolute", it sets the dyanmic parameter + equal to the random coefficient obtained from the distribution, or + if equal to "coefficient", it multiplies the default value provieded + in the XML file by the random coefficient. + - distribution: it specifies the probability distribution used to + obtain the random coefficient. + - var_range: it defines the range of values the random coefficient + could take. + """ + self._xpath = xpath + self._attrib = attrib + self._method = method + self._distribution = distribution + self._var_range = var_range + self._elem = None + self._default = None + + @property + def xpath(self): + return self._xpath + + @property + def elem(self): + return self._elem + + @elem.setter + def elem(self, elem): + self._elem = elem + + @property + def attrib(self): + return self._attrib + + @property + def default(self): + return self._default + + @default.setter + def default(self, default): + self._default = default + + @property + def method(self): + return self._method + + @property + def distribution(self): + return self._distribution + + @property + def var_range(self): + return self._var_range + +class VariationsList: + def __init__(self): + self._list = [] + + def add_variation(self, xpath, attrib, method, distribution, var_range): + variation = Variation(xpath, attrib, method, distribution, var_range) + self._list.append(variation) + return self + + def get_list(self): + return self._list From 0986defe7efcce9b478eafa0cf879b713d4ba16b Mon Sep 17 00:00:00 
2001 From: Angel Gonzalez Date: Wed, 30 May 2018 14:19:27 -0700 Subject: [PATCH 02/30] Apply PEP8 format with YEPF --- rllab/dynamics_randomization/test_dynamics_rand.py | 4 ++-- rllab/dynamics_randomization/variation.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/rllab/dynamics_randomization/test_dynamics_rand.py b/rllab/dynamics_randomization/test_dynamics_rand.py index cc422e7f3..b0430f41a 100644 --- a/rllab/dynamics_randomization/test_dynamics_rand.py +++ b/rllab/dynamics_randomization/test_dynamics_rand.py @@ -15,7 +15,8 @@ #Execute at the root of rllab MUJOCO_PY_PATH = os.getcwd() -TOSSER_XML = osp.join(MUJOCO_PY_PATH, "rllab/dynamics_randomization/tosser.xml") +TOSSER_XML = osp.join(MUJOCO_PY_PATH, + "rllab/dynamics_randomization/tosser.xml") # Load original model text into memory tosser = ET.parse(TOSSER_XML) @@ -57,7 +58,6 @@ #sim_state = sim.get_state() - #sim.set_state(sim_state) for i in range(1000): diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index 5653662b4..6a7118ca8 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -59,6 +59,7 @@ def distribution(self): def var_range(self): return self._var_range + class VariationsList: def __init__(self): self._list = [] From 84ca64dbb7ee53f7fe00a5daacf3349c9b3572d8 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Wed, 30 May 2018 17:39:18 -0700 Subject: [PATCH 03/30] Reimplement the fluent interface to create individual setters A setter for each field in the Variation class is used now instead of a constructor containing all fields as parameters. This allows a modular setting of fields for different configuration scenarios for dynamics randomization. To define the methods and distributions, two enumeration classes were created: VariationMethods and VariationDistributions. 
--- rllab/dynamics_randomization/__init__.py | 4 +- .../test_dynamics_rand.py | 38 ++++-- rllab/dynamics_randomization/variation.py | 115 ++++++++++++++---- 3 files changed, 120 insertions(+), 37 deletions(-) diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py index 91331400b..cac52ea44 100644 --- a/rllab/dynamics_randomization/__init__.py +++ b/rllab/dynamics_randomization/__init__.py @@ -1 +1,3 @@ -from rllab.dynamics_randomization.variation import VariationsList +from rllab.dynamics_randomization.variation import Variations +from rllab.dynamics_randomization.variation import VariationMethods +from rllab.dynamics_randomization.variation import VariationDistributions diff --git a/rllab/dynamics_randomization/test_dynamics_rand.py b/rllab/dynamics_randomization/test_dynamics_rand.py index b0430f41a..20d7978d3 100644 --- a/rllab/dynamics_randomization/test_dynamics_rand.py +++ b/rllab/dynamics_randomization/test_dynamics_rand.py @@ -11,7 +11,8 @@ from mujoco_py import MjSim from mujoco_py import MjViewer -from rllab.dynamics_randomization import VariationsList +from rllab.dynamics_randomization import Variations, VariationMethods +from rllab.dynamics_randomization import VariationDistributions #Execute at the root of rllab MUJOCO_PY_PATH = os.getcwd() @@ -21,27 +22,41 @@ # Load original model text into memory tosser = ET.parse(TOSSER_XML) -var_list = VariationsList().\ - add_variation(".//motor[@name='a1']", "gear", "coefficient", "uniform", (0.5, 1.5)).\ - add_variation(".//motor[@name='a2']", "gear", "coefficient", "uniform", (0.5, 1.5)).\ - add_variation(".//joint[@name='wr_js']", "damping", "absolute", "uniform", (5, 15)) +variations = Variations() +variations.randomize().\ + attribute("gear").\ + at_xpath(".//motor[@name='a1']").\ + with_method(VariationMethods.COEFFICIENT).\ + sampled_from(VariationDistributions.UNIFORM).\ + with_range(0.5, 1.5).\ + randomize().\ + attribute("gear").\ + 
at_xpath(".//motor[@name='a2']").\ + sampled_from(VariationDistributions.UNIFORM).\ + with_method(VariationMethods.COEFFICIENT).\ + with_range(0.5, 1.5) + +variations.randomize().\ + attribute("damping").\ + at_xpath(".//joint[@name='wr_js']").\ + with_method(VariationMethods.ABSOLUTE).\ + sampled_from(VariationDistributions.UNIFORM).\ + with_range(5, 15) # Retrieve defaults and cache etree elems -for v in var_list.get_list(): +for v in variations.get_list(): e = tosser.find(v.xpath) v.elem = e v.default = float(e.attrib[v.attrib]) - print(e) - print(v.default) for _ in range(1000): # Mutate model randomly - for v in var_list.get_list(): + for v in variations.get_list(): e = v.elem - if v.method == "coefficient": + if v.method == VariationMethods.COEFFICIENT: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) e.attrib[v.attrib] = str(c * v.default) - elif v.method == "absolute": + elif v.method == VariationMethods.ABSOLUTE: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) e.attrib[v.attrib] = str(c) else: @@ -52,7 +67,6 @@ # Run model loop model = load_model_from_xml(model_xml) - print(model_xml) sim = MjSim(model) #viewer = MjViewer(sim) diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index 6a7118ca8..c9a009c0b 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -1,25 +1,20 @@ +from enum import Enum + +class VariationMethods(Enum): + COEFFICIENT = 1 + ABSOLUTE = 2 + +class VariationDistributions(Enum): + GAUSSIAN = 1 + UNIFORM = 2 + class Variation: - def __init__(self, xpath, attrib, method, distribution, var_range): - """ - Parameters - - xpath: path expression to identify a node within the XML file - of the MuJoCo environment. - - attrib: name of the dynamic parameter to randomize within the - node defined in xpath. 
- - method: if equal to "absolute", it sets the dyanmic parameter - equal to the random coefficient obtained from the distribution, or - if equal to "coefficient", it multiplies the default value provieded - in the XML file by the random coefficient. - - distribution: it specifies the probability distribution used to - obtain the random coefficient. - - var_range: it defines the range of values the random coefficient - could take. - """ - self._xpath = xpath - self._attrib = attrib - self._method = method - self._distribution = distribution - self._var_range = var_range + def __init__(self): + self._xpath = None + self._attrib = None + self._method = None + self._distribution = None + self._var_range = None self._elem = None self._default = None @@ -27,6 +22,10 @@ def __init__(self, xpath, attrib, method, distribution, var_range): def xpath(self): return self._xpath + @xpath.setter + def xpath(self, xpath): + self._xpath = xpath + @property def elem(self): return self._elem @@ -39,6 +38,10 @@ def elem(self, elem): def attrib(self): return self._attrib + @attrib.setter + def attrib(self, attrib): + self._attrib = attrib + @property def default(self): return self._default @@ -51,23 +54,87 @@ def default(self, default): def method(self): return self._method + @method.setter + def method(self, method): + self._method = method + @property def distribution(self): return self._distribution + @distribution.setter + def distribution(self, distribution): + self._distribution = distribution + @property def var_range(self): return self._var_range + @var_range.setter + def var_range(self, var_range): + self._var_range = var_range + -class VariationsList: +class Variations: def __init__(self): self._list = [] - def add_variation(self, xpath, attrib, method, distribution, var_range): - variation = Variation(xpath, attrib, method, distribution, var_range) + def randomize(self): + variation = Variation() self._list.append(variation) return self + def at_xpath(self, xpath): + """ + 
Parameters + - xpath: path expression to identify a node within the XML file + of the MuJoCo environment. + """ + if self._list: + self._list[-1].xpath = xpath + return self + + def attribute(self, attrib): + """ + Parameters + - attrib: name of the dynamic parameter to randomize within the + node defined in xpath. + """ + if self._list: + self._list[-1].attrib = attrib + return self + + def with_method(self, method): + """ + Parameters + - method: if equal to "absolute", it sets the dyanmic parameter + equal to the random coefficient obtained from the distribution, or + if equal to "coefficient", it multiplies the default value provieded + in the XML file by the random coefficient. + """ + if self._list: + self._list[-1].method = method + return self + + def sampled_from(self, distribution): + """ + Parameters + - distribution: it specifies the probability distribution used to + obtain the random coefficient. + """ + if self._list: + self._list[-1].distribution = distribution + return self + + def with_range(self, low, high): + """ + Parameters + - low: inclusive low value of the range + - high: exclusive high value of the range + """ + if self._list: + self._list[-1].var_range = (low, high) + return self + def get_list(self): return self._list From 12c7f3604eb2f6dcee7ec382a73a19edfe6fd247 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Wed, 30 May 2018 17:42:11 -0700 Subject: [PATCH 04/30] Apply PEP8 format with YEPF --- rllab/dynamics_randomization/variation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index c9a009c0b..e607b015f 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -1,13 +1,16 @@ from enum import Enum + class VariationMethods(Enum): COEFFICIENT = 1 ABSOLUTE = 2 + class VariationDistributions(Enum): GAUSSIAN = 1 UNIFORM = 2 + class Variation: def __init__(self): self._xpath = None From 
9104c487f10cc4756683b4f7592564b3155d69a5 Mon Sep 17 00:00:00 2001 From: Chang Date: Thu, 31 May 2018 12:58:30 -0700 Subject: [PATCH 05/30] Add first draft of randomize environment Implement basic feature of a wrapped environment, which chooses new randomized physics params in mujoco on every reset(). --- rllab/dynamics_randomization/RandomizeEnv.py | 101 +++++++++++++++++++ rllab/dynamics_randomization/__init__.py | 1 + rllab/dynamics_randomization/trpo_swimmer.py | 39 +++++++ 3 files changed, 141 insertions(+) create mode 100644 rllab/dynamics_randomization/RandomizeEnv.py create mode 100644 rllab/dynamics_randomization/trpo_swimmer.py diff --git a/rllab/dynamics_randomization/RandomizeEnv.py b/rllab/dynamics_randomization/RandomizeEnv.py new file mode 100644 index 000000000..ae89a8592 --- /dev/null +++ b/rllab/dynamics_randomization/RandomizeEnv.py @@ -0,0 +1,101 @@ +from lxml import etree +from rllab.envs import Env +import os.path as osp +import numpy as np +from rllab.dynamics_randomization import VariationMethods +from rllab.dynamics_randomization.variation import VariationDistributions +from mujoco_py import load_model_from_xml +from mujoco_py import MjSim +from rllab.core import Serializable + +MODEL_DIR = osp.abspath( + osp.join(osp.dirname(__file__), '../../vendor/mujoco_models')) + + +class RandomizedEnv(Env, Serializable): + def __init__(self, mujoco_env, variations): + Serializable.quick_init(self, locals()) + self._wrapped_env = mujoco_env + self._variations = variations + self._file_path = osp.join(MODEL_DIR, mujoco_env.FILE) + self._model = etree.parse(self._file_path) + + for v in variations.get_list(): + e = self._model.find(v.xpath) + v.elem = e + + # todo: handle AttributeError + val = e.attrib[v.attrib].split(' ') + if len(val) == 1: + v.default = float(e.attrib[v.attrib]) + else: + v.default = np.array(list(map(float, val))) + + def reset(self): + for v in self._variations.get_list(): + e = v.elem + # todo: handle size + if v.distribution == 
VariationDistributions.GAUSSIAN: + c = np.random.normal(loc=v.var_range[0], scale=v.var_range[1]) + elif v.distribution == VariationDistributions.UNIFORM: + c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) + if v.method == VariationMethods.COEFFICIENT: + e.attrib[v.attrib] = str(c * v.default) + elif v.method == VariationMethods.ABSOLUTE: + e.attrib[v.attrib] = str(c) + else: + raise NotImplementedError("Unknown method") + + model_xml = etree.tostring(self._model.getroot()).decode("ascii") + self._wrapped_env.model = load_model_from_xml(model_xml) + self._wrapped_env.sim = MjSim(self._wrapped_env.model) + self._wrapped_env.data = self._wrapped_env.sim.data + self._wrapped_env.viewer = None + self._wrapped_env.init_qpos = self._wrapped_env.sim.data.qpos + self._wrapped_env.init_qvel = self._wrapped_env.sim.data.qvel + self._wrapped_env.init_qacc = self._wrapped_env.sim.data.qacc + self._wrapped_env.init_ctrl = self._wrapped_env.sim.data.ctrl + self._wrapped_env.qpos_dim = self._wrapped_env.init_qpos.size + self._wrapped_env.qvel_dim = self._wrapped_env.init_qvel.size + self._wrapped_env.ctrl_dim = self._wrapped_env.init_ctrl.size + self._wrapped_env.frame_skip = 1 + self._wrapped_env.dcom = None + self._wrapped_env.current_com = None + return self._wrapped_env.reset() + + def step(self, action): + return self._wrapped_env.step(action) + + def render(self, *args, **kwargs): + return self._wrapped_env.render(*args, **kwargs) + + def log_diagnostics(self, paths, *args, **kwargs): + self._wrapped_env.log_diagnostics(paths, *args, **kwargs) + + def terminate(self): + self._wrapped_env.terminate() + + def get_param_values(self): + return self._wrapped_env.get_param_values() + + def set_param_values(self, params): + self._wrapped_env.set_param_values(params) + + @property + def wrapped_env(self): + return self._wrapped_env + + @property + def action_space(self): + return self._wrapped_env.action_space + + @property + def observation_space(self): + return 
self._wrapped_env.observation_space + + @property + def horizon(self): + return self._wrapped_env.horizon + + +randomize = RandomizedEnv diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py index cac52ea44..49b5ccb97 100644 --- a/rllab/dynamics_randomization/__init__.py +++ b/rllab/dynamics_randomization/__init__.py @@ -1,3 +1,4 @@ from rllab.dynamics_randomization.variation import Variations from rllab.dynamics_randomization.variation import VariationMethods from rllab.dynamics_randomization.variation import VariationDistributions +from rllab.dynamics_randomization.RandomizeEnv import RandomizedEnv diff --git a/rllab/dynamics_randomization/trpo_swimmer.py b/rllab/dynamics_randomization/trpo_swimmer.py new file mode 100644 index 000000000..5e0e6db05 --- /dev/null +++ b/rllab/dynamics_randomization/trpo_swimmer.py @@ -0,0 +1,39 @@ +from rllab.algos import TRPO +from rllab.baselines import LinearFeatureBaseline +from rllab.envs.mujoco import SwimmerEnv +from rllab.envs import normalize +from rllab.policies import GaussianMLPPolicy +from rllab.dynamics_randomization import RandomizedEnv +from rllab.dynamics_randomization import Variations +from rllab.dynamics_randomization import VariationMethods +from rllab.dynamics_randomization import VariationDistributions + +variations = Variations() +variations.randomize().\ + at_xpath(".//geom[@name='torso']").\ + attribute("density").\ + with_method(VariationMethods.COEFFICIENT).\ + sampled_from(VariationDistributions.UNIFORM).\ + with_range(0.5, 1.5) + +env = normalize(RandomizedEnv(SwimmerEnv(), variations)) + +policy = GaussianMLPPolicy( + env_spec=env.spec, + # The neural network policy should have two hidden layers, each with 32 hidden units. 
+ hidden_sizes=(32, 32)) + +baseline = LinearFeatureBaseline(env_spec=env.spec) + +algo = TRPO( + env=env, + policy=policy, + baseline=baseline, + batch_size=4000, + max_path_length=500, + n_itr=40, + discount=0.99, + step_size=0.01, + # plot=True +) +algo.train() From 9eba2dae72e6a642cc6c6750ea1780b3b606d5c6 Mon Sep 17 00:00:00 2001 From: Chang Date: Thu, 31 May 2018 15:25:57 -0700 Subject: [PATCH 06/30] Rename RandomizeEnv.py to randomize_env.py All the Python files have file names in lowercase. To keep this standard, rename RandomizeEnv.py to randomize_env.py. --- rllab/dynamics_randomization/__init__.py | 2 +- .../{RandomizeEnv.py => randomize_env.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename rllab/dynamics_randomization/{RandomizeEnv.py => randomize_env.py} (100%) diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py index 49b5ccb97..b3b64c5db 100644 --- a/rllab/dynamics_randomization/__init__.py +++ b/rllab/dynamics_randomization/__init__.py @@ -1,4 +1,4 @@ from rllab.dynamics_randomization.variation import Variations from rllab.dynamics_randomization.variation import VariationMethods from rllab.dynamics_randomization.variation import VariationDistributions -from rllab.dynamics_randomization.RandomizeEnv import RandomizedEnv +from rllab.dynamics_randomization.randomize_env import RandomizedEnv diff --git a/rllab/dynamics_randomization/RandomizeEnv.py b/rllab/dynamics_randomization/randomize_env.py similarity index 100% rename from rllab/dynamics_randomization/RandomizeEnv.py rename to rllab/dynamics_randomization/randomize_env.py From 6fc661b9479697b612575f59850051b716cf726b Mon Sep 17 00:00:00 2001 From: Chang Date: Thu, 31 May 2018 15:47:44 -0700 Subject: [PATCH 07/30] Add features and remove variables in RandomizedEnv Add error handling in constructor and reset(). Remove variables that don't depend on self._wrapped_env.sim in reset(). Reuse MODEL_DIR in mujoco_env.py Alphabetize imports. 
--- rllab/dynamics_randomization/randomize_env.py | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/rllab/dynamics_randomization/randomize_env.py b/rllab/dynamics_randomization/randomize_env.py index ae89a8592..a14eb4818 100644 --- a/rllab/dynamics_randomization/randomize_env.py +++ b/rllab/dynamics_randomization/randomize_env.py @@ -1,15 +1,15 @@ -from lxml import etree -from rllab.envs import Env import os.path as osp -import numpy as np -from rllab.dynamics_randomization import VariationMethods -from rllab.dynamics_randomization.variation import VariationDistributions + +from lxml import etree from mujoco_py import load_model_from_xml from mujoco_py import MjSim -from rllab.core import Serializable +import numpy as np -MODEL_DIR = osp.abspath( - osp.join(osp.dirname(__file__), '../../vendor/mujoco_models')) +from rllab.envs import Env +from rllab.envs.mujoco.mujoco_env import MODEL_DIR +from rllab.core import Serializable +from rllab.dynamics_randomization import VariationMethods +from rllab.dynamics_randomization.variation import VariationDistributions class RandomizedEnv(Env, Serializable): @@ -22,9 +22,12 @@ def __init__(self, mujoco_env, variations): for v in variations.get_list(): e = self._model.find(v.xpath) + if not e: + raise AttributeError("Can't find node in xml") v.elem = e - # todo: handle AttributeError + if v.attrib not in e.attrib: + raise KeyError("Attribute doesn't exist") val = e.attrib[v.attrib].split(' ') if len(val) == 1: v.default = float(e.attrib[v.attrib]) @@ -39,6 +42,8 @@ def reset(self): c = np.random.normal(loc=v.var_range[0], scale=v.var_range[1]) elif v.distribution == VariationDistributions.UNIFORM: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) + else: + raise NotImplementedError("Unkown distribution") if v.method == VariationMethods.COEFFICIENT: e.attrib[v.attrib] = str(c * v.default) elif v.method == VariationMethods.ABSOLUTE: @@ -50,17 +55,10 @@ def reset(self): 
self._wrapped_env.model = load_model_from_xml(model_xml) self._wrapped_env.sim = MjSim(self._wrapped_env.model) self._wrapped_env.data = self._wrapped_env.sim.data - self._wrapped_env.viewer = None self._wrapped_env.init_qpos = self._wrapped_env.sim.data.qpos self._wrapped_env.init_qvel = self._wrapped_env.sim.data.qvel self._wrapped_env.init_qacc = self._wrapped_env.sim.data.qacc self._wrapped_env.init_ctrl = self._wrapped_env.sim.data.ctrl - self._wrapped_env.qpos_dim = self._wrapped_env.init_qpos.size - self._wrapped_env.qvel_dim = self._wrapped_env.init_qvel.size - self._wrapped_env.ctrl_dim = self._wrapped_env.init_ctrl.size - self._wrapped_env.frame_skip = 1 - self._wrapped_env.dcom = None - self._wrapped_env.current_com = None return self._wrapped_env.reset() def step(self, action): @@ -96,6 +94,3 @@ def observation_space(self): @property def horizon(self): return self._wrapped_env.horizon - - -randomize = RandomizedEnv From e4ce9b1bf99f9c38ea632bf021d8948a58aff542 Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Thu, 31 May 2018 16:37:23 -0700 Subject: [PATCH 08/30] Fix bugs in error handling Fix the AttributeError that was wrongly raised in the constructor when the element exists in the XML. Add error handling for the Variation.range attribute. When the range shape isn't the same as the attribute value shape, raise an AttributeError. 
--- rllab/dynamics_randomization/randomize_env.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rllab/dynamics_randomization/randomize_env.py b/rllab/dynamics_randomization/randomize_env.py index a14eb4818..c6ce8d8c2 100644 --- a/rllab/dynamics_randomization/randomize_env.py +++ b/rllab/dynamics_randomization/randomize_env.py @@ -22,7 +22,7 @@ def __init__(self, mujoco_env, variations): for v in variations.get_list(): e = self._model.find(v.xpath) - if not e: + if e is None: raise AttributeError("Can't find node in xml") v.elem = e @@ -34,10 +34,12 @@ def __init__(self, mujoco_env, variations): else: v.default = np.array(list(map(float, val))) + if len(v.var_range) != 2 * len(val): + raise AttributeError("Range shape != default value shape") + def reset(self): for v in self._variations.get_list(): e = v.elem - # todo: handle size if v.distribution == VariationDistributions.GAUSSIAN: c = np.random.normal(loc=v.var_range[0], scale=v.var_range[1]) elif v.distribution == VariationDistributions.UNIFORM: From 28d73e86c3bf70981f12560164d5916dd2efb286 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Fri, 1 Jun 2018 19:17:46 -0700 Subject: [PATCH 09/30] Add thread to generate Mujoco models --- .../mujoco_model_gen.py | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 rllab/dynamics_randomization/mujoco_model_gen.py diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py new file mode 100644 index 000000000..0482216fd --- /dev/null +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -0,0 +1,100 @@ +import threading.Thread +import threading.RLock +import threading.Event +import queue.Queue + +''' +A worker thread to produce to MuJoCo models with randomized dynamic +parameters, which are specified by the users of rllab with the class +Variations. +''' +class MujocoModelGenerator: + """ + Starts all the member fields of the class and the worker thread. 
+ Parameters + ---------- + file_path : string + The absolute path to the XML file that contains the MuJoCo + model. + variations: Variations + An list of Variation objects that indicate the dynamic parameters + to randomize in the XML file. + """ + def __init__(self, file_path, variations): + self._parsed_model = etree.parse(file_path) + self._variations = variations + + for v in variations.get_list(): + e = self.parsed_model.find(v.xpath) + if e is None: + raise AttributeError("Can't find node in xml") + v.elem = e + + if v.attrib not in e.attrib: + raise KeyError("Attribute doesn't exist") + val = e.attrib[v.attrib].split(' ') + if len(val) == 1: + v.default = float(e.attrib[v.attrib]) + else: + v.default = np.array(list(map(float, val))) + + if len(v.var_range) != 2 * len(val): + raise AttributeError("Range shape != default value shape") + # Worker Thread + self._worker_thread = Thread(target=self._generator_routine) + # Reference to the generated model + self._mujoco_model = None + # Communicates the calling thread with the worker thread by awaking + # the worker thread so as to generate a new model. + self._model_requested = Event() + # Communicates the worker thread with the calling thread by awaking + # the calling thread so as to retrieve the generated model. + self._model_ready = Event() + self._worker_thread.start() + + + """ + Gets the MuJoCo model produced by the worker thread in this class. + This call may block in case the calling thread asks for the model before + the worker thread has finished. + Returns + ------- + PyMjModel + A MuJoCo model with randomized dynamic parameters specified by the + user in this class. + """ + def get_model(self): + if not self._model_ready.is_set(): + # If the model is not ready yet, wait for it to be finished. + self._model_ready.wait() + # Cleat the event flag for the next iteration + self._model_ready.clear() + # Request a new model to the worker thread. 
+ self._model_requested.set() + return self._mujoco_model + + def _generator_routine(self): + while(True): + for v in self._variations.get_list(): + e = v.elem + if v.distribution == VariationDistributions.GAUSSIAN: + c = np.random.normal(loc=v.var_range[0], scale=v.var_range[1]) + elif v.distribution == VariationDistributions.UNIFORM: + c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) + else: + raise NotImplementedError("Unkown distribution") + if v.method == VariationMethods.COEFFICIENT: + e.attrib[v.attrib] = str(c * v.default) + elif v.method == VariationMethods.ABSOLUTE: + e.attrib[v.attrib] = str(c) + else: + raise NotImplementedError("Unknown method") + + model_xml = etree.tostring(self._parsed_model.getroot()).decode("ascii") + self._mujoco_model = load_model_from_xml(model_xml) + + # Wake up the calling thread if it was waiting + self._model_ready.set() + # Go to idle mode (wait for event) + self._model_requested.wait() + self._model_requested.clear() From 21e42ee0023279a41174f09213ce223f569c9ef5 Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Mon, 4 Jun 2018 11:38:03 -0700 Subject: [PATCH 10/30] Add thread terminate in mujoco_model_gen --- .../mujoco_model_gen.py | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index 0482216fd..edac19c25 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -1,7 +1,12 @@ -import threading.Thread -import threading.RLock -import threading.Event -import queue.Queue +from threading import Event +from threading import Thread + +from lxml import etree +from mujoco_py import load_model_from_xml +import numpy as np + +from rllab.dynamics_randomization.variation import VariationDistributions +from rllab.dynamics_randomization.variation import VariationMethods ''' A worker thread to produce to MuJoCo models with 
randomized dynamic @@ -22,10 +27,10 @@ class MujocoModelGenerator: """ def __init__(self, file_path, variations): self._parsed_model = etree.parse(file_path) - self._variations = variations + self._variations = variations for v in variations.get_list(): - e = self.parsed_model.find(v.xpath) + e = self._parsed_model.find(v.xpath) if e is None: raise AttributeError("Can't find node in xml") v.elem = e @@ -40,16 +45,17 @@ def __init__(self, file_path, variations): if len(v.var_range) != 2 * len(val): raise AttributeError("Range shape != default value shape") - # Worker Thread + # Worker Thread self._worker_thread = Thread(target=self._generator_routine) # Reference to the generated model self._mujoco_model = None - # Communicates the calling thread with the worker thread by awaking + # Communicates the calling thread with the worker thread by awaking # the worker thread so as to generate a new model. self._model_requested = Event() - # Communicates the worker thread with the calling thread by awaking + # Communicates the worker thread with the calling thread by awaking # the calling thread so as to retrieve the generated model. self._model_ready = Event() + self._stop_event = Event() self._worker_thread.start() @@ -74,7 +80,9 @@ def get_model(self): return self._mujoco_model def _generator_routine(self): - while(True): + while True: + if self._stop_event.is_set(): + return for v in self._variations.get_list(): e = v.elem if v.distribution == VariationDistributions.GAUSSIAN: @@ -98,3 +106,7 @@ def _generator_routine(self): # Go to idle mode (wait for event) self._model_requested.wait() self._model_requested.clear() + + def stop(self): + self._stop_event.set() + self._worker_thread.join(timeout=0.1) From 009ddbf78f54fdb8149d9d329e5f044660e660cf Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Mon, 4 Jun 2018 12:16:35 -0700 Subject: [PATCH 11/30] Add interrupt handler to mujoco model gen Finish the thread when the simulation is interrupted.
--- rllab/dynamics_randomization/__init__.py | 1 + .../mujoco_model_gen.py | 26 ++++++---- rllab/dynamics_randomization/randomize_env.py | 47 +++---------------- 3 files changed, 26 insertions(+), 48 deletions(-) diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py index b3b64c5db..42224f87a 100644 --- a/rllab/dynamics_randomization/__init__.py +++ b/rllab/dynamics_randomization/__init__.py @@ -1,3 +1,4 @@ +from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator from rllab.dynamics_randomization.variation import Variations from rllab.dynamics_randomization.variation import VariationMethods from rllab.dynamics_randomization.variation import VariationDistributions diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index edac19c25..90ddabab7 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -1,3 +1,4 @@ +import atexit from threading import Event from threading import Thread @@ -7,12 +8,13 @@ from rllab.dynamics_randomization.variation import VariationDistributions from rllab.dynamics_randomization.variation import VariationMethods - ''' A worker thread to produce to MuJoCo models with randomized dynamic parameters, which are specified by the users of rllab with the class Variations. ''' + + class MujocoModelGenerator: """ Starts all the member fields of the class and the worker thread. @@ -25,6 +27,7 @@ class MujocoModelGenerator: An list of Variation objects that indicate the dynamic parameters to randomize in the XML file. 
""" + def __init__(self, file_path, variations): self._parsed_model = etree.parse(file_path) self._variations = variations @@ -46,7 +49,8 @@ def __init__(self, file_path, variations): if len(v.var_range) != 2 * len(val): raise AttributeError("Range shape != default value shape") # Worker Thread - self._worker_thread = Thread(target=self._generator_routine) + self._worker_thread = Thread( + target=self._generator_routine, daemon=True) # Reference to the generated model self._mujoco_model = None # Communicates the calling thread with the worker thread by awaking @@ -57,7 +61,7 @@ def __init__(self, file_path, variations): self._model_ready = Event() self._stop_event = Event() self._worker_thread.start() - + atexit.register(self.stop) """ Gets the MuJoCo model produced by the worker thread in this class. @@ -69,6 +73,7 @@ def __init__(self, file_path, variations): A MuJoCo model with randomized dynamic parameters specified by the user in this class. """ + def get_model(self): if not self._model_ready.is_set(): # If the model is not ready yet, wait for it to be finished. 
@@ -86,9 +91,11 @@ def _generator_routine(self): for v in self._variations.get_list(): e = v.elem if v.distribution == VariationDistributions.GAUSSIAN: - c = np.random.normal(loc=v.var_range[0], scale=v.var_range[1]) + c = np.random.normal( + loc=v.var_range[0], scale=v.var_range[1]) elif v.distribution == VariationDistributions.UNIFORM: - c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) + c = np.random.uniform( + low=v.var_range[0], high=v.var_range[1]) else: raise NotImplementedError("Unkown distribution") if v.method == VariationMethods.COEFFICIENT: @@ -98,7 +105,8 @@ def _generator_routine(self): else: raise NotImplementedError("Unknown method") - model_xml = etree.tostring(self._parsed_model.getroot()).decode("ascii") + model_xml = etree.tostring( + self._parsed_model.getroot()).decode("ascii") self._mujoco_model = load_model_from_xml(model_xml) # Wake up the calling thread if it was waiting @@ -108,5 +116,7 @@ def _generator_routine(self): self._model_requested.clear() def stop(self): - self._stop_event.set() - self._worker_thread.join(timeout=0.1) + if self._worker_thread.is_alive(): + self._model_requested.set() + self._stop_event.set() + self._worker_thread.join(timeout=0.1) diff --git a/rllab/dynamics_randomization/randomize_env.py b/rllab/dynamics_randomization/randomize_env.py index c6ce8d8c2..cd5bf29e2 100644 --- a/rllab/dynamics_randomization/randomize_env.py +++ b/rllab/dynamics_randomization/randomize_env.py @@ -1,15 +1,11 @@ import os.path as osp -from lxml import etree -from mujoco_py import load_model_from_xml from mujoco_py import MjSim -import numpy as np +from rllab.core import Serializable +from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator from rllab.envs import Env from rllab.envs.mujoco.mujoco_env import MODEL_DIR -from rllab.core import Serializable -from rllab.dynamics_randomization import VariationMethods -from rllab.dynamics_randomization.variation import VariationDistributions class 
RandomizedEnv(Env, Serializable): @@ -18,43 +14,11 @@ def __init__(self, mujoco_env, variations): self._wrapped_env = mujoco_env self._variations = variations self._file_path = osp.join(MODEL_DIR, mujoco_env.FILE) - self._model = etree.parse(self._file_path) - - for v in variations.get_list(): - e = self._model.find(v.xpath) - if e is None: - raise AttributeError("Can't find node in xml") - v.elem = e - if v.attrib not in e.attrib: - raise KeyError("Attribute doesn't exist") - val = e.attrib[v.attrib].split(' ') - if len(val) == 1: - v.default = float(e.attrib[v.attrib]) - else: - v.default = np.array(list(map(float, val))) - - if len(v.var_range) != 2 * len(val): - raise AttributeError("Range shape != default value shape") + self._mujoco_model = MujocoModelGenerator(self._file_path, variations) def reset(self): - for v in self._variations.get_list(): - e = v.elem - if v.distribution == VariationDistributions.GAUSSIAN: - c = np.random.normal(loc=v.var_range[0], scale=v.var_range[1]) - elif v.distribution == VariationDistributions.UNIFORM: - c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) - else: - raise NotImplementedError("Unkown distribution") - if v.method == VariationMethods.COEFFICIENT: - e.attrib[v.attrib] = str(c * v.default) - elif v.method == VariationMethods.ABSOLUTE: - e.attrib[v.attrib] = str(c) - else: - raise NotImplementedError("Unknown method") - - model_xml = etree.tostring(self._model.getroot()).decode("ascii") - self._wrapped_env.model = load_model_from_xml(model_xml) + self._wrapped_env.model = self._mujoco_model.get_model() self._wrapped_env.sim = MjSim(self._wrapped_env.model) self._wrapped_env.data = self._wrapped_env.sim.data self._wrapped_env.init_qpos = self._wrapped_env.sim.data.qpos @@ -81,6 +45,9 @@ def get_param_values(self): def set_param_values(self, params): self._wrapped_env.set_param_values(params) + def terminate(self): + self._mujoco_model.stop() + @property def wrapped_env(self): return self._wrapped_env From 
5485bfb26e74db60ba6bb0fbf0c3d9a5f421d614 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Mon, 4 Jun 2018 14:49:03 -0700 Subject: [PATCH 12/30] Add documentation to new classes and sort import headers Other miscellaneous changes include: - Rename classes VariationsMethods and VariationDistributions to VariationsMethod and VariationDistribution respectively. - The parsing of the XML string and fetch of the dynamic parameters to randomize is now done within the worker thread. - The file randomize_env.py was renamed to randomized_env.py --- rllab/dynamics_randomization/__init__.py | 8 +- .../mujoco_model_gen.py | 137 +++++++++--------- .../{randomize_env.py => randomized_env.py} | 39 +++-- .../test_dynamics_rand.py | 29 ++-- rllab/dynamics_randomization/trpo_swimmer.py | 8 +- rllab/dynamics_randomization/variation.py | 79 ++++++++-- 6 files changed, 192 insertions(+), 108 deletions(-) rename rllab/dynamics_randomization/{randomize_env.py => randomized_env.py} (64%) diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py index 42224f87a..86e165086 100644 --- a/rllab/dynamics_randomization/__init__.py +++ b/rllab/dynamics_randomization/__init__.py @@ -1,5 +1,5 @@ -from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator from rllab.dynamics_randomization.variation import Variations -from rllab.dynamics_randomization.variation import VariationMethods -from rllab.dynamics_randomization.variation import VariationDistributions -from rllab.dynamics_randomization.randomize_env import RandomizedEnv +from rllab.dynamics_randomization.variation import VariationMethod +from rllab.dynamics_randomization.variation import VariationDistribution +from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator +from rllab.dynamics_randomization.randomized_env import RandomizedEnv diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index 
90ddabab7..7ab81b8cf 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -1,53 +1,36 @@ -import atexit +from lxml import etree +from mujoco_py import load_model_from_xml +from rllab.dynamics_randomization import VariationMethod +from rllab.dynamics_randomization import VariationDistribution from threading import Event +from threading import RLock from threading import Thread -from lxml import etree -from mujoco_py import load_model_from_xml +import atexit import numpy as np -from rllab.dynamics_randomization.variation import VariationDistributions -from rllab.dynamics_randomization.variation import VariationMethods -''' -A worker thread to produce to MuJoCo models with randomized dynamic -parameters, which are specified by the users of rllab with the class -Variations. -''' - class MujocoModelGenerator: """ - Starts all the member fields of the class and the worker thread. - Parameters - ---------- - file_path : string - The absolute path to the XML file that contains the MuJoCo - model. - variations: Variations - An list of Variation objects that indicate the dynamic parameters - to randomize in the XML file. + A worker thread to produce to MuJoCo models with randomized dynamic + parameters, which are specified by the users of rllab with the class + Variations. """ def __init__(self, file_path, variations): - self._parsed_model = etree.parse(file_path) + """ + Starts all the member fields of the class and the worker thread. + Parameters + ---------- + file_path : string + The absolute path to the XML file that contains the MuJoCo + model. + variations: Variations + An list of Variation objects that indicate the dynamic parameters + to randomize in the XML file. 
+ """ self._variations = variations - - for v in variations.get_list(): - e = self._parsed_model.find(v.xpath) - if e is None: - raise AttributeError("Can't find node in xml") - v.elem = e - - if v.attrib not in e.attrib: - raise KeyError("Attribute doesn't exist") - val = e.attrib[v.attrib].split(' ') - if len(val) == 1: - v.default = float(e.attrib[v.attrib]) - else: - v.default = np.array(list(map(float, val))) - - if len(v.var_range) != 2 * len(val): - raise AttributeError("Range shape != default value shape") + self._file_path = file_path # Worker Thread self._worker_thread = Thread( target=self._generator_routine, daemon=True) @@ -59,22 +42,22 @@ def __init__(self, file_path, variations): # Communicates the worker thread with the calling thread by awaking # the calling thread so as to retrieve the generated model. self._model_ready = Event() + # Event to stop the worker thread self._stop_event = Event() - self._worker_thread.start() atexit.register(self.stop) - - """ - Gets the MuJoCo model produced by the worker thread in this class. - This call may block in case the calling thread asks for the model before - the worker thread has finished. - Returns - ------- - PyMjModel - A MuJoCo model with randomized dynamic parameters specified by the - user in this class. - """ + self._worker_thread.start() def get_model(self): + """ + Gets the MuJoCo model produced by the worker thread in this class. + This call may block in case the calling thread asks for the model before + the worker thread has finished. + Returns + ------- + PyMjModel + A MuJoCo model with randomized dynamic parameters specified by the + user in this class. + """ if not self._model_ready.is_set(): # If the model is not ready yet, wait for it to be finished. self._model_ready.wait() @@ -84,29 +67,59 @@ def get_model(self): self._model_requested.set() return self._mujoco_model + def stop(self): + """ + Stops the worker thread. 
This method has to be called when the corresponding + randomized environment is terminated or when the training is interrupted. + """ + if self._worker_thread.is_alive(): + self._model_requested.set() + self._stop_event.set() + self._worker_thread.join() + def _generator_routine(self): - while True: - if self._stop_event.is_set(): - return + """ + Routine of the worker thread in this class. + """ + # Initialize parsing of the model from XML + parsed_model = etree.parse(self._file_path) + for v in self._variations.get_list(): + e = parsed_model.find(v.xpath) + if e is None: + raise AttributeError("Can't find node in xml") + v.elem = e + + if v.attrib not in e.attrib: + raise KeyError("Attribute doesn't exist") + val = e.attrib[v.attrib].split(' ') + if len(val) == 1: + v.default = float(e.attrib[v.attrib]) + else: + v.default = np.array(list(map(float, val))) + + if len(v.var_range) != 2 * len(val): + raise AttributeError("Range shape != default value shape") + + # Generate model with randomized dynamic parameters + while (not self._stop_event.is_set()): for v in self._variations.get_list(): e = v.elem - if v.distribution == VariationDistributions.GAUSSIAN: + if v.distribution == VariationDistribution.GAUSSIAN: c = np.random.normal( loc=v.var_range[0], scale=v.var_range[1]) - elif v.distribution == VariationDistributions.UNIFORM: + elif v.distribution == VariationDistribution.UNIFORM: c = np.random.uniform( low=v.var_range[0], high=v.var_range[1]) else: raise NotImplementedError("Unkown distribution") - if v.method == VariationMethods.COEFFICIENT: + if v.method == VariationMethod.COEFFICIENT: e.attrib[v.attrib] = str(c * v.default) - elif v.method == VariationMethods.ABSOLUTE: + elif v.method == VariationMethod.ABSOLUTE: e.attrib[v.attrib] = str(c) else: raise NotImplementedError("Unknown method") - model_xml = etree.tostring( - self._parsed_model.getroot()).decode("ascii") + model_xml = etree.tostring(parsed_model.getroot()).decode("ascii") self._mujoco_model = 
load_model_from_xml(model_xml) # Wake up the calling thread if it was waiting @@ -114,9 +127,3 @@ def _generator_routine(self): # Go to idle mode (wait for event) self._model_requested.wait() self._model_requested.clear() - - def stop(self): - if self._worker_thread.is_alive(): - self._model_requested.set() - self._stop_event.set() - self._worker_thread.join(timeout=0.1) diff --git a/rllab/dynamics_randomization/randomize_env.py b/rllab/dynamics_randomization/randomized_env.py similarity index 64% rename from rllab/dynamics_randomization/randomize_env.py rename to rllab/dynamics_randomization/randomized_env.py index cd5bf29e2..96a84ad63 100644 --- a/rllab/dynamics_randomization/randomize_env.py +++ b/rllab/dynamics_randomization/randomized_env.py @@ -1,24 +1,39 @@ -import os.path as osp - from mujoco_py import MjSim - from rllab.core import Serializable -from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator +from rllab.dynamics_randomization import MujocoModelGenerator from rllab.envs import Env from rllab.envs.mujoco.mujoco_env import MODEL_DIR +import os.path as osp + class RandomizedEnv(Env, Serializable): + """ + This class is just a wrapper class for the MujocoEnv to perform + the training using Dynamics Randomization. + Only code in the methods reset and terminate has been added. + """ + def __init__(self, mujoco_env, variations): + """ + An instance of the class MujocoModelGenerator is created to + generate the Mujoco models with the randomization of the + requested dynamic parameters. 
+ """ Serializable.quick_init(self, locals()) self._wrapped_env = mujoco_env self._variations = variations self._file_path = osp.join(MODEL_DIR, mujoco_env.FILE) - - self._mujoco_model = MujocoModelGenerator(self._file_path, variations) + self._model_generator = MujocoModelGenerator(self._file_path, + variations) def reset(self): - self._wrapped_env.model = self._mujoco_model.get_model() + """ + The new model with randomized parameters is requested and the + corresponding parameters in the MuJoCo environment class are + set. + """ + self._wrapped_env.model = self._model_generator.get_model() self._wrapped_env.sim = MjSim(self._wrapped_env.model) self._wrapped_env.data = self._wrapped_env.sim.data self._wrapped_env.init_qpos = self._wrapped_env.sim.data.qpos @@ -36,9 +51,6 @@ def render(self, *args, **kwargs): def log_diagnostics(self, paths, *args, **kwargs): self._wrapped_env.log_diagnostics(paths, *args, **kwargs) - def terminate(self): - self._wrapped_env.terminate() - def get_param_values(self): return self._wrapped_env.get_param_values() @@ -46,7 +58,12 @@ def set_param_values(self, params): self._wrapped_env.set_param_values(params) def terminate(self): - self._mujoco_model.stop() + """ + Besides regular termination, the MuJoCo model generator is + stopped. 
+ """ + self._model_generator.stop() + self._wrapped_env.terminate() @property def wrapped_env(self): diff --git a/rllab/dynamics_randomization/test_dynamics_rand.py b/rllab/dynamics_randomization/test_dynamics_rand.py index 20d7978d3..3e6ec2aa7 100644 --- a/rllab/dynamics_randomization/test_dynamics_rand.py +++ b/rllab/dynamics_randomization/test_dynamics_rand.py @@ -2,17 +2,16 @@ """ Benchmark model mutation for dynamics randomization """ -import os -import os.path as osp -import xml.etree.ElementTree as ET - -import numpy as np from mujoco_py import load_model_from_xml from mujoco_py import MjSim from mujoco_py import MjViewer +from rllab.dynamics_randomization import Variations, VariationMethod +from rllab.dynamics_randomization import VariationDistribution -from rllab.dynamics_randomization import Variations, VariationMethods -from rllab.dynamics_randomization import VariationDistributions +import numpy as np +import os +import os.path as osp +import xml.etree.ElementTree as ET #Execute at the root of rllab MUJOCO_PY_PATH = os.getcwd() @@ -26,21 +25,21 @@ variations.randomize().\ attribute("gear").\ at_xpath(".//motor[@name='a1']").\ - with_method(VariationMethods.COEFFICIENT).\ - sampled_from(VariationDistributions.UNIFORM).\ + with_method(VariationMethod.COEFFICIENT).\ + sampled_from(VariationDistribution.UNIFORM).\ with_range(0.5, 1.5).\ randomize().\ attribute("gear").\ at_xpath(".//motor[@name='a2']").\ - sampled_from(VariationDistributions.UNIFORM).\ - with_method(VariationMethods.COEFFICIENT).\ + sampled_from(VariationDistribution.UNIFORM).\ + with_method(VariationMethod.COEFFICIENT).\ with_range(0.5, 1.5) variations.randomize().\ attribute("damping").\ at_xpath(".//joint[@name='wr_js']").\ - with_method(VariationMethods.ABSOLUTE).\ - sampled_from(VariationDistributions.UNIFORM).\ + with_method(VariationMethod.ABSOLUTE).\ + sampled_from(VariationDistribution.UNIFORM).\ with_range(5, 15) # Retrieve defaults and cache etree elems @@ -53,10 +52,10 @@ # 
Mutate model randomly for v in variations.get_list(): e = v.elem - if v.method == VariationMethods.COEFFICIENT: + if v.method == VariationMethod.COEFFICIENT: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) e.attrib[v.attrib] = str(c * v.default) - elif v.method == VariationMethods.ABSOLUTE: + elif v.method == VariationMethod.ABSOLUTE: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) e.attrib[v.attrib] = str(c) else: diff --git a/rllab/dynamics_randomization/trpo_swimmer.py b/rllab/dynamics_randomization/trpo_swimmer.py index 5e0e6db05..aae3197c5 100644 --- a/rllab/dynamics_randomization/trpo_swimmer.py +++ b/rllab/dynamics_randomization/trpo_swimmer.py @@ -5,15 +5,15 @@ from rllab.policies import GaussianMLPPolicy from rllab.dynamics_randomization import RandomizedEnv from rllab.dynamics_randomization import Variations -from rllab.dynamics_randomization import VariationMethods -from rllab.dynamics_randomization import VariationDistributions +from rllab.dynamics_randomization import VariationMethod +from rllab.dynamics_randomization import VariationDistribution variations = Variations() variations.randomize().\ at_xpath(".//geom[@name='torso']").\ attribute("density").\ - with_method(VariationMethods.COEFFICIENT).\ - sampled_from(VariationDistributions.UNIFORM).\ + with_method(VariationMethod.COEFFICIENT).\ + sampled_from(VariationDistribution.UNIFORM).\ with_range(0.5, 1.5) env = normalize(RandomizedEnv(SwimmerEnv(), variations)) diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index e607b015f..3ec38d547 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -1,17 +1,33 @@ from enum import Enum -class VariationMethods(Enum): +class VariationMethod(Enum): + """ + The random coefficient is applied according to these methods. 
+ """ + """ The randomization is the product of the coeffcient and the dynamic parameter """ COEFFICIENT = 1 + """ The randomization is equal to the coefficient """ ABSOLUTE = 2 -class VariationDistributions(Enum): +class VariationDistribution(Enum): + """ + The different ways to produce the random cofficient. + """ + """ Guassian distribution """ GAUSSIAN = 1 + """ Uniform distribution """ UNIFORM = 2 class Variation: + """ + Each dynamic parameter to randomize is represented by a Variation. This + class works more like a data structure to store the data fields required + to find the dynamic parameter and the randomization to apply to it. + """ + def __init__(self): self._xpath = None self._attrib = None @@ -79,18 +95,34 @@ def var_range(self, var_range): class Variations: + """ + The purpose of this class is to keep a list of all the variations + that have to be applied to the RandomizedEnv class. + The class implements the fluent interface pattern, so each call + to set an attribute will return the instance of this class. + """ + def __init__(self): self._list = [] def randomize(self): + """ + Creates a new entry in the list of variations. After calling this + method, call the setters for each of the attributes to be used with + this new entry using the fluent interface pattern. + """ variation = Variation() self._list.append(variation) return self def at_xpath(self, xpath): """ + Sets the xpath for the last variation in the list. + Parameters - - xpath: path expression to identify a node within the XML file + ---------- + xpath : string + path expression to identify a node within the XML file of the MuJoCo environment. """ if self._list: @@ -99,8 +131,12 @@ def at_xpath(self, xpath): def attribute(self, attrib): """ + Sets the attribute for the last variation in the list. + Parameters - - attrib: name of the dynamic parameter to randomize within the + ---------- + attrib : string + name of the dynamic parameter to randomize within the node defined in xpath. 
""" if self._list: @@ -109,8 +145,13 @@ def attribute(self, attrib): def with_method(self, method): """ + Sets the method to apply the random coefficient for the last variation + in the list. + Parameters - - method: if equal to "absolute", it sets the dyanmic parameter + ---------- + method : VariationMethod + if equal to "absolute", it sets the dyanmic parameter equal to the random coefficient obtained from the distribution, or if equal to "coefficient", it multiplies the default value provieded in the XML file by the random coefficient. @@ -121,9 +162,14 @@ def with_method(self, method): def sampled_from(self, distribution): """ + Sets the distribution where the random coefficient is sampled from for + the last variation in the list. + Parameters - - distribution: it specifies the probability distribution used to - obtain the random coefficient. + ---------- + distribution : VariationDistribution + it specifies the probability distribution used to obtain the random + coefficient. """ if self._list: self._list[-1].distribution = distribution @@ -131,13 +177,28 @@ def sampled_from(self, distribution): def with_range(self, low, high): """ + Sets the range for the random coefficient for the last variation in + the list. 
+ Parameters - - low: inclusive low value of the range - - high: exclusive high value of the range + ---------- + low : int + inclusive low value of the range + high : int + exclusive high value of the range """ if self._list: self._list[-1].var_range = (low, high) return self def get_list(self): + """ + Returns a list with all the variations + + Returns + ------- + [Variation] + A list of all the dynamic parameters to find in the model XML + and the configuration to randomize each of them + """ return self._list From 4442f123f703a937c8d173ad9d687318e79d7d89 Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Mon, 4 Jun 2018 16:03:03 -0700 Subject: [PATCH 13/30] Fix bug in MujocoModelGenerator Before this commit, when there is an error raised when loading the xml object, only the worker_thread terminates. This commit fixes this bug by terminating all the processes. Fix some typos in the last commit. --- .../mujoco_model_gen.py | 23 +++++++++++-------- .../dynamics_randomization/randomized_env.py | 10 +++++--- rllab/dynamics_randomization/variation.py | 8 +++---- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index 7ab81b8cf..108fa3991 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -1,14 +1,15 @@ -from lxml import etree -from mujoco_py import load_model_from_xml -from rllab.dynamics_randomization import VariationMethod -from rllab.dynamics_randomization import VariationDistribution +import atexit +import sys from threading import Event -from threading import RLock from threading import Thread -import atexit +from lxml import etree +from mujoco_py import load_model_from_xml import numpy as np +from rllab.dynamics_randomization import VariationDistribution +from rllab.dynamics_randomization import VariationMethod + class MujocoModelGenerator: """ @@ -33,7 +34,7 @@ def __init__(self,
file_path, variations): self._file_path = file_path # Worker Thread self._worker_thread = Thread( - target=self._generator_routine, daemon=True) + target=self._generator_routine, daemon=True, name="Worker-Thread") # Reference to the generated model self._mujoco_model = None # Communicates the calling thread with the worker thread by awaking @@ -58,6 +59,10 @@ def get_model(self): A MuJoCo model with randomized dynamic parameters specified by the user in this class. """ + if not self._worker_thread.is_alive(): + # If worker thread is dead because of an error, raise an error in main thread + raise ChildProcessError("Error raised in Worker-Thread") + if not self._model_ready.is_set(): # If the model is not ready yet, wait for it to be finished. self._model_ready.wait() @@ -101,7 +106,7 @@ def _generator_routine(self): raise AttributeError("Range shape != default value shape") # Generate model with randomized dynamic parameters - while (not self._stop_event.is_set()): + while not self._stop_event.is_set(): for v in self._variations.get_list(): e = v.elem if v.distribution == VariationDistribution.GAUSSIAN: @@ -111,7 +116,7 @@ def _generator_routine(self): c = np.random.uniform( low=v.var_range[0], high=v.var_range[1]) else: - raise NotImplementedError("Unkown distribution") + raise NotImplementedError("Unknown distribution") if v.method == VariationMethod.COEFFICIENT: e.attrib[v.attrib] = str(c * v.default) elif v.method == VariationMethod.ABSOLUTE: diff --git a/rllab/dynamics_randomization/randomized_env.py b/rllab/dynamics_randomization/randomized_env.py index 96a84ad63..f6f298a2c 100644 --- a/rllab/dynamics_randomization/randomized_env.py +++ b/rllab/dynamics_randomization/randomized_env.py @@ -1,11 +1,12 @@ +import os.path as osp + from mujoco_py import MjSim + from rllab.core import Serializable from rllab.dynamics_randomization import MujocoModelGenerator from rllab.envs import Env from rllab.envs.mujoco.mujoco_env import MODEL_DIR -import os.path as osp - class 
RandomizedEnv(Env, Serializable): """ @@ -33,7 +34,10 @@ def reset(self): corresponding parameters in the MuJoCo environment class are set. """ - self._wrapped_env.model = self._model_generator.get_model() + try: + self._wrapped_env.model = self._model_generator.get_model() + except AttributeError as e: + raise e self._wrapped_env.sim = MjSim(self._wrapped_env.model) self._wrapped_env.data = self._wrapped_env.sim.data self._wrapped_env.init_qpos = self._wrapped_env.sim.data.qpos diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index 3ec38d547..abe880010 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -5,7 +5,7 @@ class VariationMethod(Enum): """ The random coefficient is applied according to these methods. """ - """ The randomization is the product of the coeffcient and the dynamic parameter """ + """ The randomization is the product of the coefficient and the dynamic parameter """ COEFFICIENT = 1 """ The randomization is equal to the coefficient """ ABSOLUTE = 2 @@ -13,7 +13,7 @@ class VariationMethod(Enum): class VariationDistribution(Enum): """ - The different ways to produce the random cofficient. + The different ways to produce the random coefficient. """ """ Guassian distribution """ GAUSSIAN = 1 @@ -151,9 +151,9 @@ def with_method(self, method): Parameters ---------- method : VariationMethod - if equal to "absolute", it sets the dyanmic parameter + if equal to "absolute", it sets the dynamic parameter equal to the random coefficient obtained from the distribution, or - if equal to "coefficient", it multiplies the default value provieded + if equal to "coefficient", it multiplies the default value provided in the XML file by the random coefficient. 
""" if self._list: From 4a96217e132135cb2ea7c9f54e53405824a9fce8 Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Mon, 4 Jun 2018 17:18:19 -0700 Subject: [PATCH 14/30] Change multi-thread MujocoModelGen to n-length queue Create an 10-length queue in MujocoModelGenerator to store the mujoco_models. --- .../mujoco_model_gen.py | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index 108fa3991..60840726a 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -1,5 +1,5 @@ import atexit -import sys +from queue import Queue from threading import Event from threading import Thread @@ -32,6 +32,8 @@ def __init__(self, file_path, variations): """ self._variations = variations self._file_path = file_path + # Synchronized queue to store mujoco_models + self._models = Queue(maxsize=10) # Worker Thread self._worker_thread = Thread( target=self._generator_routine, daemon=True, name="Worker-Thread") @@ -60,17 +62,10 @@ def get_model(self): user in this class. """ if not self._worker_thread.is_alive(): - # If worker thread is dead because of an error, raise an error in main thread + # If worker thread terminates because of an error, terminates main thread raise ChildProcessError("Error raised in Worker-Thread") - if not self._model_ready.is_set(): - # If the model is not ready yet, wait for it to be finished. - self._model_ready.wait() - # Cleat the event flag for the next iteration - self._model_ready.clear() - # Request a new model to the worker thread. - self._model_requested.set() - return self._mujoco_model + return self._models.get() def stop(self): """ @@ -78,6 +73,9 @@ def stop(self): randomized environment is terminated or when the training is interrupted. 
""" if self._worker_thread.is_alive(): + while not self._models.empty(): + self._models.get() + self._model_requested.set() self._stop_event.set() self._worker_thread.join() @@ -126,9 +124,4 @@ def _generator_routine(self): model_xml = etree.tostring(parsed_model.getroot()).decode("ascii") self._mujoco_model = load_model_from_xml(model_xml) - - # Wake up the calling thread if it was waiting - self._model_ready.set() - # Go to idle mode (wait for event) - self._model_requested.wait() - self._model_requested.clear() + self._models.put(self._mujoco_model) From c1a4a81fd3144705fd0e844785c5d98d028a8781 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Tue, 5 Jun 2018 10:25:15 -0700 Subject: [PATCH 15/30] Miscellaneous changes to improve the code - Renamed classes VariationMethod and VariationDistribution to Method and Distribution. - Enforced the use of methods exclusive for uniform or normal distributions in the fluent interface pattern provided in class Variations by splitting the class into VariationsBase, VariationsGaussian and VariationsUniform. - Included the module os.path.osp in rllab.envs.mujoco modules. - Changed error types and improved messages for two errors in class MujocoModelGenerator. 
--- rllab/dynamics_randomization/__init__.py | 4 +- .../mujoco_model_gen.py | 16 ++-- .../dynamics_randomization/randomized_env.py | 3 +- .../test_dynamics_rand.py | 22 ++--- rllab/dynamics_randomization/trpo_swimmer.py | 8 +- rllab/dynamics_randomization/variation.py | 82 +++++++++++++++---- rllab/envs/mujoco/__init__.py | 1 + 7 files changed, 92 insertions(+), 44 deletions(-) diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py index 86e165086..5bfb069f0 100644 --- a/rllab/dynamics_randomization/__init__.py +++ b/rllab/dynamics_randomization/__init__.py @@ -1,5 +1,5 @@ from rllab.dynamics_randomization.variation import Variations -from rllab.dynamics_randomization.variation import VariationMethod -from rllab.dynamics_randomization.variation import VariationDistribution +from rllab.dynamics_randomization.variation import Method +from rllab.dynamics_randomization.variation import Distribution from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator from rllab.dynamics_randomization.randomized_env import RandomizedEnv diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index 60840726a..85d71397d 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -7,8 +7,8 @@ from mujoco_py import load_model_from_xml import numpy as np -from rllab.dynamics_randomization import VariationDistribution -from rllab.dynamics_randomization import VariationMethod +from rllab.dynamics_randomization import Distribution +from rllab.dynamics_randomization import Method class MujocoModelGenerator: @@ -89,11 +89,11 @@ def _generator_routine(self): for v in self._variations.get_list(): e = parsed_model.find(v.xpath) if e is None: - raise AttributeError("Can't find node in xml") + raise ValueError("Could not find node in the XML model: %s" % v.xpath) v.elem = e if v.attrib not in e.attrib: - raise 
KeyError("Attribute doesn't exist") + raise ValueError("Attribute %s doesn't exist in node %s" % (v.attrib, v.xpath)) val = e.attrib[v.attrib].split(' ') if len(val) == 1: v.default = float(e.attrib[v.attrib]) @@ -107,17 +107,17 @@ def _generator_routine(self): while not self._stop_event.is_set(): for v in self._variations.get_list(): e = v.elem - if v.distribution == VariationDistribution.GAUSSIAN: + if v.distribution == Distribution.GAUSSIAN: c = np.random.normal( loc=v.var_range[0], scale=v.var_range[1]) - elif v.distribution == VariationDistribution.UNIFORM: + elif v.distribution == Distribution.UNIFORM: c = np.random.uniform( low=v.var_range[0], high=v.var_range[1]) else: raise NotImplementedError("Unknown distribution") - if v.method == VariationMethod.COEFFICIENT: + if v.method == Method.COEFFICIENT: e.attrib[v.attrib] = str(c * v.default) - elif v.method == VariationMethod.ABSOLUTE: + elif v.method == Method.ABSOLUTE: e.attrib[v.attrib] = str(c) else: raise NotImplementedError("Unknown method") diff --git a/rllab/dynamics_randomization/randomized_env.py b/rllab/dynamics_randomization/randomized_env.py index f6f298a2c..557c12273 100644 --- a/rllab/dynamics_randomization/randomized_env.py +++ b/rllab/dynamics_randomization/randomized_env.py @@ -1,10 +1,9 @@ -import os.path as osp - from mujoco_py import MjSim from rllab.core import Serializable from rllab.dynamics_randomization import MujocoModelGenerator from rllab.envs import Env +from rllab.envs.mujoco import osp from rllab.envs.mujoco.mujoco_env import MODEL_DIR diff --git a/rllab/dynamics_randomization/test_dynamics_rand.py b/rllab/dynamics_randomization/test_dynamics_rand.py index 3e6ec2aa7..cdf865e1b 100644 --- a/rllab/dynamics_randomization/test_dynamics_rand.py +++ b/rllab/dynamics_randomization/test_dynamics_rand.py @@ -5,12 +5,12 @@ from mujoco_py import load_model_from_xml from mujoco_py import MjSim from mujoco_py import MjViewer -from rllab.dynamics_randomization import Variations, 
VariationMethod -from rllab.dynamics_randomization import VariationDistribution +from rllab.dynamics_randomization import Variations, Method +from rllab.dynamics_randomization import Distribution +from rllab.envs.mujoco import osp import numpy as np import os -import os.path as osp import xml.etree.ElementTree as ET #Execute at the root of rllab @@ -25,21 +25,21 @@ variations.randomize().\ attribute("gear").\ at_xpath(".//motor[@name='a1']").\ - with_method(VariationMethod.COEFFICIENT).\ - sampled_from(VariationDistribution.UNIFORM).\ + with_method(Method.COEFFICIENT).\ + sampled_from(Distribution.UNIFORM).\ with_range(0.5, 1.5).\ randomize().\ attribute("gear").\ at_xpath(".//motor[@name='a2']").\ - sampled_from(VariationDistribution.UNIFORM).\ - with_method(VariationMethod.COEFFICIENT).\ + sampled_from(Distribution.UNIFORM).\ + with_method(Method.COEFFICIENT).\ with_range(0.5, 1.5) variations.randomize().\ attribute("damping").\ at_xpath(".//joint[@name='wr_js']").\ - with_method(VariationMethod.ABSOLUTE).\ - sampled_from(VariationDistribution.UNIFORM).\ + with_method(Method.ABSOLUTE).\ + sampled_from(Distribution.UNIFORM).\ with_range(5, 15) # Retrieve defaults and cache etree elems @@ -52,10 +52,10 @@ # Mutate model randomly for v in variations.get_list(): e = v.elem - if v.method == VariationMethod.COEFFICIENT: + if v.method == Method.COEFFICIENT: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) e.attrib[v.attrib] = str(c * v.default) - elif v.method == VariationMethod.ABSOLUTE: + elif v.method == Method.ABSOLUTE: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) e.attrib[v.attrib] = str(c) else: diff --git a/rllab/dynamics_randomization/trpo_swimmer.py b/rllab/dynamics_randomization/trpo_swimmer.py index aae3197c5..427d29ea6 100644 --- a/rllab/dynamics_randomization/trpo_swimmer.py +++ b/rllab/dynamics_randomization/trpo_swimmer.py @@ -5,15 +5,15 @@ from rllab.policies import GaussianMLPPolicy from rllab.dynamics_randomization 
import RandomizedEnv from rllab.dynamics_randomization import Variations -from rllab.dynamics_randomization import VariationMethod -from rllab.dynamics_randomization import VariationDistribution +from rllab.dynamics_randomization import Method +from rllab.dynamics_randomization import Distribution variations = Variations() variations.randomize().\ at_xpath(".//geom[@name='torso']").\ attribute("density").\ - with_method(VariationMethod.COEFFICIENT).\ - sampled_from(VariationDistribution.UNIFORM).\ + with_method(Method.COEFFICIENT).\ + sampled_from(Distribution.UNIFORM).\ with_range(0.5, 1.5) env = normalize(RandomizedEnv(SwimmerEnv(), variations)) diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index abe880010..9d718caf5 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -1,7 +1,7 @@ from enum import Enum -class VariationMethod(Enum): +class Method(Enum): """ The random coefficient is applied according to these methods. """ @@ -11,7 +11,7 @@ class VariationMethod(Enum): ABSOLUTE = 2 -class VariationDistribution(Enum): +class Distribution(Enum): """ The different ways to produce the random coefficient. """ @@ -36,6 +36,7 @@ def __init__(self): self._var_range = None self._elem = None self._default = None + self._mean_std = None @property def xpath(self): @@ -93,8 +94,16 @@ def var_range(self): def var_range(self, var_range): self._var_range = var_range + @property + def mean_std(self): + return self._var_range + + @mean_std.setter + def mean_std(self, var_range): + self._mean_std = mean_std -class Variations: + +class VariationsBase: """ The purpose of this class is to keep a list of all the variations that have to be applied to the RandomizedEnv class. @@ -102,8 +111,8 @@ class Variations: to set an attribute will return the instance of this class. 
""" - def __init__(self): - self._list = [] + def __init__(self, variations_list=[]): + self._list = variations_list def randomize(self): """ @@ -113,7 +122,7 @@ def randomize(self): """ variation = Variation() self._list.append(variation) - return self + return Variations(self._list) def at_xpath(self, xpath): """ @@ -150,7 +159,7 @@ def with_method(self, method): Parameters ---------- - method : VariationMethod + method : Method if equal to "absolute", it sets the dynamic parameter equal to the random coefficient obtained from the distribution, or if equal to "coefficient", it multiplies the default value provided @@ -160,6 +169,24 @@ def with_method(self, method): self._list[-1].method = method return self + def get_list(self): + """ + Returns a list with all the variations + + Returns + ------- + [Variation] + A list of all the dynamic parameters to find in the model XML + and the configuration to randomize each of them + """ + return self._list + + +class Variations(VariationsBase): + """ + Contains all the methods that have to be called once per variation entry. + """ + def sampled_from(self, distribution): """ Sets the distribution where the random coefficient is sampled from for @@ -167,18 +194,29 @@ def sampled_from(self, distribution): Parameters ---------- - distribution : VariationDistribution + distribution : Distribution it specifies the probability distribution used to obtain the random coefficient. """ if self._list: self._list[-1].distribution = distribution + + if distribution is Distribution.GAUSSIAN: + return VariationsGaussian(self._list) + elif distribution is Distribution.UNIFORM: + return VariationsUniform(self._list) return self + +class VariationsUniform(VariationsBase): + """ + Contains all the methods for variation entries with uniform distributions + """ + def with_range(self, low, high): """ Sets the range for the random coefficient for the last variation in - the list. + the list. 
Only to be used for Distribution.UNIFORM Parameters ---------- @@ -191,14 +229,24 @@ def with_range(self, low, high): self._list[-1].var_range = (low, high) return self - def get_list(self): + +class VariationsGaussian(Variations): + """ + Contains all the methods for variation entries with Gaussian distributions + """ + + def with_mean_std(self, mean, std_deviation): """ - Returns a list with all the variations + Sets the range for the random coefficient for the last variation in + the list. Only to be used for Distribution.GAUSSIAN - Returns - ------- - [Variation] - A list of all the dynamic parameters to find in the model XML - and the configuration to randomize each of them + Parameters + ---------- + mean : int + mean of the distribution + std_deviation : int + standard mean of the distribution """ - return self._list + if self._list: + self._list[-1].mean_std = (mean, std_deviation) + return self diff --git a/rllab/envs/mujoco/__init__.py b/rllab/envs/mujoco/__init__.py index 507fb0fc4..fd6ed5661 100644 --- a/rllab/envs/mujoco/__init__.py +++ b/rllab/envs/mujoco/__init__.py @@ -1,3 +1,4 @@ +import os.path as osp from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv from rllab.envs.mujoco.hopper_env import HopperEnv from rllab.envs.mujoco.inverted_double_pendulum_env import InvertedDoublePendulumEnv From cf45d0ca92f3fcb1e632454e34ad450acaa59de2 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Tue, 5 Jun 2018 12:54:24 -0700 Subject: [PATCH 16/30] Fix PEP8 formatting in file mujoco_model_gen.py --- rllab/dynamics_randomization/mujoco_model_gen.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index 85d71397d..8b8d132da 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -89,11 +89,13 @@ def _generator_routine(self): for v in self._variations.get_list(): e = 
parsed_model.find(v.xpath) if e is None: - raise ValueError("Could not find node in the XML model: %s" % v.xpath) + raise ValueError( + "Could not find node in the XML model: %s" % v.xpath) v.elem = e if v.attrib not in e.attrib: - raise ValueError("Attribute %s doesn't exist in node %s" % (v.attrib, v.xpath)) + raise ValueError("Attribute %s doesn't exist in node %s" % + (v.attrib, v.xpath)) val = e.attrib[v.attrib].split(' ') if len(val) == 1: v.default = float(e.attrib[v.attrib]) From 735ed803e3cb79daf7891a675da4f7167b773109 Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Tue, 5 Jun 2018 14:15:24 -0700 Subject: [PATCH 17/30] Add miscellaneous changes to improve the code - Delete unused threading.Event in MujocoModelGenerator. - Correct error types in MujocoModelGenerator. - Rename class RandomizedEnv to RandomizedDynamicsEnv. - Delete wrong try-except in RandomizedDynamicsEnv. - Use randomize_dynamics() in the launcher. - Format method chains onto multiple lines. - Correct wrong param name in Variation.
--- rllab/dynamics_randomization/__init__.py | 2 +- .../mujoco_model_gen.py | 11 +----- ...ized_env.py => randomized_dynamics_env.py} | 10 ++--- .../test_dynamics_rand.py | 39 ++++++++++--------- rllab/dynamics_randomization/trpo_swimmer.py | 19 +++++---- rllab/dynamics_randomization/variation.py | 2 +- 6 files changed, 38 insertions(+), 45 deletions(-) rename rllab/dynamics_randomization/{randomized_env.py => randomized_dynamics_env.py} (93%) diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py index 5bfb069f0..6f3d853d1 100644 --- a/rllab/dynamics_randomization/__init__.py +++ b/rllab/dynamics_randomization/__init__.py @@ -2,4 +2,4 @@ from rllab.dynamics_randomization.variation import Method from rllab.dynamics_randomization.variation import Distribution from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator -from rllab.dynamics_randomization.randomized_env import RandomizedEnv +from rllab.dynamics_randomization.randomized_dynamics_env import randomize_dynamics diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index 8b8d132da..ef8195cd7 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -39,12 +39,6 @@ def __init__(self, file_path, variations): target=self._generator_routine, daemon=True, name="Worker-Thread") # Reference to the generated model self._mujoco_model = None - # Communicates the calling thread with the worker thread by awaking - # the worker thread so as to generate a new model. - self._model_requested = Event() - # Communicates the worker thread with the calling thread by awaking - # the calling thread so as to retrieve the generated model. 
- self._model_ready = Event() # Event to stop the worker thread self._stop_event = Event() atexit.register(self.stop) @@ -76,7 +70,6 @@ def stop(self): while not self._models.empty(): self._models.get() - self._model_requested.set() self._stop_event.set() self._worker_thread.join() @@ -116,13 +109,13 @@ def _generator_routine(self): c = np.random.uniform( low=v.var_range[0], high=v.var_range[1]) else: - raise NotImplementedError("Unknown distribution") + raise ValueError("Unknown distribution") if v.method == Method.COEFFICIENT: e.attrib[v.attrib] = str(c * v.default) elif v.method == Method.ABSOLUTE: e.attrib[v.attrib] = str(c) else: - raise NotImplementedError("Unknown method") + raise ValueError("Unknown method") model_xml = etree.tostring(parsed_model.getroot()).decode("ascii") self._mujoco_model = load_model_from_xml(model_xml) diff --git a/rllab/dynamics_randomization/randomized_env.py b/rllab/dynamics_randomization/randomized_dynamics_env.py similarity index 93% rename from rllab/dynamics_randomization/randomized_env.py rename to rllab/dynamics_randomization/randomized_dynamics_env.py index 557c12273..2551d209e 100644 --- a/rllab/dynamics_randomization/randomized_env.py +++ b/rllab/dynamics_randomization/randomized_dynamics_env.py @@ -7,7 +7,7 @@ from rllab.envs.mujoco.mujoco_env import MODEL_DIR -class RandomizedEnv(Env, Serializable): +class RandomizedDynamicsEnv(Env, Serializable): """ This class is just a wrapper class for the MujocoEnv to perform the training using Dynamics Randomization. @@ -33,10 +33,7 @@ def reset(self): corresponding parameters in the MuJoCo environment class are set. 
""" - try: - self._wrapped_env.model = self._model_generator.get_model() - except AttributeError as e: - raise e + self._wrapped_env.model = self._model_generator.get_model() self._wrapped_env.sim = MjSim(self._wrapped_env.model) self._wrapped_env.data = self._wrapped_env.sim.data self._wrapped_env.init_qpos = self._wrapped_env.sim.data.qpos @@ -83,3 +80,6 @@ def observation_space(self): @property def horizon(self): return self._wrapped_env.horizon + + +randomize_dynamics = RandomizedDynamicsEnv \ No newline at end of file diff --git a/rllab/dynamics_randomization/test_dynamics_rand.py b/rllab/dynamics_randomization/test_dynamics_rand.py index cdf865e1b..cfc61e9a7 100644 --- a/rllab/dynamics_randomization/test_dynamics_rand.py +++ b/rllab/dynamics_randomization/test_dynamics_rand.py @@ -13,7 +13,7 @@ import os import xml.etree.ElementTree as ET -#Execute at the root of rllab +# Execute at the root of rllab MUJOCO_PY_PATH = os.getcwd() TOSSER_XML = osp.join(MUJOCO_PY_PATH, "rllab/dynamics_randomization/tosser.xml") @@ -22,25 +22,26 @@ tosser = ET.parse(TOSSER_XML) variations = Variations() -variations.randomize().\ - attribute("gear").\ - at_xpath(".//motor[@name='a1']").\ - with_method(Method.COEFFICIENT).\ - sampled_from(Distribution.UNIFORM).\ - with_range(0.5, 1.5).\ - randomize().\ - attribute("gear").\ - at_xpath(".//motor[@name='a2']").\ - sampled_from(Distribution.UNIFORM).\ - with_method(Method.COEFFICIENT).\ - with_range(0.5, 1.5) +variations.randomize() \ + .attribute("gear") \ + .at_xpath(".//motor[@name='a1']") \ + .with_method(Method.COEFFICIENT) \ + .sampled_from(Distribution.UNIFORM) \ + .with_range(0.5, 1.5) -variations.randomize().\ - attribute("damping").\ - at_xpath(".//joint[@name='wr_js']").\ - with_method(Method.ABSOLUTE).\ - sampled_from(Distribution.UNIFORM).\ - with_range(5, 15) +variations.randomize() \ + .attribute("gear") \ + .at_xpath(".//motor[@name='a2']") \ + .sampled_from(Distribution.UNIFORM) \ + .with_method(Method.COEFFICIENT) \ 
+ .with_range(0.5, 1.5) + +variations.randomize()\ + .attribute("damping") \ + .at_xpath(".//joint[@name='wr_js']") \ + .with_method(Method.ABSOLUTE) \ + .sampled_from(Distribution.UNIFORM) \ + .with_range(5, 15) # Retrieve defaults and cache etree elems for v in variations.get_list(): diff --git a/rllab/dynamics_randomization/trpo_swimmer.py b/rllab/dynamics_randomization/trpo_swimmer.py index 427d29ea6..f54b87ed4 100644 --- a/rllab/dynamics_randomization/trpo_swimmer.py +++ b/rllab/dynamics_randomization/trpo_swimmer.py @@ -1,22 +1,21 @@ from rllab.algos import TRPO from rllab.baselines import LinearFeatureBaseline from rllab.envs.mujoco import SwimmerEnv -from rllab.envs import normalize -from rllab.policies import GaussianMLPPolicy -from rllab.dynamics_randomization import RandomizedEnv +from rllab.dynamics_randomization import randomize_dynamics from rllab.dynamics_randomization import Variations from rllab.dynamics_randomization import Method from rllab.dynamics_randomization import Distribution +from rllab.policies import GaussianMLPPolicy variations = Variations() -variations.randomize().\ - at_xpath(".//geom[@name='torso']").\ - attribute("density").\ - with_method(Method.COEFFICIENT).\ - sampled_from(Distribution.UNIFORM).\ - with_range(0.5, 1.5) +variations.randomize() \ + .at_xpath(".//geom[@name='torso']") \ + .attribute("density") \ + .with_method(Method.COEFFICIENT) \ + .sampled_from(Distribution.UNIFORM) \ + .with_range(0.5, 1.5) -env = normalize(RandomizedEnv(SwimmerEnv(), variations)) +env = randomize_dynamics(SwimmerEnv(), variations) policy = GaussianMLPPolicy( env_spec=env.spec, diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index 9d718caf5..2b5388adc 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -99,7 +99,7 @@ def mean_std(self): return self._var_range @mean_std.setter - def mean_std(self, var_range): + def mean_std(self, mean_std): 
self._mean_std = mean_std From 25db0feb8fe617130cdc85ce70cce5aafd02e8fb Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Tue, 5 Jun 2018 15:52:45 -0700 Subject: [PATCH 18/30] Add local cache of elems in MujocoModelGenerator Solve the problem with v.elem=e, which calls the setter method in Variation. Replace this with a local cache of elements. Same with v.default in MujocoModelGenerator. --- rllab/dynamics_randomization/mujoco_model_gen.py | 16 +++++++++------- rllab/dynamics_randomization/variation.py | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index ef8195cd7..adc39ac26 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -79,39 +79,41 @@ def _generator_routine(self): """ # Initialize parsing of the model from XML parsed_model = etree.parse(self._file_path) + elem_cache = {} + default_cache = {} for v in self._variations.get_list(): e = parsed_model.find(v.xpath) if e is None: raise ValueError( "Could not find node in the XML model: %s" % v.xpath) - v.elem = e + elem_cache[v] = e if v.attrib not in e.attrib: raise ValueError("Attribute %s doesn't exist in node %s" % (v.attrib, v.xpath)) val = e.attrib[v.attrib].split(' ') if len(val) == 1: - v.default = float(e.attrib[v.attrib]) + default_cache[v] = float(e.attrib[v.attrib]) else: - v.default = np.array(list(map(float, val))) + default_cache[v] = np.array(list(map(float, val))) if len(v.var_range) != 2 * len(val): - raise AttributeError("Range shape != default value shape") + raise ValueError("Range shape != default value shape") # Generate model with randomized dynamic parameters while not self._stop_event.is_set(): for v in self._variations.get_list(): - e = v.elem + e = elem_cache[v] if v.distribution == Distribution.GAUSSIAN: c = np.random.normal( - loc=v.var_range[0], scale=v.var_range[1]) + loc=v.mean_std[0], 
scale=v.mean_std[1]) elif v.distribution == Distribution.UNIFORM: c = np.random.uniform( low=v.var_range[0], high=v.var_range[1]) else: raise ValueError("Unknown distribution") if v.method == Method.COEFFICIENT: - e.attrib[v.attrib] = str(c * v.default) + e.attrib[v.attrib] = str(c * default_cache[v]) elif v.method == Method.ABSOLUTE: e.attrib[v.attrib] = str(c) else: diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index 2b5388adc..adaa19cb5 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -96,7 +96,7 @@ def var_range(self, var_range): @property def mean_std(self): - return self._var_range + return self._mean_std @mean_std.setter def mean_std(self, mean_std): From 22d69af4a4d32c0977c36d67072f1b6f55688521 Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Wed, 6 Jun 2018 11:53:43 -0700 Subject: [PATCH 19/30] Add miscellaneous changes to improve the code - Remove the setters in Variation - Add check of parameter shape in MujocoModelGenerator - Fix some typos --- .../mujoco_model_gen.py | 9 +- .../test_dynamics_rand.py | 20 +- rllab/dynamics_randomization/trpo_swimmer.py | 3 +- rllab/dynamics_randomization/variation.py | 207 ++++++++---------- 4 files changed, 118 insertions(+), 121 deletions(-) diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index adc39ac26..c07950ff0 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -97,9 +97,6 @@ def _generator_routine(self): else: default_cache[v] = np.array(list(map(float, val))) - if len(v.var_range) != 2 * len(val): - raise ValueError("Range shape != default value shape") - # Generate model with randomized dynamic parameters while not self._stop_event.is_set(): for v in self._variations.get_list(): @@ -112,6 +109,12 @@ def _generator_routine(self): low=v.var_range[0], high=v.var_range[1])
else: raise ValueError("Unknown distribution") + + if not isinstance(c, type(default_cache[v])): + raise ValueError( + "Sampled value %s don't match with default value %s" % + (c, default_cache[v])) + if v.method == Method.COEFFICIENT: e.attrib[v.attrib] = str(c * default_cache[v]) elif v.method == Method.ABSOLUTE: diff --git a/rllab/dynamics_randomization/test_dynamics_rand.py b/rllab/dynamics_randomization/test_dynamics_rand.py index cfc61e9a7..3354c5147 100644 --- a/rllab/dynamics_randomization/test_dynamics_rand.py +++ b/rllab/dynamics_randomization/test_dynamics_rand.py @@ -27,35 +27,40 @@ .at_xpath(".//motor[@name='a1']") \ .with_method(Method.COEFFICIENT) \ .sampled_from(Distribution.UNIFORM) \ - .with_range(0.5, 1.5) + .with_range(0.5, 1.5) \ + .add() variations.randomize() \ .attribute("gear") \ .at_xpath(".//motor[@name='a2']") \ .sampled_from(Distribution.UNIFORM) \ .with_method(Method.COEFFICIENT) \ - .with_range(0.5, 1.5) + .with_range(0.5, 1.5) \ + .add() variations.randomize()\ .attribute("damping") \ .at_xpath(".//joint[@name='wr_js']") \ .with_method(Method.ABSOLUTE) \ .sampled_from(Distribution.UNIFORM) \ - .with_range(5, 15) + .with_range(5, 15) \ + .add() +elem_cache = {} +default_cache = {} # Retrieve defaults and cache etree elems for v in variations.get_list(): e = tosser.find(v.xpath) - v.elem = e - v.default = float(e.attrib[v.attrib]) + elem_cache[v] = e + default_cache[v] = float(e.attrib[v.attrib]) for _ in range(1000): # Mutate model randomly for v in variations.get_list(): - e = v.elem + e = elem_cache[v] if v.method == Method.COEFFICIENT: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) - e.attrib[v.attrib] = str(c * v.default) + e.attrib[v.attrib] = str(c * default_cache[v]) elif v.method == Method.ABSOLUTE: c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) e.attrib[v.attrib] = str(c) @@ -64,6 +69,7 @@ # Reify model model_xml = ET.tostring(tosser.getroot()).decode("ascii") + print(model_xml) # Run model 
loop model = load_model_from_xml(model_xml) diff --git a/rllab/dynamics_randomization/trpo_swimmer.py b/rllab/dynamics_randomization/trpo_swimmer.py index f54b87ed4..cc1056a7f 100644 --- a/rllab/dynamics_randomization/trpo_swimmer.py +++ b/rllab/dynamics_randomization/trpo_swimmer.py @@ -13,7 +13,8 @@ .attribute("density") \ .with_method(Method.COEFFICIENT) \ .sampled_from(Distribution.UNIFORM) \ - .with_range(0.5, 1.5) + .with_range(0.5, 1.5) \ + .add() env = randomize_dynamics(SwimmerEnv(), variations) diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index adaa19cb5..e12194cef 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -15,7 +15,7 @@ class Distribution(Enum): """ The different ways to produce the random coefficient. """ - """ Guassian distribution """ + """ Gaussian distribution """ GAUSSIAN = 1 """ Uniform distribution """ UNIFORM = 2 @@ -28,101 +28,117 @@ class works more like a data structure to store the data fields required to find the dynamic parameter and the randomization to apply to it. 
""" - def __init__(self): - self._xpath = None - self._attrib = None - self._method = None - self._distribution = None - self._var_range = None - self._elem = None - self._default = None - self._mean_std = None + def __init__(self, + xpath=None, + attrib=None, + method=None, + distribution=None, + var_range=None, + mean_std=None, + elem=None, + default=None): + + if distribution is Distribution.GAUSSIAN and mean_std is None: + raise ValueError("Need to call with_mean_std when sampled from Gaussian") + + if distribution is Distribution.UNIFORM and var_range is None: + raise ValueError("Need to call with_range when sampled from Uniform") + + self._xpath = xpath + self._attrib = attrib + self._method = method + self._distribution = distribution + self._var_range = var_range + self._mean_std = mean_std + self._elem = elem + self._default = default @property def xpath(self): return self._xpath - @xpath.setter - def xpath(self, xpath): - self._xpath = xpath - @property def elem(self): return self._elem - @elem.setter - def elem(self, elem): - self._elem = elem - @property def attrib(self): return self._attrib - @attrib.setter - def attrib(self, attrib): - self._attrib = attrib - @property def default(self): return self._default - @default.setter - def default(self, default): - self._default = default - @property def method(self): return self._method - @method.setter - def method(self, method): - self._method = method - @property def distribution(self): return self._distribution - @distribution.setter - def distribution(self, distribution): - self._distribution = distribution - @property def var_range(self): return self._var_range - @var_range.setter - def var_range(self, var_range): - self._var_range = var_range - @property def mean_std(self): return self._mean_std - @mean_std.setter - def mean_std(self, mean_std): - self._mean_std = mean_std - -class VariationsBase: +class Variations: """ The purpose of this class is to keep a list of all the variations that have to be 
applied to the RandomizedEnv class. - The class implements the fluent interface pattern, so each call - to set an attribute will return the instance of this class. """ - def __init__(self, variations_list=[]): - self._list = variations_list + def __init__(self): + self._list = [] def randomize(self): """ - Creates a new entry in the list of variations. After calling this - method, call the setters for each of the attributes to be used with - this new entry using the fluent interface pattern. + Creates a VariationSpec instance to store values of dynamic parameters. + + Returns + ------- + VariationSpec + """ + return VariationSpec(self) + + def get_list(self): + """ + Returns a list with all the variations + + Returns + ------- + [Variation] + A list of all the dynamic parameters to find in the model XML + and the configuration to randomize each of them """ - variation = Variation() + return self._list + + def add(self, variation): self._list.append(variation) - return Variations(self._list) + + +class VariationSpec: + """ + The purpose of this class is to set the values of each dynamic + parameter. + The class implements the fluent interface pattern, so each call + to set an attribute will return the instance of this class. + """ + + def __init__(self, variations): + self._variations = variations + self._xpath = None + self._attrib = None + self._method = None + self._distribution = None + self._mean_std = None + self._var_range = None + self._elem = None + self._default = None def at_xpath(self, xpath): """ @@ -134,8 +150,7 @@ def at_xpath(self, xpath): path expression to identify a node within the XML file of the MuJoCo environment. """ - if self._list: - self._list[-1].xpath = xpath + self._xpath = xpath return self def attribute(self, attrib): @@ -148,8 +163,7 @@ def attribute(self, attrib): name of the dynamic parameter to randomize within the node defined in xpath. 
""" - if self._list: - self._list[-1].attrib = attrib + self._attrib = attrib return self def with_method(self, method): @@ -165,28 +179,9 @@ def with_method(self, method): if equal to "coefficient", it multiplies the default value provided in the XML file by the random coefficient. """ - if self._list: - self._list[-1].method = method + self._method = method return self - def get_list(self): - """ - Returns a list with all the variations - - Returns - ------- - [Variation] - A list of all the dynamic parameters to find in the model XML - and the configuration to randomize each of them - """ - return self._list - - -class Variations(VariationsBase): - """ - Contains all the methods that have to be called once per variation entry. - """ - def sampled_from(self, distribution): """ Sets the distribution where the random coefficient is sampled from for @@ -198,20 +193,23 @@ def sampled_from(self, distribution): it specifies the probability distribution used to obtain the random coefficient. """ - if self._list: - self._list[-1].distribution = distribution - - if distribution is Distribution.GAUSSIAN: - return VariationsGaussian(self._list) - elif distribution is Distribution.UNIFORM: - return VariationsUniform(self._list) + self._distribution = distribution return self + def with_mean_std(self, mean, std_deviation): + """ + Sets the range for the random coefficient for the last variation in + the list. 
Only to be used for Distribution.GAUSSIAN -class VariationsUniform(VariationsBase): - """ - Contains all the methods for variation entries with uniform distributions - """ + Parameters + ---------- + mean : int + mean of the distribution + std_deviation : int + standard mean of the distribution + """ + self._mean_std = (mean, std_deviation) + return self def with_range(self, low, high): """ @@ -225,28 +223,17 @@ def with_range(self, low, high): high : int exclusive high value of the range """ - if self._list: - self._list[-1].var_range = (low, high) + self._var_range = (low, high) return self - -class VariationsGaussian(Variations): - """ - Contains all the methods for variation entries with Gaussian distributions - """ - - def with_mean_std(self, mean, std_deviation): - """ - Sets the range for the random coefficient for the last variation in - the list. Only to be used for Distribution.GAUSSIAN - - Parameters - ---------- - mean : int - mean of the distribution - std_deviation : int - standard mean of the distribution - """ - if self._list: - self._list[-1].mean_std = (mean, std_deviation) - return self + def add(self): + self._variations.add( + Variation( + xpath=self._xpath, + attrib=self._attrib, + method=self._method, + distribution=self._distribution, + var_range=self._var_range, + mean_std=self._mean_std, + elem=self._elem, + default=self._default)) From a4c84425dbef5b040ff683159d2a14aab24a91de Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Wed, 6 Jun 2018 12:01:08 -0700 Subject: [PATCH 20/30] Fix PEP formatting with YAPF --- rllab/dynamics_randomization/variation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rllab/dynamics_randomization/variation.py b/rllab/dynamics_randomization/variation.py index e12194cef..37149d114 100644 --- a/rllab/dynamics_randomization/variation.py +++ b/rllab/dynamics_randomization/variation.py @@ -39,10 +39,12 @@ def __init__(self, default=None): if distribution is Distribution.GAUSSIAN and 
mean_std is None: - raise ValueError("Need to call with_mean_std when sampled from Gaussian") + raise ValueError( + "Need to call with_mean_std when sampled from Gaussian") if distribution is Distribution.UNIFORM and var_range is None: - raise ValueError("Need to call with_range when sampled from Uniform") + raise ValueError( + "Need to call with_range when sampled from Uniform") self._xpath = xpath self._attrib = attrib From 43594ea62e39d8b4435fb283f1950130ee91032c Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Wed, 6 Jun 2018 13:54:18 -0700 Subject: [PATCH 21/30] Add error handling to MujocoModelGenerator - Add more detailed information in handling the shape of the sampled value with the default value - Add timeout in the Queue.get() in MujocoModelGenerator so the main thread will catch error raised in worker thread --- rllab/dynamics_randomization/mujoco_model_gen.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/dynamics_randomization/mujoco_model_gen.py index c07950ff0..b0e57d29a 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/dynamics_randomization/mujoco_model_gen.py @@ -1,4 +1,5 @@ import atexit +import queue from queue import Queue from threading import Event from threading import Thread @@ -59,7 +60,11 @@ def get_model(self): # If worker thread terminates because of an error, terminates main thread raise ChildProcessError("Error raised in Worker-Thread") - return self._models.get() + try: + return self._models.get(timeout=1) + except queue.Empty: + # If the queue is empty after 1s, there's something wrong in the worker thread + raise ChildProcessError("Error raised in Worker-Thread") def stop(self): """ @@ -110,10 +115,11 @@ def _generator_routine(self): else: raise ValueError("Unknown distribution") - if not isinstance(c, type(default_cache[v])): + # Check if the sampled value has the same shape with default value + if 
np.array(c).shape != np.array(default_cache[v]).shape: raise ValueError( - "Sampled value %s don't match with default value %s" % - (c, default_cache[v])) + "Sampled value you input %s don't match with default value %s in the xml node %s" + % (c, default_cache[v], v.xpath)) if v.method == Method.COEFFICIENT: e.attrib[v.attrib] = str(c * default_cache[v]) From 4221002177b3ef7db3aa6ef36f06b5055728498e Mon Sep 17 00:00:00 2001 From: CatherineSue Date: Wed, 6 Jun 2018 14:17:31 -0700 Subject: [PATCH 22/30] Rename RandomizedDynamicsEnv to RandomizedEnv --- rllab/dynamics_randomization/__init__.py | 2 +- .../{randomized_dynamics_env.py => randomized_env.py} | 4 ++-- rllab/dynamics_randomization/trpo_swimmer.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename rllab/dynamics_randomization/{randomized_dynamics_env.py => randomized_env.py} (96%) diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py index 6f3d853d1..d97333cb2 100644 --- a/rllab/dynamics_randomization/__init__.py +++ b/rllab/dynamics_randomization/__init__.py @@ -2,4 +2,4 @@ from rllab.dynamics_randomization.variation import Method from rllab.dynamics_randomization.variation import Distribution from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator -from rllab.dynamics_randomization.randomized_dynamics_env import randomize_dynamics +from rllab.dynamics_randomization.randomized_env import randomize diff --git a/rllab/dynamics_randomization/randomized_dynamics_env.py b/rllab/dynamics_randomization/randomized_env.py similarity index 96% rename from rllab/dynamics_randomization/randomized_dynamics_env.py rename to rllab/dynamics_randomization/randomized_env.py index 2551d209e..98da461da 100644 --- a/rllab/dynamics_randomization/randomized_dynamics_env.py +++ b/rllab/dynamics_randomization/randomized_env.py @@ -7,7 +7,7 @@ from rllab.envs.mujoco.mujoco_env import MODEL_DIR -class RandomizedDynamicsEnv(Env, Serializable): +class 
RandomizedEnv(Env, Serializable): """ This class is just a wrapper class for the MujocoEnv to perform the training using Dynamics Randomization. @@ -82,4 +82,4 @@ def horizon(self): return self._wrapped_env.horizon -randomize_dynamics = RandomizedDynamicsEnv \ No newline at end of file +randomize = RandomizedEnv \ No newline at end of file diff --git a/rllab/dynamics_randomization/trpo_swimmer.py b/rllab/dynamics_randomization/trpo_swimmer.py index cc1056a7f..340a3fe4f 100644 --- a/rllab/dynamics_randomization/trpo_swimmer.py +++ b/rllab/dynamics_randomization/trpo_swimmer.py @@ -1,7 +1,7 @@ from rllab.algos import TRPO from rllab.baselines import LinearFeatureBaseline from rllab.envs.mujoco import SwimmerEnv -from rllab.dynamics_randomization import randomize_dynamics +from rllab.dynamics_randomization import randomize from rllab.dynamics_randomization import Variations from rllab.dynamics_randomization import Method from rllab.dynamics_randomization import Distribution @@ -16,7 +16,7 @@ .with_range(0.5, 1.5) \ .add() -env = randomize_dynamics(SwimmerEnv(), variations) +env = randomize(SwimmerEnv(), variations) policy = GaussianMLPPolicy( env_spec=env.spec, From 133e2cb3b1254f889bb90d9ca64a176859e417b8 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Wed, 6 Jun 2018 17:55:10 -0700 Subject: [PATCH 23/30] Fix mujoco exception caused by cached propery action_space The cached property action_space found in mujoco_env.py produces an error in Linux for dynamics randomization. The idea behind the cached property is to avoid doing an expensive computation several times, so for regular execution, action_space is obtained from the model that is used for the entire training once, improving the performance. However, for dynamics randomization there's a new model for each episode, and that requires that the action_space is updated accordingly, but that does not happen because it's cached. 
To update action_space and not make an invasive change, the attribute is invalidated for each reset in the RandomizedEnv class. --- rllab/dynamics_randomization/randomized_env.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rllab/dynamics_randomization/randomized_env.py b/rllab/dynamics_randomization/randomized_env.py index 98da461da..43ac3b02b 100644 --- a/rllab/dynamics_randomization/randomized_env.py +++ b/rllab/dynamics_randomization/randomized_env.py @@ -34,6 +34,8 @@ def reset(self): set. """ self._wrapped_env.model = self._model_generator.get_model() + if hasattr(self._wrapped_env, 'action_space'): + del self._wrapped_env.__dict__['action_space'] self._wrapped_env.sim = MjSim(self._wrapped_env.model) self._wrapped_env.data = self._wrapped_env.sim.data self._wrapped_env.init_qpos = self._wrapped_env.sim.data.qpos From 27ec7489ec905737c103e138402cfa172913853d Mon Sep 17 00:00:00 2001 From: Chang Date: Thu, 7 Jun 2018 07:29:14 -0700 Subject: [PATCH 24/30] Add miscellaneous changes to improve the code - Move the dynamics_randomization package to rllab.envs.mujoco. - Delete tosser.xml, use xml in rllab/vendor/mujoco_model for test - The old test_dynamics_rand.py only tests for the Variations API, so rewrite it to test for both Variations API and RandomizedEnv. - Reorder imports. - Delete import os.path as osp in rllab/envs/mujoco/__init__.py. Previously added by mistake. 
--- rllab/dynamics_randomization/__init__.py | 5 -- .../test_dynamics_rand.py | 89 ------------------- rllab/dynamics_randomization/tosser.xml | 86 ------------------ rllab/envs/mujoco/__init__.py | 1 - .../mujoco/dynamics_randomization/__init__.py | 5 ++ .../mujoco_model_gen.py | 4 +- .../dynamics_randomization/randomized_env.py | 5 +- .../test_dynamics_rand.py | 21 +++++ .../dynamics_randomization/trpo_swimmer.py | 8 +- .../dynamics_randomization/variation.py | 0 10 files changed, 35 insertions(+), 189 deletions(-) delete mode 100644 rllab/dynamics_randomization/__init__.py delete mode 100644 rllab/dynamics_randomization/test_dynamics_rand.py delete mode 100644 rllab/dynamics_randomization/tosser.xml create mode 100644 rllab/envs/mujoco/dynamics_randomization/__init__.py rename rllab/{ => envs/mujoco}/dynamics_randomization/mujoco_model_gen.py (97%) rename rllab/{ => envs/mujoco}/dynamics_randomization/randomized_env.py (96%) create mode 100644 rllab/envs/mujoco/dynamics_randomization/test_dynamics_rand.py rename rllab/{ => envs/mujoco}/dynamics_randomization/trpo_swimmer.py (77%) rename rllab/{ => envs/mujoco}/dynamics_randomization/variation.py (100%) diff --git a/rllab/dynamics_randomization/__init__.py b/rllab/dynamics_randomization/__init__.py deleted file mode 100644 index d97333cb2..000000000 --- a/rllab/dynamics_randomization/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from rllab.dynamics_randomization.variation import Variations -from rllab.dynamics_randomization.variation import Method -from rllab.dynamics_randomization.variation import Distribution -from rllab.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator -from rllab.dynamics_randomization.randomized_env import randomize diff --git a/rllab/dynamics_randomization/test_dynamics_rand.py b/rllab/dynamics_randomization/test_dynamics_rand.py deleted file mode 100644 index 3354c5147..000000000 --- a/rllab/dynamics_randomization/test_dynamics_rand.py +++ /dev/null @@ -1,89 +0,0 @@ 
-#!/usr/bin/env python3 -""" -Benchmark model mutation for dynamics randomization -""" -from mujoco_py import load_model_from_xml -from mujoco_py import MjSim -from mujoco_py import MjViewer -from rllab.dynamics_randomization import Variations, Method -from rllab.dynamics_randomization import Distribution -from rllab.envs.mujoco import osp - -import numpy as np -import os -import xml.etree.ElementTree as ET - -# Execute at the root of rllab -MUJOCO_PY_PATH = os.getcwd() -TOSSER_XML = osp.join(MUJOCO_PY_PATH, - "rllab/dynamics_randomization/tosser.xml") - -# Load original model text into memory -tosser = ET.parse(TOSSER_XML) - -variations = Variations() -variations.randomize() \ - .attribute("gear") \ - .at_xpath(".//motor[@name='a1']") \ - .with_method(Method.COEFFICIENT) \ - .sampled_from(Distribution.UNIFORM) \ - .with_range(0.5, 1.5) \ - .add() - -variations.randomize() \ - .attribute("gear") \ - .at_xpath(".//motor[@name='a2']") \ - .sampled_from(Distribution.UNIFORM) \ - .with_method(Method.COEFFICIENT) \ - .with_range(0.5, 1.5) \ - .add() - -variations.randomize()\ - .attribute("damping") \ - .at_xpath(".//joint[@name='wr_js']") \ - .with_method(Method.ABSOLUTE) \ - .sampled_from(Distribution.UNIFORM) \ - .with_range(5, 15) \ - .add() - -elem_cache = {} -default_cache = {} -# Retrieve defaults and cache etree elems -for v in variations.get_list(): - e = tosser.find(v.xpath) - elem_cache[v] = e - default_cache[v] = float(e.attrib[v.attrib]) - -for _ in range(1000): - # Mutate model randomly - for v in variations.get_list(): - e = elem_cache[v] - if v.method == Method.COEFFICIENT: - c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) - e.attrib[v.attrib] = str(c * default_cache[v]) - elif v.method == Method.ABSOLUTE: - c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) - e.attrib[v.attrib] = str(c) - else: - raise NotImplementedError("Unknown method") - - # Reify model - model_xml = ET.tostring(tosser.getroot()).decode("ascii") - 
print(model_xml) - - # Run model loop - model = load_model_from_xml(model_xml) - sim = MjSim(model) - #viewer = MjViewer(sim) - - #sim_state = sim.get_state() - - #sim.set_state(sim_state) - - for i in range(1000): - if i < 150: - sim.data.ctrl[:] = 0.0 - else: - sim.data.ctrl[:] = -1.0 - sim.step() - #viewer.render() diff --git a/rllab/dynamics_randomization/tosser.xml b/rllab/dynamics_randomization/tosser.xml deleted file mode 100644 index 39cba60a4..000000000 --- a/rllab/dynamics_randomization/tosser.xml +++ /dev/null @@ -1,86 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/rllab/envs/mujoco/__init__.py b/rllab/envs/mujoco/__init__.py index fd6ed5661..507fb0fc4 100644 --- a/rllab/envs/mujoco/__init__.py +++ b/rllab/envs/mujoco/__init__.py @@ -1,4 +1,3 @@ -import os.path as osp from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv from rllab.envs.mujoco.hopper_env import HopperEnv from rllab.envs.mujoco.inverted_double_pendulum_env import InvertedDoublePendulumEnv diff --git a/rllab/envs/mujoco/dynamics_randomization/__init__.py b/rllab/envs/mujoco/dynamics_randomization/__init__.py new file mode 100644 index 000000000..9ffd42588 --- /dev/null +++ b/rllab/envs/mujoco/dynamics_randomization/__init__.py @@ -0,0 +1,5 @@ +from rllab.envs.mujoco.dynamics_randomization.variation import Variations +from rllab.envs.mujoco.dynamics_randomization.variation import Method +from rllab.envs.mujoco.dynamics_randomization.variation import Distribution +from rllab.envs.mujoco.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator +from rllab.envs.mujoco.dynamics_randomization.randomized_env import randomize diff --git a/rllab/dynamics_randomization/mujoco_model_gen.py b/rllab/envs/mujoco/dynamics_randomization/mujoco_model_gen.py similarity index 97% rename from rllab/dynamics_randomization/mujoco_model_gen.py rename to 
rllab/envs/mujoco/dynamics_randomization/mujoco_model_gen.py index b0e57d29a..9459592cf 100644 --- a/rllab/dynamics_randomization/mujoco_model_gen.py +++ b/rllab/envs/mujoco/dynamics_randomization/mujoco_model_gen.py @@ -8,8 +8,8 @@ from mujoco_py import load_model_from_xml import numpy as np -from rllab.dynamics_randomization import Distribution -from rllab.dynamics_randomization import Method +from rllab.envs.mujoco.dynamics_randomization import Distribution +from rllab.envs.mujoco.dynamics_randomization import Method class MujocoModelGenerator: diff --git a/rllab/dynamics_randomization/randomized_env.py b/rllab/envs/mujoco/dynamics_randomization/randomized_env.py similarity index 96% rename from rllab/dynamics_randomization/randomized_env.py rename to rllab/envs/mujoco/dynamics_randomization/randomized_env.py index 43ac3b02b..8ccad4407 100644 --- a/rllab/dynamics_randomization/randomized_env.py +++ b/rllab/envs/mujoco/dynamics_randomization/randomized_env.py @@ -1,9 +1,10 @@ +import os.path as osp + from mujoco_py import MjSim from rllab.core import Serializable -from rllab.dynamics_randomization import MujocoModelGenerator from rllab.envs import Env -from rllab.envs.mujoco import osp +from rllab.envs.mujoco.dynamics_randomization import MujocoModelGenerator from rllab.envs.mujoco.mujoco_env import MODEL_DIR diff --git a/rllab/envs/mujoco/dynamics_randomization/test_dynamics_rand.py b/rllab/envs/mujoco/dynamics_randomization/test_dynamics_rand.py new file mode 100644 index 000000000..7d363844c --- /dev/null +++ b/rllab/envs/mujoco/dynamics_randomization/test_dynamics_rand.py @@ -0,0 +1,21 @@ +from rllab.envs.mujoco import SwimmerEnv +from rllab.envs.mujoco.dynamics_randomization import Distribution +from rllab.envs.mujoco.dynamics_randomization import Method +from rllab.envs.mujoco.dynamics_randomization import randomize +from rllab.envs.mujoco.dynamics_randomization import Variations + +variations = Variations() +variations.randomize() \ + 
.at_xpath(".//geom[@name='torso']") \ + .attribute("density") \ + .with_method(Method.COEFFICIENT) \ + .sampled_from(Distribution.UNIFORM) \ + .with_range(0.5, 1.5) \ + .add() + +env = randomize(SwimmerEnv(), variations) + +for i in range(1000): + env.reset() + for j in range(1000): + env.step(env.action_space.sample()) diff --git a/rllab/dynamics_randomization/trpo_swimmer.py b/rllab/envs/mujoco/dynamics_randomization/trpo_swimmer.py similarity index 77% rename from rllab/dynamics_randomization/trpo_swimmer.py rename to rllab/envs/mujoco/dynamics_randomization/trpo_swimmer.py index 340a3fe4f..7b23f8fc4 100644 --- a/rllab/dynamics_randomization/trpo_swimmer.py +++ b/rllab/envs/mujoco/dynamics_randomization/trpo_swimmer.py @@ -1,10 +1,10 @@ from rllab.algos import TRPO from rllab.baselines import LinearFeatureBaseline from rllab.envs.mujoco import SwimmerEnv -from rllab.dynamics_randomization import randomize -from rllab.dynamics_randomization import Variations -from rllab.dynamics_randomization import Method -from rllab.dynamics_randomization import Distribution +from rllab.envs.mujoco.dynamics_randomization import Distribution +from rllab.envs.mujoco.dynamics_randomization import Method +from rllab.envs.mujoco.dynamics_randomization import randomize +from rllab.envs.mujoco.dynamics_randomization import Variations from rllab.policies import GaussianMLPPolicy variations = Variations() diff --git a/rllab/dynamics_randomization/variation.py b/rllab/envs/mujoco/dynamics_randomization/variation.py similarity index 100% rename from rllab/dynamics_randomization/variation.py rename to rllab/envs/mujoco/dynamics_randomization/variation.py From f18780e85440db736fdf64873ca45cb5458871b9 Mon Sep 17 00:00:00 2001 From: Chang Date: Thu, 7 Jun 2018 10:18:32 -0700 Subject: [PATCH 25/30] Rename mujoco_model_gen to mujoco_model_generator Package names should follow class names. 
--- rllab/envs/mujoco/dynamics_randomization/__init__.py | 2 +- .../{mujoco_model_gen.py => mujoco_model_generator.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename rllab/envs/mujoco/dynamics_randomization/{mujoco_model_gen.py => mujoco_model_generator.py} (100%) diff --git a/rllab/envs/mujoco/dynamics_randomization/__init__.py b/rllab/envs/mujoco/dynamics_randomization/__init__.py index 9ffd42588..87a34f2f5 100644 --- a/rllab/envs/mujoco/dynamics_randomization/__init__.py +++ b/rllab/envs/mujoco/dynamics_randomization/__init__.py @@ -1,5 +1,5 @@ from rllab.envs.mujoco.dynamics_randomization.variation import Variations from rllab.envs.mujoco.dynamics_randomization.variation import Method from rllab.envs.mujoco.dynamics_randomization.variation import Distribution -from rllab.envs.mujoco.dynamics_randomization.mujoco_model_gen import MujocoModelGenerator +from rllab.envs.mujoco.dynamics_randomization.mujoco_model_generator import MujocoModelGenerator from rllab.envs.mujoco.dynamics_randomization.randomized_env import randomize diff --git a/rllab/envs/mujoco/dynamics_randomization/mujoco_model_gen.py b/rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py similarity index 100% rename from rllab/envs/mujoco/dynamics_randomization/mujoco_model_gen.py rename to rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py From 62331f2ae31e8c504a83cb6165ff7bbef9e2be1e Mon Sep 17 00:00:00 2001 From: Chang Date: Thu, 7 Jun 2018 10:27:41 -0700 Subject: [PATCH 26/30] Delete trpo_swimmer in dynamics_randomization test_dynamics_rand.py is enough for testing. 
Remove trpo_swimmer.py --- .../dynamics_randomization/trpo_swimmer.py | 39 ------------------- 1 file changed, 39 deletions(-) delete mode 100644 rllab/envs/mujoco/dynamics_randomization/trpo_swimmer.py diff --git a/rllab/envs/mujoco/dynamics_randomization/trpo_swimmer.py b/rllab/envs/mujoco/dynamics_randomization/trpo_swimmer.py deleted file mode 100644 index 7b23f8fc4..000000000 --- a/rllab/envs/mujoco/dynamics_randomization/trpo_swimmer.py +++ /dev/null @@ -1,39 +0,0 @@ -from rllab.algos import TRPO -from rllab.baselines import LinearFeatureBaseline -from rllab.envs.mujoco import SwimmerEnv -from rllab.envs.mujoco.dynamics_randomization import Distribution -from rllab.envs.mujoco.dynamics_randomization import Method -from rllab.envs.mujoco.dynamics_randomization import randomize -from rllab.envs.mujoco.dynamics_randomization import Variations -from rllab.policies import GaussianMLPPolicy - -variations = Variations() -variations.randomize() \ - .at_xpath(".//geom[@name='torso']") \ - .attribute("density") \ - .with_method(Method.COEFFICIENT) \ - .sampled_from(Distribution.UNIFORM) \ - .with_range(0.5, 1.5) \ - .add() - -env = randomize(SwimmerEnv(), variations) - -policy = GaussianMLPPolicy( - env_spec=env.spec, - # The neural network policy should have two hidden layers, each with 32 hidden units. - hidden_sizes=(32, 32)) - -baseline = LinearFeatureBaseline(env_spec=env.spec) - -algo = TRPO( - env=env, - policy=policy, - baseline=baseline, - batch_size=4000, - max_path_length=500, - n_itr=40, - discount=0.99, - step_size=0.01, - # plot=True -) -algo.train() From f154042a251df985dd807b0d5f7842965aa1e234 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Thu, 7 Jun 2018 12:40:07 -0700 Subject: [PATCH 27/30] Initialize variations and generation of XML string in Variations The code to initialize the variations and to generate the randomized parameters was moved into the Variations class. 
This will keep all the current code related to variations in the same file to improve the API of dynamics randomization, and will enable a more modular code for further features in the module. --- .../mujoco_model_generator.py | 53 +---------- .../dynamics_randomization/variation.py | 88 +++++++++++++++++-- 2 files changed, 86 insertions(+), 55 deletions(-) diff --git a/rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py b/rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py index 9459592cf..b15d4e644 100644 --- a/rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py +++ b/rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py @@ -4,12 +4,7 @@ from threading import Event from threading import Thread -from lxml import etree from mujoco_py import load_model_from_xml -import numpy as np - -from rllab.envs.mujoco.dynamics_randomization import Distribution -from rllab.envs.mujoco.dynamics_randomization import Method class MujocoModelGenerator: @@ -83,51 +78,9 @@ def _generator_routine(self): Routine of the worker thread in this class. 
""" # Initialize parsing of the model from XML - parsed_model = etree.parse(self._file_path) - elem_cache = {} - default_cache = {} - for v in self._variations.get_list(): - e = parsed_model.find(v.xpath) - if e is None: - raise ValueError( - "Could not find node in the XML model: %s" % v.xpath) - elem_cache[v] = e - - if v.attrib not in e.attrib: - raise ValueError("Attribute %s doesn't exist in node %s" % - (v.attrib, v.xpath)) - val = e.attrib[v.attrib].split(' ') - if len(val) == 1: - default_cache[v] = float(e.attrib[v.attrib]) - else: - default_cache[v] = np.array(list(map(float, val))) - + self._variations.initialize_variations(self._file_path) # Generate model with randomized dynamic parameters while not self._stop_event.is_set(): - for v in self._variations.get_list(): - e = elem_cache[v] - if v.distribution == Distribution.GAUSSIAN: - c = np.random.normal( - loc=v.mean_std[0], scale=v.mean_std[1]) - elif v.distribution == Distribution.UNIFORM: - c = np.random.uniform( - low=v.var_range[0], high=v.var_range[1]) - else: - raise ValueError("Unknown distribution") - - # Check if the sampled value has the same shape with default value - if np.array(c).shape != np.array(default_cache[v]).shape: - raise ValueError( - "Sampled value you input %s don't match with default value %s in the xml node %s" - % (c, default_cache[v], v.xpath)) - - if v.method == Method.COEFFICIENT: - e.attrib[v.attrib] = str(c * default_cache[v]) - elif v.method == Method.ABSOLUTE: - e.attrib[v.attrib] = str(c) - else: - raise ValueError("Unknown method") - - model_xml = etree.tostring(parsed_model.getroot()).decode("ascii") - self._mujoco_model = load_model_from_xml(model_xml) + self._mujoco_model = load_model_from_xml( + self._variations.get_randomized_xml_model()) self._models.put(self._mujoco_model) diff --git a/rllab/envs/mujoco/dynamics_randomization/variation.py b/rllab/envs/mujoco/dynamics_randomization/variation.py index 37149d114..9d271fd65 100644 --- 
a/rllab/envs/mujoco/dynamics_randomization/variation.py +++ b/rllab/envs/mujoco/dynamics_randomization/variation.py @@ -1,4 +1,6 @@ from enum import Enum +from lxml import etree +import numpy as np class Method(Enum): @@ -23,9 +25,9 @@ class Distribution(Enum): class Variation: """ - Each dynamic parameter to randomize is represented by a Variation. This + Each dynamic parameter to be randomized is represented by a Variation. This class works more like a data structure to store the data fields required - to find the dynamic parameter and the randomization to apply to it. + to find the corresponding dynamic parameter and apply the randomization to it. """ def __init__(self, @@ -91,11 +93,15 @@ def mean_std(self): class Variations: """ The purpose of this class is to keep a list of all the variations - that have to be applied to the RandomizedEnv class. + that have to be applied to the randomized environment, as well as + the methods to put the variations in the corresponding XML file. """ def __init__(self): self._list = [] + self._elem_cache = {} + self._default_cache = {} + self._parsed_model = None def randomize(self): """ @@ -107,6 +113,74 @@ def randomize(self): """ return VariationSpec(self) + def initialize_variations(self, xml_file_path): + """ + Once all the variations have been added to the list member of this + class, this method finds each variation as a node within the model + defined in the XML file. + For each variation, a reference to the data structure for the model + is created, as well as the default values of each parameter found + int the model. 
+ + Parameters + ---------- + xml_file_path : string + absolute path to the location of the XML file that contains the + model + """ + self._parsed_model = etree.parse(xml_file_path) + self._elem_cache = {} + self._default_cache = {} + for v in self._list: + e = self._parsed_model.find(v.xpath) + if e is None: + raise ValueError( + "Could not find node in the XML model: %s" % v.xpath) + self._elem_cache[v] = e + + if v.attrib not in e.attrib: + raise ValueError("Attribute %s doesn't exist in node %s" % + (v.attrib, v.xpath)) + val = e.attrib[v.attrib].split(' ') + if len(val) == 1: + self._default_cache[v] = float(e.attrib[v.attrib]) + else: + self._default_cache[v] = np.array(list(map(float, val))) + + def get_randomized_xml_model(self): + """ + After all the variations have been initialized, this method will + generate a XML string with randomized dynamic parameters. + + Returns + ---------- + string + XML string of the model with the randomized dynamic parameters + """ + for v in self._list: + e = self._elem_cache[v] + if v.distribution == Distribution.GAUSSIAN: + c = np.random.normal(loc=v.mean_std[0], scale=v.mean_std[1]) + elif v.distribution == Distribution.UNIFORM: + c = np.random.uniform(low=v.var_range[0], high=v.var_range[1]) + else: + raise ValueError("Unknown distribution") + + # Check if the sampled value has the same shape with default value + if np.array(c).shape != np.array(self._default_cache[v]).shape: + raise ValueError( + "Sampled value you input %s don't match with default value %s in the xml node %s" + % (c, self._default_cache[v], v.xpath)) + + if v.method == Method.COEFFICIENT: + e.attrib[v.attrib] = str(c * self._default_cache[v]) + elif v.method == Method.ABSOLUTE: + e.attrib[v.attrib] = str(c) + else: + raise ValueError("Unknown method") + + return etree.tostring(self._parsed_model.getroot()).decode("ascii") + def get_list(self): """ Returns a list with all the variations @@ -114,8 +188,8 @@ def get_list(self): Returns ------- 
[Variation] - A list of all the dynamic parameters to find in the model XML - and the configuration to randomize each of them + A list of all the dynamic parameters to find in the model XML + and the configuration to randomize each of them """ return self._list @@ -229,6 +303,10 @@ def with_range(self, low, high): return self def add(self): + """ + Adds the variation defined by the fluent interface up to this call + to the list of variations to be randomized. + """ self._variations.add( Variation( xpath=self._xpath, From a1b9f2acd1777d5288fa86adeb80263420851984 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Thu, 7 Jun 2018 20:32:59 -0700 Subject: [PATCH 28/30] Return to single thread, add default values and fix imports There is a bottleneck at function load_model_from_xml from mujoco-py when using multithreading. In a single thread, a call to this function takes a few milliseconds, while with multithreading it takes tens of milliseconds. Maybe this is due to internal data structures that are required for both loading the model in the worker thread and performing the simulations in the main thread, causing the delay in load_model_from_xml and other functions that can be perceived in the cumulative time obtained by the profiler by running test_dynamics_rand.py. Due to this poor performance, the file mujoco_model_generator.py was removed since it serves no purpose now that the variation.py file contains methods to process the XML file, and the calls to obtain the randomized model are done directly in the class RandomizedEnv. 
--- .../mujoco_model_generator.py | 86 ------------------- .../dynamics_randomization/randomized_env.py | 9 +- 2 files changed, 4 insertions(+), 91 deletions(-) delete mode 100644 rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py diff --git a/rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py b/rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py deleted file mode 100644 index b15d4e644..000000000 --- a/rllab/envs/mujoco/dynamics_randomization/mujoco_model_generator.py +++ /dev/null @@ -1,86 +0,0 @@ -import atexit -import queue -from queue import Queue -from threading import Event -from threading import Thread - -from mujoco_py import load_model_from_xml - - -class MujocoModelGenerator: - """ - A worker thread to produce to MuJoCo models with randomized dynamic - parameters, which are specified by the users of rllab with the class - Variations. - """ - - def __init__(self, file_path, variations): - """ - Starts all the member fields of the class and the worker thread. - Parameters - ---------- - file_path : string - The absolute path to the XML file that contains the MuJoCo - model. - variations: Variations - An list of Variation objects that indicate the dynamic parameters - to randomize in the XML file. - """ - self._variations = variations - self._file_path = file_path - # Synchronized queue to store mujoco_models - self._models = Queue(maxsize=10) - # Worker Thread - self._worker_thread = Thread( - target=self._generator_routine, daemon=True, name="Worker-Thread") - # Reference to the generated model - self._mujoco_model = None - # Event to stop the worker thread - self._stop_event = Event() - atexit.register(self.stop) - self._worker_thread.start() - - def get_model(self): - """ - Gets the MuJoCo model produced by the worker thread in this class. - This call may block in case the calling thread asks for the model before - the worker thread has finished. 
- Returns - ------- - PyMjModel - A MuJoCo model with randomized dynamic parameters specified by the - user in this class. - """ - if not self._worker_thread.is_alive(): - # If worker thread terminates because of an error, terminates main thread - raise ChildProcessError("Error raised in Worker-Thread") - - try: - return self._models.get(timeout=1) - except queue.Empty: - # If the queue is empty after 1s, there's something wrong in the worker thread - raise ChildProcessError("Error raised in Worker-Thread") - - def stop(self): - """ - Stops the worker thread. This method has to be called when the corresponding - randomized environment is terminated or when the training is interrupted. - """ - if self._worker_thread.is_alive(): - while not self._models.empty(): - self._models.get() - - self._stop_event.set() - self._worker_thread.join() - - def _generator_routine(self): - """ - Routine of the worker thread in this class. - """ - # Initialize parsing of the model from XML - self._variations.initialize_variations(self._file_path) - # Generate model with randomized dynamic parameters - while not self._stop_event.is_set(): - self._mujoco_model = load_model_from_xml( - self._variations.get_randomized_xml_model()) - self._models.put(self._mujoco_model) diff --git a/rllab/envs/mujoco/dynamics_randomization/randomized_env.py b/rllab/envs/mujoco/dynamics_randomization/randomized_env.py index 8ccad4407..6a91a3a2f 100644 --- a/rllab/envs/mujoco/dynamics_randomization/randomized_env.py +++ b/rllab/envs/mujoco/dynamics_randomization/randomized_env.py @@ -1,10 +1,10 @@ import os.path as osp from mujoco_py import MjSim +from mujoco_py import load_model_from_xml from rllab.core import Serializable from rllab.envs import Env -from rllab.envs.mujoco.dynamics_randomization import MujocoModelGenerator from rllab.envs.mujoco.mujoco_env import MODEL_DIR @@ -25,8 +25,7 @@ def __init__(self, mujoco_env, variations): self._wrapped_env = mujoco_env self._variations = variations 
 self._file_path = osp.join(MODEL_DIR, mujoco_env.FILE) - self._model_generator = MujocoModelGenerator(self._file_path, - variations) + self._variations.initialize_variations(self._file_path) def reset(self): """ @@ -34,7 +33,8 @@ def reset(self): corresponding parameters in the MuJoCo environment class are set. """ - self._wrapped_env.model = self._model_generator.get_model() + self._wrapped_env.model = load_model_from_xml( + self._variations.get_randomized_xml_model()) if hasattr(self._wrapped_env, 'action_space'): del self._wrapped_env.__dict__['action_space'] self._wrapped_env.sim = MjSim(self._wrapped_env.model) @@ -65,7 +65,6 @@ def terminate(self): Besides regular termination, the MuJoCo model generator is stopped. """ - self._model_generator.stop() self._wrapped_env.terminate() @property From ac0d449402a608a9ebbfbc986e70a54c83bdefa2 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Thu, 7 Jun 2018 20:59:56 -0700 Subject: [PATCH 29/30] Sort modules in the package, add newline at EOF and defaults The changes in this commit include: - The modules in the __init__.py file were sorted alphabetically. - The new line at the end of file was added in randomized_env.py. - Default values were assigned for VariationSpec, specifically for fields method, distribution, mean_std and var_range. Fields xpath, attrib, elem and default are specific to the model in the XML file provided by the user, so they cannot be default parameters. Furthermore, elem and default are obtained by parsing the XML file, so the user won't set them. 
--- .../mujoco/dynamics_randomization/__init__.py | 7 +++---- .../dynamics_randomization/randomized_env.py | 2 +- .../dynamics_randomization/variation.py | 20 +++++++++---------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/rllab/envs/mujoco/dynamics_randomization/__init__.py b/rllab/envs/mujoco/dynamics_randomization/__init__.py index 87a34f2f5..00b233701 100644 --- a/rllab/envs/mujoco/dynamics_randomization/__init__.py +++ b/rllab/envs/mujoco/dynamics_randomization/__init__.py @@ -1,5 +1,4 @@ -from rllab.envs.mujoco.dynamics_randomization.variation import Variations -from rllab.envs.mujoco.dynamics_randomization.variation import Method -from rllab.envs.mujoco.dynamics_randomization.variation import Distribution -from rllab.envs.mujoco.dynamics_randomization.mujoco_model_generator import MujocoModelGenerator from rllab.envs.mujoco.dynamics_randomization.randomized_env import randomize +from rllab.envs.mujoco.dynamics_randomization.variation import Distribution +from rllab.envs.mujoco.dynamics_randomization.variation import Method +from rllab.envs.mujoco.dynamics_randomization.variation import Variations diff --git a/rllab/envs/mujoco/dynamics_randomization/randomized_env.py b/rllab/envs/mujoco/dynamics_randomization/randomized_env.py index 6a91a3a2f..4bf329aee 100644 --- a/rllab/envs/mujoco/dynamics_randomization/randomized_env.py +++ b/rllab/envs/mujoco/dynamics_randomization/randomized_env.py @@ -84,4 +84,4 @@ def horizon(self): return self._wrapped_env.horizon -randomize = RandomizedEnv \ No newline at end of file +randomize = RandomizedEnv diff --git a/rllab/envs/mujoco/dynamics_randomization/variation.py b/rllab/envs/mujoco/dynamics_randomization/variation.py index 9d271fd65..3d2c97b1f 100644 --- a/rllab/envs/mujoco/dynamics_randomization/variation.py +++ b/rllab/envs/mujoco/dynamics_randomization/variation.py @@ -31,12 +31,12 @@ class works more like a data structure to store the data fields required """ def __init__(self, - xpath=None, - 
attrib=None, - method=None, - distribution=None, - var_range=None, - mean_std=None, + xpath, + attrib, + method, + distribution, + var_range, + mean_std, elem=None, default=None): @@ -209,10 +209,10 @@ def __init__(self, variations): self._variations = variations self._xpath = None self._attrib = None - self._method = None - self._distribution = None - self._mean_std = None - self._var_range = None + self._method = Method.ABSOLUTE + self._distribution = Distribution.UNIFORM + self._mean_std = (0.0, 1.0) + self._var_range = (0.0, 1.0) self._elem = None self._default = None From 56d2b603b7ed3b3875eab82a87196288a95b8819 Mon Sep 17 00:00:00 2001 From: Angel Gonzalez Date: Thu, 7 Jun 2018 21:02:47 -0700 Subject: [PATCH 30/30] Fix PEP8 formatting in randomized_env.py --- rllab/envs/mujoco/dynamics_randomization/randomized_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rllab/envs/mujoco/dynamics_randomization/randomized_env.py b/rllab/envs/mujoco/dynamics_randomization/randomized_env.py index 4bf329aee..cea618b29 100644 --- a/rllab/envs/mujoco/dynamics_randomization/randomized_env.py +++ b/rllab/envs/mujoco/dynamics_randomization/randomized_env.py @@ -34,7 +34,7 @@ def reset(self): set. """ self._wrapped_env.model = load_model_from_xml( - self._variations.get_randomized_xml_model()) + self._variations.get_randomized_xml_model()) if hasattr(self._wrapped_env, 'action_space'): del self._wrapped_env.__dict__['action_space'] self._wrapped_env.sim = MjSim(self._wrapped_env.model)