Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature] Enable default settings for TrainerSettings #4448

Merged
merged 7 commits into from
Sep 3, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Note that PyTorch 1.6.0 or greater should be installed to use this feature; see
- The minimum supported version of TensorFlow was increased to 1.14.0. (#4411)
- A CNN (`vis_encode_type: match3`) for smaller grids, e.g. board games, has been added.
(#4434)
- You can now again specify a default configuration for your behaviors. Specify `default_settings` in
your trainer configuration to do so. (#4448)

### Bug Fixes
#### com.unity.ml-agents (C#)
Expand Down
18 changes: 18 additions & 0 deletions docs/Training-ML-Agents.md
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,24 @@ each of these parameters mean and provide guidelines on how to set them. See
description of all the configurations listed above, along with their defaults.
Unless otherwise specified, omitting a configuration will revert it to its default.

### Default Behavior Settings

In some cases, you may want to specify a set of default configurations for your Behaviors.
This may be useful, for instance, if your Behavior names are generated procedurally by
the environment and not known before runtime, or if you have many Behaviors with very similar
settings. To specify a default configuration, insert a `default_settings` section in your YAML.
This section should be formatted exactly like a configuration for a Behavior.

```yaml
default_settings:
# < Same as Behavior configuration >
behaviors:
# < Same as above >
```

Behaviors found in the environment that aren't specified in the YAML will now use the `default_settings`,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Behaviors found in the environment that aren't secified in the YAML will now use the `default_settings`,
Behaviors found in the environment that aren't specified in the YAML will now use the `default_settings`,

and unspecified settings in behavior configurations will default to the values in `default_settings` if
specified there.

### Environment Parameters

Expand Down
60 changes: 55 additions & 5 deletions ml-agents/mlagents/trainers/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,24 @@

import attr
import cattr
from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple, Union
from typing import (
Dict,
Optional,
List,
Any,
DefaultDict,
Mapping,
Tuple,
Union,
ClassVar,
)
from enum import Enum
import collections
import argparse
import abc
import numpy as np
import math
import copy

from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser
from mlagents.trainers.cli_utils import load_config
Expand Down Expand Up @@ -46,6 +57,17 @@ def defaultdict_to_dict(d: DefaultDict) -> Dict:
return {key: cattr.unstructure(val) for key, val in d.items()}


def deep_update_dict(d: Dict, update_d: Mapping) -> None:
    """
    Recursively merge update_d into d, in place.

    Behaves like dict.update(), except that when both the existing value and
    the incoming value for a key are mappings, they are merged recursively
    rather than the incoming mapping replacing the existing one wholesale.

    :param d: Dictionary to be updated in place.
    :param update_d: Mapping whose entries override/extend those in d.
    """
    for k in update_d:
        incoming = update_d[k]
        existing = d.get(k)
        # Only recurse when both sides are mappings; otherwise overwrite.
        if isinstance(existing, Mapping) and isinstance(incoming, Mapping):
            deep_update_dict(existing, incoming)
        else:
            d[k] = incoming


class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True
Expand Down Expand Up @@ -539,6 +561,7 @@ class FrameworkType(Enum):

@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
default_override: ClassVar[Optional["TrainerSettings"]] = None
trainer_type: TrainerType = TrainerType.PPO
hyperparameters: HyperparamSettings = attr.ib()

Expand Down Expand Up @@ -578,8 +601,8 @@ def _check_batch_size_seq_length(self, attribute, value):

@staticmethod
def dict_to_defaultdict(d: Dict, t: type) -> DefaultDict:
return collections.defaultdict(
TrainerSettings, cattr.structure(d, Dict[str, TrainerSettings])
return TrainerSettings.DefaultTrainerDict(
cattr.structure(d, Dict[str, TrainerSettings])
)

@staticmethod
Expand All @@ -588,10 +611,18 @@ def structure(d: Mapping, t: type) -> Any:
Helper method to structure a TrainerSettings class. Meant to be registered with
cattr.register_structure_hook() and called with cattr.structure().
"""

if not isinstance(d, Mapping):
raise TrainerConfigError(f"Unsupported config {d} for {t.__name__}.")

d_copy: Dict[str, Any] = {}
d_copy.update(d)

# Check if a default_settings was specified. If so, used those as the default
# rather than an empty dict.
if TrainerSettings.default_override is not None:
d_copy.update(cattr.unstructure(TrainerSettings.default_override))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this will do what you want for nested dictionaries. For example if you had

{"foo": {"x": 1}}.update({"foo", {"y": 2}})

you'd end up with {"foo", {"y": 2}}, but I think you really want {"foo", {"x": 1, "y": 2}} (i.e. recursively merge dictionary values)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed using a recursive dict update


deep_update_dict(d_copy, d)

for key, val in d_copy.items():
if attr.has(type(val)):
Expand All @@ -613,6 +644,16 @@ def structure(d: Mapping, t: type) -> Any:
d_copy[key] = check_and_structure(key, val, t)
return t(**d_copy)

class DefaultTrainerDict(collections.defaultdict):
def __init__(self, *args):
super().__init__(TrainerSettings, *args)

def __missing__(self, key: Any) -> "TrainerSettings":
if TrainerSettings.default_override is not None:
return copy.deepcopy(TrainerSettings.default_override)
else:
return TrainerSettings()


# COMMAND LINE #########################################################################
@attr.s(auto_attribs=True)
Expand Down Expand Up @@ -653,8 +694,9 @@ class EngineSettings:

@attr.s(auto_attribs=True)
class RunOptions(ExportableSettings):
default_settings: Optional[TrainerSettings] = None
behaviors: DefaultDict[str, TrainerSettings] = attr.ib(
factory=lambda: collections.defaultdict(TrainerSettings)
factory=TrainerSettings.DefaultTrainerDict
)
env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings)
engine_settings: EngineSettings = attr.ib(factory=EngineSettings)
Expand Down Expand Up @@ -733,4 +775,12 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions":

@staticmethod
def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":
# If a default settings was specified, set the TrainerSettings class override
if (
"default_settings" in options_dict.keys()
and options_dict["default_settings"] is not None
):
TrainerSettings.default_override = cattr.structure(
options_dict["default_settings"], TrainerSettings
)
return cattr.structure(options_dict, RunOptions)
32 changes: 32 additions & 0 deletions ml-agents/mlagents/trainers/tests/test_settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import attr
import cattr
import pytest
import yaml

Expand All @@ -20,6 +21,7 @@
GaussianSettings,
MultiRangeUniformSettings,
TrainerType,
deep_update_dict,
strict_to_cls,
)
from mlagents.trainers.exception import TrainerConfigError
Expand Down Expand Up @@ -104,6 +106,14 @@ class TestAttrsClass:
strict_to_cls("non_dict_input", TestAttrsClass)


def test_deep_update_dict():
    """deep_update_dict should merge nested mappings instead of replacing them."""
    base = {"a": 1, "b": 2, "c": {"d": 3}}
    incoming = {"a": 2, "c": {"d": 4, "e": 5}}

    deep_update_dict(base, incoming)
    expected = {"a": 2, "b": 2, "c": {"d": 4, "e": 5}}
    assert base == expected


def test_trainersettings_structure():
"""
Test structuring method for TrainerSettings
Expand Down Expand Up @@ -468,3 +478,25 @@ def test_environment_settings():
# Multiple environments with no env_path is an error
with pytest.raises(ValueError):
EnvironmentSettings(num_envs=2)


def test_default_settings():
    """
    Test that default_settings is applied to unknown behaviors, and that
    explicitly-specified behavior settings override the defaults field-by-field.
    """
    # Make default settings, one nested and one not.
    default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}}
    behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}}
    run_options_dict = {"default_settings": default_settings, "behaviors": behaviors}
    run_options = RunOptions.from_dict(run_options_dict)

    # Check that a new behavior has the default settings
    default_settings_cls = cattr.structure(default_settings, TrainerSettings)
    check_if_different(default_settings_cls, run_options.behaviors["test2"])

    # Check that an existing behavior overrides the defaults in specified fields
    test1_settings = run_options.behaviors["test1"]
    assert test1_settings.max_steps == 2
    assert test1_settings.network_settings.hidden_units == 2000
    assert test1_settings.network_settings.num_layers == 1000
    # Change the overridden fields back, and check if the rest are equal.
    # BUG FIX: this line used "==" (a no-op comparison) instead of "=",
    # so hidden_units was never actually restored before the final check.
    test1_settings.max_steps = 1
    test1_settings.network_settings.hidden_units = (
        default_settings_cls.network_settings.hidden_units
    )
    check_if_different(test1_settings, default_settings_cls)