diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 0532890abe..98f67618ea 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -61,6 +61,8 @@ This results in much less memory being allocated during inference with `CameraSe #### ml-agents / ml-agents-envs / gym-unity (Python) - Some console output have been moved from `info` to `debug` and will not be printed by default. If you want all messages to be printed, you can run `mlagents-learn` with the `--debug` option or add the line `debug: true` at the top of the yaml config file. (#5211) +- When using a configuration YAML, it is required to define all behaviors found in a Unity +executable in the trainer configuration YAML, or specify `default_settings`. (#5210) - The embedding size of attention layers used when a BufferSensor is in the scene has been changed. It is now fixed to 128 units. It might be impossible to resume training from a checkpoint of a previous version. 
(#5272) ### Bug Fixes diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 732c3d52f8..6226cf9bf4 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -661,7 +661,7 @@ def _check_batch_size_seq_length(self, attribute, value): ) @staticmethod - def dict_to_defaultdict(d: Dict, t: type) -> DefaultDict: + def dict_to_trainerdict(d: Dict, t: type) -> "TrainerSettings.DefaultTrainerDict": return TrainerSettings.DefaultTrainerDict( cattr.structure(d, Dict[str, TrainerSettings]) ) @@ -718,12 +718,26 @@ def __init__(self, *args): super().__init__(*args) else: super().__init__(TrainerSettings, *args) + self._config_specified = True + + def set_config_specified(self, require_config_specified: bool) -> None: + self._config_specified = require_config_specified def __missing__(self, key: Any) -> "TrainerSettings": if TrainerSettings.default_override is not None: - return copy.deepcopy(TrainerSettings.default_override) + self[key] = copy.deepcopy(TrainerSettings.default_override) + elif self._config_specified: + raise TrainerConfigError( + f"The behavior name {key} has not been specified in the trainer configuration. " + f"Please add an entry in the configuration file for {key}, or set default_settings." + ) else: - return TrainerSettings() + logger.warning( + f"Behavior name {key} does not match any behaviors specified " + f"in the trainer configuration file. A default configuration will be used."
+ ) + self[key] = TrainerSettings() + return self[key] # COMMAND LINE ######################################################################### @@ -788,7 +802,7 @@ class TorchSettings: @attr.s(auto_attribs=True) class RunOptions(ExportableSettings): default_settings: Optional[TrainerSettings] = None - behaviors: DefaultDict[str, TrainerSettings] = attr.ib( + behaviors: TrainerSettings.DefaultTrainerDict = attr.ib( factory=TrainerSettings.DefaultTrainerDict ) env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings) @@ -800,7 +814,8 @@ class RunOptions(ExportableSettings): # These are options that are relevant to the run itself, and not the engine or environment. # They will be left here. debug: bool = parser.get_default("debug") - # Strict conversion + + # Convert to settings while making sure all fields are valid cattr.register_structure_hook(EnvironmentSettings, strict_to_cls) cattr.register_structure_hook(EngineSettings, strict_to_cls) cattr.register_structure_hook(CheckpointSettings, strict_to_cls) @@ -816,7 +831,7 @@ class RunOptions(ExportableSettings): ) cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure) cattr.register_structure_hook( - DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict + TrainerSettings.DefaultTrainerDict, TrainerSettings.dict_to_trainerdict ) cattr.register_unstructure_hook(collections.defaultdict, defaultdict_to_dict) @@ -839,8 +854,12 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions": "engine_settings": {}, "torch_settings": {}, } + _require_all_behaviors = True if config_path is not None: configured_dict.update(load_config(config_path)) + else: + # If we're not loading from a file, we don't require all behavior names to be specified. + _require_all_behaviors = False # Use the YAML file values for all values not specified in the CLI. 
for key in configured_dict.keys(): @@ -868,6 +887,10 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions": configured_dict[key] = val final_runoptions = RunOptions.from_dict(configured_dict) + # This check is needed to bypass type checking while keeping the structure hook on the dict working + if isinstance(final_runoptions.behaviors, TrainerSettings.DefaultTrainerDict): + # configure whether or not we should require all behavior names to be found in the config YAML + final_runoptions.behaviors.set_config_specified(_require_all_behaviors) return final_runoptions @staticmethod diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index 52a0460718..5e4a78d6d5 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -77,9 +77,9 @@ def test_no_configuration(): Verify that a new config will have a PPO trainer with extrinsic rewards. """ blank_runoptions = RunOptions() + blank_runoptions.behaviors.set_config_specified(False) assert isinstance(blank_runoptions.behaviors["test"], TrainerSettings) assert isinstance(blank_runoptions.behaviors["test"].hyperparameters, PPOSettings) - assert ( RewardSignalType.EXTRINSIC in blank_runoptions.behaviors["test"].reward_signals ) @@ -508,7 +508,7 @@ def test_default_settings(): default_settings_cls = cattr.structure(default_settings, TrainerSettings) check_if_different(default_settings_cls, run_options.behaviors["test2"]) - # Check that an existing beehavior overrides the defaults in specified fields + # Check that an existing behavior overrides the defaults in specified fields test1_settings = run_options.behaviors["test1"] assert test1_settings.max_steps == 2 assert test1_settings.network_settings.hidden_units == 2000 @@ -519,6 +519,37 @@ def test_default_settings(): check_if_different(test1_settings, default_settings_cls) +def test_config_specified(): + # Test require all behavior names to be specified (or not) + #
Remove any pre-set defaults + TrainerSettings.default_override = None + behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}} + run_options_dict = {"behaviors": behaviors} + ro = RunOptions.from_dict(run_options_dict) + # Don't require all behavior names + ro.behaviors.set_config_specified(False) + # Test that we can grab an entry that is not in the dict. + assert isinstance(ro.behaviors["test2"], TrainerSettings) + + # Create strict RunOptions with no default_settings + run_options_dict = {"behaviors": behaviors} + ro = RunOptions.from_dict(run_options_dict) + # Require all behavior names + ro.behaviors.set_config_specified(True) + with pytest.raises(TrainerConfigError): + # Variable must be accessed otherwise Python won't query the dict + print(ro.behaviors["test2"]) + + # Create strict RunOptions with default settings + default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}} + run_options_dict = {"default_settings": default_settings, "behaviors": behaviors} + ro = RunOptions.from_dict(run_options_dict) + # Require all behavior names + ro.behaviors.set_config_specified(True) + # Test that we can grab an entry that is not in the dict. + assert isinstance(ro.behaviors["test2"], TrainerSettings) + + def test_pickle(): # Make sure RunOptions is pickle-able.
run_options = RunOptions() diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py index 62c7be9107..e8459d0752 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_util.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_util.py @@ -71,6 +71,8 @@ def test_handles_no_config_provided(): """ brain_name = "testbrain" no_default_config = RunOptions().behaviors + # Pretend this was created without a YAML file + no_default_config.set_config_specified(False) trainer_factory = TrainerFactory( trainer_config=no_default_config, diff --git a/ml-agents/mlagents/trainers/trainer/trainer_factory.py b/ml-agents/mlagents/trainers/trainer/trainer_factory.py index c5ed2a9f64..9d351e3b5c 100644 --- a/ml-agents/mlagents/trainers/trainer/trainer_factory.py +++ b/ml-agents/mlagents/trainers/trainer/trainer_factory.py @@ -56,11 +56,6 @@ def __init__( self.ghost_controller = GhostController() def generate(self, behavior_name: str) -> Trainer: - if behavior_name not in self.trainer_config.keys(): - logger.warning( - f"Behavior name {behavior_name} does not match any behaviors specified" - f"in the trainer configuration file: {sorted(self.trainer_config.keys())}" - ) trainer_settings = self.trainer_config[behavior_name] return TrainerFactory._initialize_trainer( trainer_settings,