From c9a7b7767ec10f0083f5d001316cb22205382e61 Mon Sep 17 00:00:00 2001 From: Travis Addair Date: Tue, 6 Sep 2022 09:51:22 -0700 Subject: [PATCH] Extended hyperopt to support nested configuration block parameters (#2445) --- ludwig/hyperopt/execution.py | 168 ++--------------- ludwig/hyperopt/results.py | 4 - ludwig/hyperopt/run.py | 75 +------- ludwig/hyperopt/utils.py | 92 +++++++++- ludwig/utils/defaults.py | 4 +- tests/integration_tests/test_hyperopt.py | 170 ++++++++++-------- tests/integration_tests/test_hyperopt_ray.py | 4 +- .../test_hyperopt_ray_horovod.py | 4 +- tests/ludwig/hyperopt/test_hyperopt.py | 102 +++++++---- 9 files changed, 281 insertions(+), 342 deletions(-) diff --git a/ludwig/hyperopt/execution.py b/ludwig/hyperopt/execution.py index ab7a4fbcd7d..c95e440307c 100644 --- a/ludwig/hyperopt/execution.py +++ b/ludwig/hyperopt/execution.py @@ -12,7 +12,7 @@ from functools import lru_cache from inspect import signature from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import ray from packaging import version @@ -28,29 +28,14 @@ from ludwig.backend import initialize_backend, RAY from ludwig.backend.ray import initialize_ray from ludwig.callbacks import Callback -from ludwig.constants import ( - COLUMN, - COMBINER, - DECODER, - DEFAULTS, - ENCODER, - INPUT_FEATURES, - MAXIMIZE, - OUTPUT_FEATURES, - PREPROCESSING, - TEST, - TRAINER, - TRAINING, - TYPE, - VALIDATION, -) -from ludwig.hyperopt.results import RayTuneResults, TrialResults +from ludwig.constants import MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, VALIDATION +from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.hyperopt.search_algos import get_search_algorithm -from ludwig.hyperopt.utils import load_json_values +from ludwig.hyperopt.utils import load_json_values, substitute_parameters from ludwig.modules.metric_modules import get_best_function from ludwig.utils import metric_utils from ludwig.utils.data_utils import hash_dict, NumpyEncoder -from ludwig.utils.defaults import default_random_seed +from ludwig.utils.defaults import default_random_seed, merge_with_defaults from ludwig.utils.fs_utils import has_remote_protocol from ludwig.utils.misc_utils import get_from_registry @@ -435,7 +420,6 @@ def _run_experiment( checkpoint_dir, hyperopt_dict, decode_ctx, - features_eligible_for_shared_params, is_using_ray_backend=False, ): for gpu_id in ray.get_gpu_ids(): @@ -453,9 +437,9 @@ def _run_experiment( trial_dir = Path(tune.get_trial_dir()) driver_trial_location = ray.util.get_node_ip_address() - modified_config = substitute_parameters( - copy.deepcopy(hyperopt_dict["config"]), config, features_eligible_for_shared_params - ) + modified_config = substitute_parameters(copy.deepcopy(hyperopt_dict["config"]), config) + + modified_config = merge_with_defaults(modified_config) hyperopt_dict["config"] = modified_config hyperopt_dict["experiment_name "] = f'{hyperopt_dict["experiment_name"]}_{trial_id}' @@ -648,9 +632,8 @@ def execute( random_seed=default_random_seed, debug=False, hyperopt_log_verbosity=3, - features_eligible_for_shared_params=None, **kwargs, - ) -> RayTuneResults: + ) -> HyperoptResults: if isinstance(dataset, str) and not has_remote_protocol(dataset) and not os.path.isabs(dataset): dataset = os.path.abspath(dataset) @@ -744,7 +727,6 @@ def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None): checkpoint_dir, local_hyperopt_dict, self.decode_ctx, - 
features_eligible_for_shared_params, _is_ray_backend(backend), ) @@ -868,7 +850,7 @@ def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None): logger.warning("No trials reported results; check if time budget lower than epoch latency") ordered_trials = [] - return RayTuneResults(ordered_trials=ordered_trials, experiment_analysis=analysis) + return HyperoptResults(ordered_trials=ordered_trials, experiment_analysis=analysis) class CallbackStopper(Stopper): @@ -905,136 +887,6 @@ def set_values(params: Dict[str, Any], model_dict: Dict[str, Any]): model_dict[key] = value -def update_features_with_shared_params( - section_dict: Dict[str, Any], - trial_parameters_dict: Dict[str, Dict[str, Any]], - config_feature_group: str = None, - features_eligible_for_shared_params: Dict[str, Dict[str, Set]] = None, -): - """Updates the parameters of feature_name in section_dict based on hyperopt parameters sampled. - - :param section_dict: Underlying config for the specific input/output feature populated with potentially a mix of - default and feature-specific parameters. This may be updated with values from the hyperopt search space. - :type section_dict: dict[str, any] - :param trial_parameters_dict: Config produced by the hyperopt sampler based on the parameter search space. It maps - the name of the feature to the sampled parameters for that feature. For default parameters, it creates - nested dictionaries for each feature type. - :type trial_parameters_dict: dict[str, dict[str, any]] - :param config_feature_group: Indicates whether the feature is an input feature or output feature (can be either of - `input_features` or `output_features`). - :type config_feature_group: str - :param features_eligible_for_shared_params: Collection of names of features that are eligible for using shared - parameters, keyed by `input_features` or `output_features` and then by feature type. - :type features_eligible_for_shared_params: dict[str, dict[str, set]] - """ - - feature_name = section_dict.get(COLUMN) - feature_type = section_dict.get(TYPE) - - # No default parameters specified in hyperopt parameter search space - if DEFAULTS not in trial_parameters_dict: - return - - # This feature type should have a sampled value from the default parameters passed in - if feature_type not in trial_parameters_dict.get(DEFAULTS): - return - - # All features in Ludwig config use non-default encoders or decoders - if not features_eligible_for_shared_params: - logger.warning( - """ - Default parameters specified in the hyperopt parameter search space are not being used since features - in Ludwig config are not using default encoders or decoders. You may consider either setting features to - their default encoders or decoders, or specifying feature with encoder specific parameters instead of - defaults in the parameter search space. 
- """ - ) - return - - features_eligible_for_shared_params = features_eligible_for_shared_params.get(config_feature_group) - - # At least one of this feature's feature type must use non-default encoders/decoders in the config - if feature_type not in features_eligible_for_shared_params: - return - - # This feature must use a default encoder/decoder - if feature_name not in features_eligible_for_shared_params.get(feature_type): - return - - sampled_default_shared_params = trial_parameters_dict.get(DEFAULTS).get(feature_type) - shared_params_copy = copy.deepcopy(sampled_default_shared_params) - - # Remove encoder/decoder from output/input features - if config_feature_group == INPUT_FEATURES: - if DECODER in sampled_default_shared_params: - del shared_params_copy[DECODER] - else: - if ENCODER in sampled_default_shared_params: - del shared_params_copy[ENCODER] - sampled_default_shared_params = shared_params_copy - - set_values(sampled_default_shared_params, section_dict) - - -def update_section_dict( - section_dict: Dict[str, Any], parameter_name: str, trial_parameters_dict: Dict[str, Dict[str, Any]] -): - """Update a parameter in section config with sampled value from hyperopt.""" - if parameter_name not in trial_parameters_dict: - return - - params = trial_parameters_dict[parameter_name] - set_values(params, section_dict) - - -def get_parameters_dict(parameters): - parameters_dict = {} - for name, value in parameters.items(): - curr_dict = parameters_dict - name_list = name.split(".") - for i, name_elem in enumerate(name_list): - if i == len(name_list) - 1: - curr_dict[name_elem] = value - else: - name_dict = curr_dict.get(name_elem, {}) - curr_dict[name_elem] = name_dict - curr_dict = name_dict - return parameters_dict - - -def substitute_parameters( - config: Dict[str, Any], - parameters: Dict[str, Any], - features_eligible_for_shared_params: Dict[str, Dict[str, Set]] = None, -): - """Update Ludwig config with parameters sampled from the Hyperopt sampler.""" - parameters_dict = get_parameters_dict(parameters) - for input_feature in config[INPUT_FEATURES]: - # Update shared params - update_features_with_shared_params( - input_feature, - parameters_dict, - config_feature_group=INPUT_FEATURES, - features_eligible_for_shared_params=features_eligible_for_shared_params, - ) - # Update or overwrite any feature specific hyperopt params - update_section_dict(input_feature, input_feature[COLUMN], parameters_dict) - for output_feature in config[OUTPUT_FEATURES]: - # Update shared params - update_features_with_shared_params( - output_feature, - parameters_dict, - config_feature_group=OUTPUT_FEATURES, - features_eligible_for_shared_params=features_eligible_for_shared_params, - ) - # Update or overwrite any feature specific hyperopt params - update_section_dict(output_feature, output_feature[COLUMN], parameters_dict) - update_section_dict(config[COMBINER], COMBINER, parameters_dict) - update_section_dict(config[TRAINER], TRAINER, parameters_dict) - update_section_dict(config[PREPROCESSING], PREPROCESSING, parameters_dict) - return config - - def run_experiment( config, parameters=None, diff --git a/ludwig/hyperopt/results.py b/ludwig/hyperopt/results.py index 855bfcd9589..6e6459480bd 100644 --- a/ludwig/hyperopt/results.py +++ b/ludwig/hyperopt/results.py @@ -23,8 +23,4 @@ class TrialResults: @dataclass class HyperoptResults: ordered_trials: List[TrialResults] - - -@dataclass -class RayTuneResults(HyperoptResults): experiment_analysis: ExperimentAnalysis diff --git a/ludwig/hyperopt/run.py 
b/ludwig/hyperopt/run.py index 390c3758a3c..49abae0efbf 100644 --- a/ludwig/hyperopt/run.py +++ b/ludwig/hyperopt/run.py @@ -1,8 +1,8 @@ +import copy import logging import os -from collections import defaultdict from pprint import pformat -from typing import Any, Dict, List, Optional, Set, Union +from typing import List, Optional, Union import pandas as pd import yaml @@ -12,11 +12,8 @@ from ludwig.callbacks import Callback from ludwig.constants import ( COMBINED, - DECODER, - ENCODER, EXECUTOR, HYPEROPT, - INPUT_FEATURES, LOSS, MINIMIZE, NAME, @@ -28,7 +25,7 @@ VALIDATION, ) from ludwig.data.split import get_splitter -from ludwig.features.feature_registries import input_type_registry, output_type_registry +from ludwig.features.feature_registries import output_type_registry from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.utils import print_hyperopt_results, save_hyperopt_stats, should_tune_preprocessing from ludwig.utils.backward_compatibility import upgrade_to_latest_version @@ -188,15 +185,12 @@ def hyperopt( else: config_dict = config - # Get mapping of input/output features that don't have an encoder for shared parameters - features_eligible_for_shared_params = { - INPUT_FEATURES: get_features_eligible_for_shared_params(config_dict, INPUT_FEATURES), - OUTPUT_FEATURES: get_features_eligible_for_shared_params(config_dict, OUTPUT_FEATURES), - } - # backwards compatibility config = upgrade_to_latest_version(config_dict) + # Retain pre-merged config for hyperopt schema generation + premerged_config = copy.deepcopy(config) + # merge config with defaults config = merge_with_defaults(config) @@ -212,12 +206,6 @@ def hyperopt( logging.info(pformat(hyperopt_config, indent=4)) logging.info("\n") - logging.info( - "Features that may be updated in hyperopt trials if default parameters are specified in the search space" - ) - logging.info(pformat(dict(features_eligible_for_shared_params), indent=4)) - logging.info("\n") - search_alg = hyperopt_config["search_alg"] executor = hyperopt_config[EXECUTOR] parameters = hyperopt_config["parameters"] @@ -339,7 +327,7 @@ def hyperopt( callback.on_hyperopt_start(experiment_name) hyperopt_results = hyperopt_executor.execute( - config, + premerged_config, dataset=dataset, training_set=training_set, validation_set=validation_set, @@ -366,7 +354,6 @@ def hyperopt( backend=backend, random_seed=random_seed, hyperopt_log_verbosity=hyperopt_log_verbosity, - features_eligible_for_shared_params=features_eligible_for_shared_params, **kwargs, ) @@ -410,51 +397,3 @@ def update_hyperopt_params_with_defaults(hyperopt_params): hyperopt_params[EXECUTOR], executor_defaults, ) - - -def get_features_eligible_for_shared_params( - config_dict: Dict[str, Any], config_feature_type: str -) -> Dict[str, Dict[str, Set]]: - """Generates a mapping of feature type to the corresponding set of features without an encoder or one using the - default encoder for that feature type. - - These features may be considered for potential shared parameter search spaces depending on the parameter space - defined later within the hyperopt config. This applies to both config_feature_types (input_features and - output_features). The shared parameters for both config_feature_types must be specified separately. - - Note that shared default parameter search spaces are not applied to features with non-default encoders or - non-default decoders, since shared default parameter values should only apply to default modules. 
- - Returns: - Dict of feature type -> set of feature names with that type that are eligible for shared parameters (they use - the default encoder or default decoder). - - TODO(#2167): Make sure each feature has a type defined in the JSONSchema for Hyperopt - """ - - if config_feature_type not in config_dict: - raise ValueError(f"{config_feature_type} must be defined in Ludwig config.") - - features_eligible_for_shared_params = defaultdict(set) - - features = config_dict.get(config_feature_type) - feature_registry = input_type_registry if config_feature_type == INPUT_FEATURES else output_type_registry - - for feature in features: - if TYPE not in feature: - raise ValueError("Ludwig expects feature types to be defined for each feature within the config.") - - feature_schema = get_from_registry(feature.get(TYPE), feature_registry).get_schema_cls() - - if config_feature_type == INPUT_FEATURES: - default_encoder = feature_schema().encoder.type - if feature.get(ENCODER, None) and feature.get(ENCODER).get(TYPE, None) != default_encoder: - continue - else: - default_decoder = feature_schema().decoder.type - if feature.get(DECODER, None) and feature.get(DECODER).get(TYPE, None) != default_decoder: - continue - - features_eligible_for_shared_params[feature[TYPE]].add(feature[NAME]) - - return features_eligible_for_shared_params diff --git a/ludwig/hyperopt/utils.py b/ludwig/hyperopt/utils.py index dbe4170ea73..d04ae170d65 100644 --- a/ludwig/hyperopt/utils.py +++ b/ludwig/hyperopt/utils.py @@ -1,12 +1,15 @@ +import copy import dataclasses import json import logging import os +from typing import Any, Dict -from ludwig.constants import HYPEROPT, PARAMETERS, PREPROCESSING +from ludwig.constants import HYPEROPT, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PARAMETERS, PREPROCESSING from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.utils.data_utils import save_json +from ludwig.utils.misc_utils import merge_dict from ludwig.utils.print_utils import print_boxed logger = logging.getLogger(__name__) @@ -51,3 +54,90 @@ def should_tune_preprocessing(config): if f"{PREPROCESSING}." 
in param_name:
             return True
     return False
+
+
+def parameter_to_dict(name, value):
+    if name == ".":
+        # Parameter name "." means the top-level config
+        return value
+
+    parameter_dict = {}
+    curr_dict = parameter_dict
+    name_list = name.split(".")
+    for i, name_elem in enumerate(name_list):
+        if i == len(name_list) - 1:
+            curr_dict[name_elem] = value
+        else:
+            name_dict = curr_dict.get(name_elem, {})
+            curr_dict[name_elem] = name_dict
+            curr_dict = name_dict
+    return parameter_dict
+
+
+def feature_list_to_dict(config: Dict[str, Any]) -> Dict[str, Any]:
+    input_features_dict = {}
+    for feature in config[INPUT_FEATURES]:
+        input_features_dict[feature[NAME]] = feature
+
+    output_features_dict = {}
+    for feature in config[OUTPUT_FEATURES]:
+        output_features_dict[feature[NAME]] = feature
+
+    config = copy.copy(config)
+    config[INPUT_FEATURES] = input_features_dict
+    config[OUTPUT_FEATURES] = output_features_dict
+    return config
+
+
+def feature_dict_to_list(config: Dict[str, Any]) -> Dict[str, Any]:
+    # This works because Python dicts are order-preserving, so we do not need to
+    # do anything special to map from a key in the dict to an index in a list
+    input_features_list = []
+    for feature in config[INPUT_FEATURES].values():
+        input_features_list.append(feature)
+
+    output_features_list = []
+    for feature in config[OUTPUT_FEATURES].values():
+        output_features_list.append(feature)
+
+    config = copy.copy(config)
+    config[INPUT_FEATURES] = input_features_list
+    config[OUTPUT_FEATURES] = output_features_list
+    return config
+
+
+def substitute_parameters(
+    config: Dict[str, Any],
+    parameters: Dict[str, Any],
+):
+    """Update Ludwig config with parameters sampled from the Hyperopt sampler."""
+
+    # Collect the sets of names for each feature grouping so we can map feature names to
+    # groups
+    input_feature_names = {feature[NAME] for feature in config[INPUT_FEATURES]}
+    output_feature_names = {feature[NAME] for feature in config[OUTPUT_FEATURES]}
+
+    # Features in the user config are provided as a list, but in hyperopt we reference
+    # features by name, so convert temporarily to a dict to simplify the merge process.
+    config = feature_list_to_dict(config)
+
+    # Merge parameters into the user configuration in order. As such, if there are conflicting
+    # params, the later params will take precedence.
+    for name, value in parameters.items():
+        # User params are provided as <feature_name>.<param_name>, but we group input / output features
+        # together during the merge to make it easier and unambiguous to convert back and forth.
+        # TODO(travis): we should revisit the user format here, as it silently breaks situations
+        # where the user has a feature named "trainer", "combiner", etc.
+        prefix = name.split(".")[0]
+        if prefix in input_feature_names:
+            name = f"{INPUT_FEATURES}.{name}"
+        elif prefix in output_feature_names:
+            name = f"{OUTPUT_FEATURES}.{name}"
+
+        param_dict = parameter_to_dict(name, value)
+        config = merge_dict(config, param_dict)
+
+    # Now that all features have been merged, convert back to the original list format.
+    config = feature_dict_to_list(config)
+
+    return config
diff --git a/ludwig/utils/defaults.py b/ludwig/utils/defaults.py
index 9be6f93d2cb..2322ae154a1 100644
--- a/ludwig/utils/defaults.py
+++ b/ludwig/utils/defaults.py
@@ -277,9 +277,9 @@ def merge_with_defaults(config: dict) -> dict:  # noqa: F821
     # ===== Model Type =====
     set_default_value(config, MODEL_TYPE, default_model_type)
-    # ===== Training =====
+    # ===== Trainer =====
     # Convert config dictionary into an instance of BaseTrainerConfig.
- full_trainer_config, _ = load_trainer_with_kwargs(config[MODEL_TYPE], config[TRAINER] if TRAINER in config else {}) + full_trainer_config, _ = load_trainer_with_kwargs(config[MODEL_TYPE], config.get(TRAINER, {})) config[TRAINER] = asdict(full_trainer_config) set_default_value( diff --git a/tests/integration_tests/test_hyperopt.py b/tests/integration_tests/test_hyperopt.py index d91aa7acbdb..698eeb55b27 100644 --- a/tests/integration_tests/test_hyperopt.py +++ b/tests/integration_tests/test_hyperopt.py @@ -25,8 +25,6 @@ ACCURACY, CATEGORY, COMBINER, - DECODER, - ENCODER, EXECUTOR, HYPEROPT, INPUT_FEATURES, @@ -38,9 +36,9 @@ TYPE, ) from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME -from ludwig.hyperopt.results import HyperoptResults, RayTuneResults +from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults -from ludwig.utils.config_utils import get_feature_type_parameter_values_from_section +from ludwig.utils.data_utils import load_json from ludwig.utils.defaults import merge_with_defaults from tests.integration_tests.utils import category_feature, generate_data, text_feature @@ -241,7 +239,7 @@ def test_hyperopt_search_alg( parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor ) raytune_results = hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir) - assert isinstance(raytune_results, RayTuneResults) + assert isinstance(raytune_results, HyperoptResults) @pytest.mark.distributed @@ -308,7 +306,7 @@ def test_hyperopt_scheduler( parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor ) raytune_results = hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir) - assert isinstance(raytune_results, RayTuneResults) + assert isinstance(raytune_results, HyperoptResults) @pytest.mark.distributed @@ -389,75 +387,6 @@ def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster): assert os.path.isfile(os.path.join(tmpdir, "test_hyperopt", HYPEROPT_STATISTICS_FILE_NAME)) -def _test_hyperopt_with_shared_params_trial_table( - hyperopt_results_df, num_filters_search_space, embedding_size_search_space, reduce_input_search_space -): - # Check that hyperopt trials sample from defaults in the search space - for _, trial_row in hyperopt_results_df.iterrows(): - embedding_size = _get_trial_parameter_value("defaults.category.encoder.embedding_size", trial_row) - num_filters = _get_trial_parameter_value("defaults.text.encoder.num_filters", trial_row) - reduce_input = _get_trial_parameter_value("defaults.category.decoder.reduce_input", trial_row).replace('"', "") - assert embedding_size in embedding_size_search_space - assert num_filters in num_filters_search_space - assert reduce_input in reduce_input_search_space - - -def _test_hyperopt_with_shared_params_written_config( - hyperopt_results_df, num_filters_search_space, embedding_size_search_space, reduce_input_search_space -): - # Check that each hyperopt trial's written input/output configs got updated - for _, trial_row in hyperopt_results_df.iterrows(): - model_parameters = json.load( - open(os.path.join(trial_row["trial_dir"], "test_hyperopt_run", "model", "model_hyperparameters.json")) - ) - - # Check that num_filters got updated from the sampler correctly - for input_feature in model_parameters[INPUT_FEATURES]: - if input_feature[TYPE] == TEXT: - assert input_feature[ENCODER]["num_filters"] in num_filters_search_space - elif input_feature[TYPE] == CATEGORY: - 
assert input_feature[ENCODER]["embedding_size"] in embedding_size_search_space - - # All text features with defaults should have the same num_filters for this trial - text_input_num_filters = get_feature_type_parameter_values_from_section( - model_parameters, INPUT_FEATURES, TEXT, "num_filters" - ) - assert len(text_input_num_filters) == 1 - - for output_feature in model_parameters[OUTPUT_FEATURES]: - if output_feature[TYPE] == CATEGORY: - assert output_feature[DECODER]["reduce_input"] in reduce_input_search_space - - # All category features with defaults should have the same embedding_size for this trial - input_category_features_embedding_sizes = get_feature_type_parameter_values_from_section( - model_parameters, INPUT_FEATURES, CATEGORY, "embedding_size" - ) - - assert len(input_category_features_embedding_sizes) == 1 - - -@pytest.mark.distributed -def test_hyperopt_with_shared_params(csv_filename, tmpdir): - ( - config, - rel_path, - num_filters_search_space, - embedding_size_search_space, - reduce_input_search_space, - ) = _setup_ludwig_config_with_shared_params(csv_filename) - - hyperopt_results = hyperopt(config, dataset=rel_path, output_directory=tmpdir, experiment_name="test_hyperopt") - hyperopt_results_df = hyperopt_results.experiment_analysis.results_df - - _test_hyperopt_with_shared_params_trial_table( - hyperopt_results_df, num_filters_search_space, embedding_size_search_space, reduce_input_search_space - ) - - _test_hyperopt_with_shared_params_written_config( - hyperopt_results_df, num_filters_search_space, embedding_size_search_space, reduce_input_search_space - ) - - @pytest.mark.distributed def test_hyperopt_with_feature_specific_parameters(csv_filename, tmpdir, ray_cluster): input_features = [ @@ -562,3 +491,94 @@ def test_hyperopt_old_config(csv_filename, tmpdir, ray_cluster): rel_path = generate_data(input_features, output_features, csv_filename) hyperopt(old_config, dataset=rel_path, output_directory=tmpdir, experiment_name="test_hyperopt") + + +@pytest.mark.distributed +def test_hyperopt_nested_parameters(csv_filename, tmpdir, ray_cluster): + config = { + INPUT_FEATURES: [ + {"name": "cat1", TYPE: "category", "encoder": {"vocab_size": 2}}, + {"name": "num1", TYPE: "number"}, + ], + OUTPUT_FEATURES: [ + {"name": "bin1", TYPE: "binary"}, + ], + TRAINER: {"epochs": 2}, + HYPEROPT: { + EXECUTOR: { + TYPE: "ray", + "time_budget_s": 200, + "cpu_resources_per_trial": 1, + "num_samples": 4, + "scheduler": {TYPE: "fifo"}, + }, + "search_alg": {TYPE: "variant_generator"}, + "parameters": { + ".": { + "space": "choice", + "categories": [ + { + "combiner": { + "type": "tabnet", + "bn_virtual_bs": 256, + }, + "trainer": { + "learning_rate_scaling": "sqrt", + "decay": True, + "decay_steps": 20000, + "decay_rate": 0.8, + "optimizer": {"type": "adam"}, + }, + }, + { + "combiner": { + "type": "concat", + "num_fc_layers": 2, + }, + "trainer": { + "learning_rate_scaling": "linear", + }, + }, + ], + }, + "trainer.learning_rate": {"space": "choice", "categories": [0.7, 0.42]}, + }, + }, + } + + input_features = config[INPUT_FEATURES] + output_features = config[OUTPUT_FEATURES] + rel_path = generate_data(input_features, output_features, csv_filename) + + results = hyperopt( + config, + dataset=rel_path, + output_directory=tmpdir, + experiment_name="test_hyperopt_nested_params", + ) + + results_df = results.experiment_analysis.results_df + assert len(results_df) == 4 + + for _, trial_meta in results_df.iterrows(): + trial_dir = trial_meta["trial_dir"] + trial_config = load_json( + 
os.path.join(trial_dir, "test_hyperopt_nested_params_run", "model", "model_hyperparameters.json") + ) + + assert len(trial_config[INPUT_FEATURES]) == len(config[INPUT_FEATURES]) + assert len(trial_config[OUTPUT_FEATURES]) == len(config[OUTPUT_FEATURES]) + + assert trial_config[COMBINER][TYPE] in {"tabnet", "concat"} + if trial_config[COMBINER][TYPE] == "tabnet": + assert trial_config[COMBINER]["bn_virtual_bs"] == 256 + assert trial_config[TRAINER]["learning_rate_scaling"] == "sqrt" + assert trial_config[TRAINER]["decay"] is True + assert trial_config[TRAINER]["decay_steps"] == 20000 + assert trial_config[TRAINER]["decay_rate"] == 0.8 + assert trial_config[TRAINER]["optimizer"]["type"] == "adam" + else: + assert trial_config[COMBINER]["num_fc_layers"] == 2 + assert trial_config[TRAINER]["learning_rate_scaling"] == "linear" + + assert trial_config[TRAINER]["learning_rate"] in {0.7, 0.42} diff --git a/tests/integration_tests/test_hyperopt_ray.py b/tests/integration_tests/test_hyperopt_ray.py index 44aa559f4ef..8276abf2782 100644 --- a/tests/integration_tests/test_hyperopt_ray.py +++ b/tests/integration_tests/test_hyperopt_ray.py @@ -24,6 +24,7 @@ from ludwig.constants import ACCURACY, TRAINER from ludwig.contribs import MlflowCallback from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME +from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults from ludwig.utils.defaults import merge_with_defaults from tests.integration_tests.utils import category_feature, generate_data, text_feature @@ -32,7 +33,6 @@ import ray from ludwig.hyperopt.execution import get_build_hyperopt_executor - from ludwig.hyperopt.results import RayTuneResults except ImportError: ray = None @@ -282,7 +282,7 @@ def run_hyperopt( ) # check for return results - assert isinstance(hyperopt_results, RayTuneResults) + assert isinstance(hyperopt_results, HyperoptResults) # check for existence of the hyperopt statistics file assert os.path.isfile(os.path.join(tmpdir, experiment_name, HYPEROPT_STATISTICS_FILE_NAME)) diff --git a/tests/integration_tests/test_hyperopt_ray_horovod.py b/tests/integration_tests/test_hyperopt_ray_horovod.py index 00d691d7381..2f9ee370781 100644 --- a/tests/integration_tests/test_hyperopt_ray_horovod.py +++ b/tests/integration_tests/test_hyperopt_ray_horovod.py @@ -24,6 +24,7 @@ from ludwig.callbacks import Callback from ludwig.constants import ACCURACY, TRAINER from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME +from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults from ludwig.utils.defaults import merge_with_defaults from tests.integration_tests.utils import binary_feature, create_data_set_to_use, generate_data, number_feature @@ -41,7 +42,6 @@ from ludwig.backend.ray import RayBackend from ludwig.hyperopt.execution import _get_relative_checkpoints_dir_parts, RayTuneExecutor - from ludwig.hyperopt.results import RayTuneResults except ImportError: ray = None _ray_nightly = False @@ -304,7 +304,7 @@ def run_hyperopt( ) # check for return results - assert isinstance(hyperopt_results, RayTuneResults) + assert isinstance(hyperopt_results, HyperoptResults) # check for existence of the hyperopt statistics file assert os.path.isfile(os.path.join(out_dir, experiment_name, HYPEROPT_STATISTICS_FILE_NAME)) diff --git a/tests/ludwig/hyperopt/test_hyperopt.py b/tests/ludwig/hyperopt/test_hyperopt.py index 20ae9e0d039..76c852bdd11 100644 --- 
a/tests/ludwig/hyperopt/test_hyperopt.py +++ b/tests/ludwig/hyperopt/test_hyperopt.py @@ -1,35 +1,77 @@ import pytest -from ludwig.constants import ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, TYPE -from ludwig.hyperopt.run import get_features_eligible_for_shared_params +from ludwig.constants import INPUT_FEATURES, NAME, OUTPUT_FEATURES, TYPE +from ludwig.hyperopt.utils import substitute_parameters +BASE_CONFIG = { + INPUT_FEATURES: [{NAME: "title", TYPE: "text"}], + OUTPUT_FEATURES: [{NAME: "summary", TYPE: "text"}], +} -def _setup(): - config = { - INPUT_FEATURES: [{NAME: "title", TYPE: "text"}], - OUTPUT_FEATURES: [{NAME: "summary", TYPE: "text"}], - } - return config - -def test_hyperopt_without_encoders_or_decoders(): - config = _setup() - features_eligible_for_shared_params = { - INPUT_FEATURES: get_features_eligible_for_shared_params(config, INPUT_FEATURES), - OUTPUT_FEATURES: get_features_eligible_for_shared_params(config, OUTPUT_FEATURES), - } - assert features_eligible_for_shared_params[INPUT_FEATURES] == {"text": {"title"}} - assert features_eligible_for_shared_params[OUTPUT_FEATURES] == {"text": {"summary"}} - - -@pytest.mark.parametrize("encoder", ["parallel_cnn", "stacked_cnn"]) -def test_hyperopt_default_encoder(encoder: str): - config = _setup() - config[INPUT_FEATURES][0][ENCODER] = {TYPE: encoder} - features_eligible_for_shared_params = get_features_eligible_for_shared_params(config, INPUT_FEATURES) - print(features_eligible_for_shared_params) - if encoder == "parallel_cnn": - assert features_eligible_for_shared_params == {"text": {"title"}} - else: - # When non-default encoder is passed, there should be no features eligible for shared params - assert features_eligible_for_shared_params == {} +@pytest.mark.parametrize( + "parameters, expected", + [ + ( + { + "combiner.type": "tabnet", + "combiner.fc_layers": [{"output_size": 64}, {"output_size": 32}], + "trainer.learning_rate": 0.1, + "trainer.batch_size": 256, + }, + { + **BASE_CONFIG, + "combiner": {"type": "tabnet", "fc_layers": [{"output_size": 64}, {"output_size": 32}]}, + "trainer": {"learning_rate": 0.1, "batch_size": 256}, + }, + ), + ( + { + "title.encoder.type": "bert", + "summary.decoder.reduce_input": "sum", + "trainer.learning_rate": 0.1, + "trainer.batch_size": 256, + }, + { + INPUT_FEATURES: [{NAME: "title", TYPE: "text", "encoder": {"type": "bert"}}], + OUTPUT_FEATURES: [{NAME: "summary", TYPE: "text", "decoder": {"reduce_input": "sum"}}], + "trainer": {"learning_rate": 0.1, "batch_size": 256}, + }, + ), + ( + { + ".": { + "combiner": {"type": "concat", "num_fc_layers": 2}, + "trainer": {"learning_rate_scaling": "linear"}, + }, + "trainer.learning_rate": 0.1, + }, + { + **BASE_CONFIG, + "combiner": {"type": "concat", "num_fc_layers": 2}, + "trainer": {"learning_rate_scaling": "linear", "learning_rate": 0.1}, + }, + ), + ( + { + ".": { + "combiner": {"type": "concat", "num_fc_layers": 2}, + "trainer": {"learning_rate_scaling": "linear"}, + }, + "trainer": { + "learning_rate": 0.1, + "batch_size": 256, + }, + }, + { + **BASE_CONFIG, + "combiner": {"type": "concat", "num_fc_layers": 2}, + "trainer": {"learning_rate_scaling": "linear", "learning_rate": 0.1, "batch_size": 256}, + }, + ), + ], + ids=["flat", "features", "nested", "multi-nested"], +) +def test_substitute_parameters(parameters, expected): + actual_config = substitute_parameters(BASE_CONFIG, parameters) + assert actual_config == expected
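Usage note (illustrative sketch, not part of the patch): with this change, a hyperopt parameter named "." supplies a dict that is merged into the top level of the config, while dotted names such as "trainer.learning_rate" or "title.encoder.type" target individual fields; when parameters conflict, later ones take precedence. The snippet below mirrors the "nested" case in the new test_substitute_parameters unit test, but uses placeholder feature names (cat1, bin1) rather than anything defined in the patch:

from ludwig.hyperopt.utils import substitute_parameters

# Base config with placeholder features (illustrative only).
base_config = {
    "input_features": [{"name": "cat1", "type": "category"}],
    "output_features": [{"name": "bin1", "type": "binary"}],
}

# One sampled trial: "." contributes whole nested blocks, and the dotted
# name then overrides a single trainer field on top of them.
sampled_parameters = {
    ".": {
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "trainer": {"learning_rate_scaling": "linear"},
    },
    "trainer.learning_rate": 0.42,
}

trial_config = substitute_parameters(base_config, sampled_parameters)
# trial_config["combiner"] == {"type": "concat", "num_fc_layers": 2}
# trial_config["trainer"] == {"learning_rate_scaling": "linear", "learning_rate": 0.42}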