From c9a7b7767ec10f0083f5d001316cb22205382e61 Mon Sep 17 00:00:00 2001 From: Travis Addair Date: Tue, 6 Sep 2022 09:51:22 -0700 Subject: [PATCH] Extended hyperopt to support nested configuration block parameters (#2445) --- ludwig/hyperopt/execution.py | 168 ++--------------- ludwig/hyperopt/results.py | 4 - ludwig/hyperopt/run.py | 75 +------- ludwig/hyperopt/utils.py | 92 +++++++++- ludwig/utils/defaults.py | 4 +- tests/integration_tests/test_hyperopt.py | 170 ++++++++++-------- tests/integration_tests/test_hyperopt_ray.py | 4 +- .../test_hyperopt_ray_horovod.py | 4 +- tests/ludwig/hyperopt/test_hyperopt.py | 102 +++++++---- 9 files changed, 281 insertions(+), 342 deletions(-) diff --git a/ludwig/hyperopt/execution.py b/ludwig/hyperopt/execution.py index ab7a4fbcd7d..c95e440307c 100644 --- a/ludwig/hyperopt/execution.py +++ b/ludwig/hyperopt/execution.py @@ -12,7 +12,7 @@ from functools import lru_cache from inspect import signature from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import ray from packaging import version @@ -28,29 +28,14 @@ from ludwig.backend import initialize_backend, RAY from ludwig.backend.ray import initialize_ray from ludwig.callbacks import Callback -from ludwig.constants import ( - COLUMN, - COMBINER, - DECODER, - DEFAULTS, - ENCODER, - INPUT_FEATURES, - MAXIMIZE, - OUTPUT_FEATURES, - PREPROCESSING, - TEST, - TRAINER, - TRAINING, - TYPE, - VALIDATION, -) -from ludwig.hyperopt.results import RayTuneResults, TrialResults +from ludwig.constants import MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, VALIDATION +from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.hyperopt.search_algos import get_search_algorithm -from ludwig.hyperopt.utils import load_json_values +from ludwig.hyperopt.utils import load_json_values, substitute_parameters from ludwig.modules.metric_modules import get_best_function from ludwig.utils import metric_utils from ludwig.utils.data_utils import hash_dict, NumpyEncoder -from ludwig.utils.defaults import default_random_seed +from ludwig.utils.defaults import default_random_seed, merge_with_defaults from ludwig.utils.fs_utils import has_remote_protocol from ludwig.utils.misc_utils import get_from_registry @@ -435,7 +420,6 @@ def _run_experiment( checkpoint_dir, hyperopt_dict, decode_ctx, - features_eligible_for_shared_params, is_using_ray_backend=False, ): for gpu_id in ray.get_gpu_ids(): @@ -453,9 +437,9 @@ def _run_experiment( trial_dir = Path(tune.get_trial_dir()) driver_trial_location = ray.util.get_node_ip_address() - modified_config = substitute_parameters( - copy.deepcopy(hyperopt_dict["config"]), config, features_eligible_for_shared_params - ) + modified_config = substitute_parameters(copy.deepcopy(hyperopt_dict["config"]), config) + + modified_config = merge_with_defaults(modified_config) hyperopt_dict["config"] = modified_config hyperopt_dict["experiment_name "] = f'{hyperopt_dict["experiment_name"]}_{trial_id}' @@ -648,9 +632,8 @@ def execute( random_seed=default_random_seed, debug=False, hyperopt_log_verbosity=3, - features_eligible_for_shared_params=None, **kwargs, - ) -> RayTuneResults: + ) -> HyperoptResults: if isinstance(dataset, str) and not has_remote_protocol(dataset) and not os.path.isabs(dataset): dataset = os.path.abspath(dataset) @@ -744,7 +727,6 @@ def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None): checkpoint_dir, local_hyperopt_dict, self.decode_ctx, - 
features_eligible_for_shared_params, _is_ray_backend(backend), ) @@ -868,7 +850,7 @@ def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None): logger.warning("No trials reported results; check if time budget lower than epoch latency") ordered_trials = [] - return RayTuneResults(ordered_trials=ordered_trials, experiment_analysis=analysis) + return HyperoptResults(ordered_trials=ordered_trials, experiment_analysis=analysis) class CallbackStopper(Stopper): @@ -905,136 +887,6 @@ def set_values(params: Dict[str, Any], model_dict: Dict[str, Any]): model_dict[key] = value -def update_features_with_shared_params( - section_dict: Dict[str, Any], - trial_parameters_dict: Dict[str, Dict[str, Any]], - config_feature_group: str = None, - features_eligible_for_shared_params: Dict[str, Dict[str, Set]] = None, -): - """Updates the parameters of feature_name in section_dict based on hyperopt parameters sampled. - - :param section_dict: Underlying config for the specific input/output feature populated with potentially a mix of - default and feature-specific parameters. This may be updated with values from the hyperopt search space. - :type section_dict: dict[str, any] - :param trial_parameters_dict: Config produced by the hyperopt sampler based on the parameter search space. It maps - the name of the feature to the sampled parameters for that feature. For default parameters, it creates - nested dictionaries for each feature type. - :type trial_parameters_dict: dict[str, dict[str, any]] - :param config_feature_group: Indicates whether the feature is an input feature or output feature (can be either of - `input_features` or `output_features`). - :type config_feature_group: str - :param features_eligible_for_shared_params: Collection of names of features that are eligible for using shared - parameters, keyed by `input_features` or `output_features` and then by feature type. - :type features_eligible_for_shared_params: dict[str, dict[str, set]] - """ - - feature_name = section_dict.get(COLUMN) - feature_type = section_dict.get(TYPE) - - # No default parameters specified in hyperopt parameter search space - if DEFAULTS not in trial_parameters_dict: - return - - # This feature type should have a sampled value from the default parameters passed in - if feature_type not in trial_parameters_dict.get(DEFAULTS): - return - - # All features in Ludwig config use non-default encoders or decoders - if not features_eligible_for_shared_params: - logger.warning( - """ - Default parameters specified in the hyperopt parameter search space are not being used since features - in Ludwig config are not using default encoders or decoders. You may consider either setting features to - their default encoders or decoders, or specifying feature with encoder specific parameters instead of - defaults in the parameter search space. 
- """ - ) - return - - features_eligible_for_shared_params = features_eligible_for_shared_params.get(config_feature_group) - - # At least one of this feature's feature type must use non-default encoders/decoders in the config - if feature_type not in features_eligible_for_shared_params: - return - - # This feature must use a default encoder/decoder - if feature_name not in features_eligible_for_shared_params.get(feature_type): - return - - sampled_default_shared_params = trial_parameters_dict.get(DEFAULTS).get(feature_type) - shared_params_copy = copy.deepcopy(sampled_default_shared_params) - - # Remove encoder/decoder from output/input features - if config_feature_group == INPUT_FEATURES: - if DECODER in sampled_default_shared_params: - del shared_params_copy[DECODER] - else: - if ENCODER in sampled_default_shared_params: - del shared_params_copy[ENCODER] - sampled_default_shared_params = shared_params_copy - - set_values(sampled_default_shared_params, section_dict) - - -def update_section_dict( - section_dict: Dict[str, Any], parameter_name: str, trial_parameters_dict: Dict[str, Dict[str, Any]] -): - """Update a parameter in section config with sampled value from hyperopt.""" - if parameter_name not in trial_parameters_dict: - return - - params = trial_parameters_dict[parameter_name] - set_values(params, section_dict) - - -def get_parameters_dict(parameters): - parameters_dict = {} - for name, value in parameters.items(): - curr_dict = parameters_dict - name_list = name.split(".") - for i, name_elem in enumerate(name_list): - if i == len(name_list) - 1: - curr_dict[name_elem] = value - else: - name_dict = curr_dict.get(name_elem, {}) - curr_dict[name_elem] = name_dict - curr_dict = name_dict - return parameters_dict - - -def substitute_parameters( - config: Dict[str, Any], - parameters: Dict[str, Any], - features_eligible_for_shared_params: Dict[str, Dict[str, Set]] = None, -): - """Update Ludwig config with parameters sampled from the Hyperopt sampler.""" - parameters_dict = get_parameters_dict(parameters) - for input_feature in config[INPUT_FEATURES]: - # Update shared params - update_features_with_shared_params( - input_feature, - parameters_dict, - config_feature_group=INPUT_FEATURES, - features_eligible_for_shared_params=features_eligible_for_shared_params, - ) - # Update or overwrite any feature specific hyperopt params - update_section_dict(input_feature, input_feature[COLUMN], parameters_dict) - for output_feature in config[OUTPUT_FEATURES]: - # Update shared params - update_features_with_shared_params( - output_feature, - parameters_dict, - config_feature_group=OUTPUT_FEATURES, - features_eligible_for_shared_params=features_eligible_for_shared_params, - ) - # Update or overwrite any feature specific hyperopt params - update_section_dict(output_feature, output_feature[COLUMN], parameters_dict) - update_section_dict(config[COMBINER], COMBINER, parameters_dict) - update_section_dict(config[TRAINER], TRAINER, parameters_dict) - update_section_dict(config[PREPROCESSING], PREPROCESSING, parameters_dict) - return config - - def run_experiment( config, parameters=None, diff --git a/ludwig/hyperopt/results.py b/ludwig/hyperopt/results.py index 855bfcd9589..6e6459480bd 100644 --- a/ludwig/hyperopt/results.py +++ b/ludwig/hyperopt/results.py @@ -23,8 +23,4 @@ class TrialResults: @dataclass class HyperoptResults: ordered_trials: List[TrialResults] - - -@dataclass -class RayTuneResults(HyperoptResults): experiment_analysis: ExperimentAnalysis diff --git a/ludwig/hyperopt/run.py 
b/ludwig/hyperopt/run.py index 390c3758a3c..49abae0efbf 100644 --- a/ludwig/hyperopt/run.py +++ b/ludwig/hyperopt/run.py @@ -1,8 +1,8 @@ +import copy import logging import os -from collections import defaultdict from pprint import pformat -from typing import Any, Dict, List, Optional, Set, Union +from typing import List, Optional, Union import pandas as pd import yaml @@ -12,11 +12,8 @@ from ludwig.callbacks import Callback from ludwig.constants import ( COMBINED, - DECODER, - ENCODER, EXECUTOR, HYPEROPT, - INPUT_FEATURES, LOSS, MINIMIZE, NAME, @@ -28,7 +25,7 @@ VALIDATION, ) from ludwig.data.split import get_splitter -from ludwig.features.feature_registries import input_type_registry, output_type_registry +from ludwig.features.feature_registries import output_type_registry from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.utils import print_hyperopt_results, save_hyperopt_stats, should_tune_preprocessing from ludwig.utils.backward_compatibility import upgrade_to_latest_version @@ -188,15 +185,12 @@ def hyperopt( else: config_dict = config - # Get mapping of input/output features that don't have an encoder for shared parameters - features_eligible_for_shared_params = { - INPUT_FEATURES: get_features_eligible_for_shared_params(config_dict, INPUT_FEATURES), - OUTPUT_FEATURES: get_features_eligible_for_shared_params(config_dict, OUTPUT_FEATURES), - } - # backwards compatibility config = upgrade_to_latest_version(config_dict) + # Retain pre-merged config for hyperopt schema generation + premerged_config = copy.deepcopy(config) + # merge config with defaults config = merge_with_defaults(config) @@ -212,12 +206,6 @@ def hyperopt( logging.info(pformat(hyperopt_config, indent=4)) logging.info("\n") - logging.info( - "Features that may be updated in hyperopt trials if default parameters are specified in the search space" - ) - logging.info(pformat(dict(features_eligible_for_shared_params), indent=4)) - logging.info("\n") - search_alg = hyperopt_config["search_alg"] executor = hyperopt_config[EXECUTOR] parameters = hyperopt_config["parameters"] @@ -339,7 +327,7 @@ def hyperopt( callback.on_hyperopt_start(experiment_name) hyperopt_results = hyperopt_executor.execute( - config, + premerged_config, dataset=dataset, training_set=training_set, validation_set=validation_set, @@ -366,7 +354,6 @@ def hyperopt( backend=backend, random_seed=random_seed, hyperopt_log_verbosity=hyperopt_log_verbosity, - features_eligible_for_shared_params=features_eligible_for_shared_params, **kwargs, ) @@ -410,51 +397,3 @@ def update_hyperopt_params_with_defaults(hyperopt_params): hyperopt_params[EXECUTOR], executor_defaults, ) - - -def get_features_eligible_for_shared_params( - config_dict: Dict[str, Any], config_feature_type: str -) -> Dict[str, Dict[str, Set]]: - """Generates a mapping of feature type to the corresponding set of features without an encoder or one using the - default encoder for that feature type. - - These features may be considered for potential shared parameter search spaces depending on the parameter space - defined later within the hyperopt config. This applies to both config_feature_types (input_features and - output_features). The shared parameters for both config_feature_types must be specified separately. - - Note that shared default parameter search spaces are not applied to features with non-default encoders or - non-default decoders, since shared default parameter values should only apply to default modules. 
- - Returns: - Dict of feature type -> set of feature names with that type that are eligible for shared parameters (they use - the default encoder or default decoder). - - TODO(#2167): Make sure each feature has a type defined in the JSONSchema for Hyperopt - """ - - if config_feature_type not in config_dict: - raise ValueError(f"{config_feature_type} must be defined in Ludwig config.") - - features_eligible_for_shared_params = defaultdict(set) - - features = config_dict.get(config_feature_type) - feature_registry = input_type_registry if config_feature_type == INPUT_FEATURES else output_type_registry - - for feature in features: - if TYPE not in feature: - raise ValueError("Ludwig expects feature types to be defined for each feature within the config.") - - feature_schema = get_from_registry(feature.get(TYPE), feature_registry).get_schema_cls() - - if config_feature_type == INPUT_FEATURES: - default_encoder = feature_schema().encoder.type - if feature.get(ENCODER, None) and feature.get(ENCODER).get(TYPE, None) != default_encoder: - continue - else: - default_decoder = feature_schema().decoder.type - if feature.get(DECODER, None) and feature.get(DECODER).get(TYPE, None) != default_decoder: - continue - - features_eligible_for_shared_params[feature[TYPE]].add(feature[NAME]) - - return features_eligible_for_shared_params diff --git a/ludwig/hyperopt/utils.py b/ludwig/hyperopt/utils.py index dbe4170ea73..d04ae170d65 100644 --- a/ludwig/hyperopt/utils.py +++ b/ludwig/hyperopt/utils.py @@ -1,12 +1,15 @@ +import copy import dataclasses import json import logging import os +from typing import Any, Dict -from ludwig.constants import HYPEROPT, PARAMETERS, PREPROCESSING +from ludwig.constants import HYPEROPT, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PARAMETERS, PREPROCESSING from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.utils.data_utils import save_json +from ludwig.utils.misc_utils import merge_dict from ludwig.utils.print_utils import print_boxed logger = logging.getLogger(__name__) @@ -51,3 +54,90 @@ def should_tune_preprocessing(config): if f"{PREPROCESSING}." 
in param_name:
             return True
     return False
+
+
+def parameter_to_dict(name, value):
+    if name == ".":
+        # Parameter name "." means the top-level config
+        return value
+
+    parameter_dict = {}
+    curr_dict = parameter_dict
+    name_list = name.split(".")
+    for i, name_elem in enumerate(name_list):
+        if i == len(name_list) - 1:
+            curr_dict[name_elem] = value
+        else:
+            name_dict = curr_dict.get(name_elem, {})
+            curr_dict[name_elem] = name_dict
+            curr_dict = name_dict
+    return parameter_dict
+
+
+def feature_list_to_dict(config: Dict[str, Any]) -> Dict[str, Any]:
+    input_features_dict = {}
+    for feature in config[INPUT_FEATURES]:
+        input_features_dict[feature[NAME]] = feature
+
+    output_features_dict = {}
+    for feature in config[OUTPUT_FEATURES]:
+        output_features_dict[feature[NAME]] = feature
+
+    config = copy.copy(config)
+    config[INPUT_FEATURES] = input_features_dict
+    config[OUTPUT_FEATURES] = output_features_dict
+    return config
+
+
+def feature_dict_to_list(config: Dict[str, Any]) -> Dict[str, Any]:
+    # This works because Python dicts are order-preserving, so we do not need to
+    # do anything special to map from a key in the dict to an index in a list
+    input_features_list = []
+    for feature in config[INPUT_FEATURES].values():
+        input_features_list.append(feature)
+
+    output_features_list = []
+    for feature in config[OUTPUT_FEATURES].values():
+        output_features_list.append(feature)
+
+    config = copy.copy(config)
+    config[INPUT_FEATURES] = input_features_list
+    config[OUTPUT_FEATURES] = output_features_list
+    return config
+
+
+def substitute_parameters(
+    config: Dict[str, Any],
+    parameters: Dict[str, Any],
+):
+    """Update Ludwig config with parameters sampled from the Hyperopt sampler."""
+
+    # Collect the sets of names for each feature grouping so we can map feature names to
+    # groups
+    input_feature_names = {feature[NAME] for feature in config[INPUT_FEATURES]}
+    output_feature_names = {feature[NAME] for feature in config[OUTPUT_FEATURES]}
+
+    # Features in the user config are provided as a list, but in hyperopt we reference
+    # features by name, so convert temporarily to a dict to simplify the merge process.
+    config = feature_list_to_dict(config)
+
+    # Merge parameters into the user configuration in order. As such, if there are conflicting
+    # params, the later params will take precedence.
+    for name, value in parameters.items():
+        # User params are provided as <feature_name>.<param_name>, but we group input / output features
+        # together during the merge to make it easier and unambiguous to convert back and forth.
+        # TODO(travis): we should revisit the user format here, as it silently breaks situations
+        # where the user has a feature named "trainer", "combiner", etc.
+        prefix = name.split(".")[0]
+        if prefix in input_feature_names:
+            name = f"{INPUT_FEATURES}.{name}"
+        elif prefix in output_feature_names:
+            name = f"{OUTPUT_FEATURES}.{name}"
+
+        param_dict = parameter_to_dict(name, value)
+        config = merge_dict(config, param_dict)
+
+    # Now that all features have been merged, convert back to the original list format.
+    config = feature_dict_to_list(config)
+
+    return config
diff --git a/ludwig/utils/defaults.py b/ludwig/utils/defaults.py
index 9be6f93d2cb..2322ae154a1 100644
--- a/ludwig/utils/defaults.py
+++ b/ludwig/utils/defaults.py
@@ -277,9 +277,9 @@ def merge_with_defaults(config: dict) -> dict:  # noqa: F821
     # ===== Model Type =====
     set_default_value(config, MODEL_TYPE, default_model_type)
-    # ===== Training =====
+    # ===== Trainer =====
     # Convert config dictionary into an instance of BaseTrainerConfig.
- full_trainer_config, _ = load_trainer_with_kwargs(config[MODEL_TYPE], config[TRAINER] if TRAINER in config else {}) + full_trainer_config, _ = load_trainer_with_kwargs(config[MODEL_TYPE], config.get(TRAINER, {})) config[TRAINER] = asdict(full_trainer_config) set_default_value( diff --git a/tests/integration_tests/test_hyperopt.py b/tests/integration_tests/test_hyperopt.py index d91aa7acbdb..698eeb55b27 100644 --- a/tests/integration_tests/test_hyperopt.py +++ b/tests/integration_tests/test_hyperopt.py @@ -25,8 +25,6 @@ ACCURACY, CATEGORY, COMBINER, - DECODER, - ENCODER, EXECUTOR, HYPEROPT, INPUT_FEATURES, @@ -38,9 +36,9 @@ TYPE, ) from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME -from ludwig.hyperopt.results import HyperoptResults, RayTuneResults +from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults -from ludwig.utils.config_utils import get_feature_type_parameter_values_from_section +from ludwig.utils.data_utils import load_json from ludwig.utils.defaults import merge_with_defaults from tests.integration_tests.utils import category_feature, generate_data, text_feature @@ -241,7 +239,7 @@ def test_hyperopt_search_alg( parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor ) raytune_results = hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir) - assert isinstance(raytune_results, RayTuneResults) + assert isinstance(raytune_results, HyperoptResults) @pytest.mark.distributed @@ -308,7 +306,7 @@ def test_hyperopt_scheduler( parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor ) raytune_results = hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir) - assert isinstance(raytune_results, RayTuneResults) + assert isinstance(raytune_results, HyperoptResults) @pytest.mark.distributed @@ -389,75 +387,6 @@ def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster): assert os.path.isfile(os.path.join(tmpdir, "test_hyperopt", HYPEROPT_STATISTICS_FILE_NAME)) -def _test_hyperopt_with_shared_params_trial_table( - hyperopt_results_df, num_filters_search_space, embedding_size_search_space, reduce_input_search_space -): - # Check that hyperopt trials sample from defaults in the search space - for _, trial_row in hyperopt_results_df.iterrows(): - embedding_size = _get_trial_parameter_value("defaults.category.encoder.embedding_size", trial_row) - num_filters = _get_trial_parameter_value("defaults.text.encoder.num_filters", trial_row) - reduce_input = _get_trial_parameter_value("defaults.category.decoder.reduce_input", trial_row).replace('"', "") - assert embedding_size in embedding_size_search_space - assert num_filters in num_filters_search_space - assert reduce_input in reduce_input_search_space - - -def _test_hyperopt_with_shared_params_written_config( - hyperopt_results_df, num_filters_search_space, embedding_size_search_space, reduce_input_search_space -): - # Check that each hyperopt trial's written input/output configs got updated - for _, trial_row in hyperopt_results_df.iterrows(): - model_parameters = json.load( - open(os.path.join(trial_row["trial_dir"], "test_hyperopt_run", "model", "model_hyperparameters.json")) - ) - - # Check that num_filters got updated from the sampler correctly - for input_feature in model_parameters[INPUT_FEATURES]: - if input_feature[TYPE] == TEXT: - assert input_feature[ENCODER]["num_filters"] in num_filters_search_space - elif input_feature[TYPE] == CATEGORY: - 
assert input_feature[ENCODER]["embedding_size"] in embedding_size_search_space - - # All text features with defaults should have the same num_filters for this trial - text_input_num_filters = get_feature_type_parameter_values_from_section( - model_parameters, INPUT_FEATURES, TEXT, "num_filters" - ) - assert len(text_input_num_filters) == 1 - - for output_feature in model_parameters[OUTPUT_FEATURES]: - if output_feature[TYPE] == CATEGORY: - assert output_feature[DECODER]["reduce_input"] in reduce_input_search_space - - # All category features with defaults should have the same embedding_size for this trial - input_category_features_embedding_sizes = get_feature_type_parameter_values_from_section( - model_parameters, INPUT_FEATURES, CATEGORY, "embedding_size" - ) - - assert len(input_category_features_embedding_sizes) == 1 - - -@pytest.mark.distributed -def test_hyperopt_with_shared_params(csv_filename, tmpdir): - ( - config, - rel_path, - num_filters_search_space, - embedding_size_search_space, - reduce_input_search_space, - ) = _setup_ludwig_config_with_shared_params(csv_filename) - - hyperopt_results = hyperopt(config, dataset=rel_path, output_directory=tmpdir, experiment_name="test_hyperopt") - hyperopt_results_df = hyperopt_results.experiment_analysis.results_df - - _test_hyperopt_with_shared_params_trial_table( - hyperopt_results_df, num_filters_search_space, embedding_size_search_space, reduce_input_search_space - ) - - _test_hyperopt_with_shared_params_written_config( - hyperopt_results_df, num_filters_search_space, embedding_size_search_space, reduce_input_search_space - ) - - @pytest.mark.distributed def test_hyperopt_with_feature_specific_parameters(csv_filename, tmpdir, ray_cluster): input_features = [ @@ -562,3 +491,94 @@ def test_hyperopt_old_config(csv_filename, tmpdir, ray_cluster): rel_path = generate_data(input_features, output_features, csv_filename) hyperopt(old_config, dataset=rel_path, output_directory=tmpdir, experiment_name="test_hyperopt") + + +@pytest.mark.distributed +def test_hyperopt_nested_parameters(csv_filename, tmpdir, ray_cluster): + config = { + INPUT_FEATURES: [ + {"name": "cat1", TYPE: "category", "encoder": {"vocab_size": 2}}, + {"name": "num1", TYPE: "number"}, + ], + OUTPUT_FEATURES: [ + {"name": "bin1", TYPE: "binary"}, + ], + TRAINER: {"epochs": 2}, + HYPEROPT: { + EXECUTOR: { + TYPE: "ray", + "time_budget_s": 200, + "cpu_resources_per_trial": 1, + "num_samples": 4, + "scheduler": {TYPE: "fifo"}, + }, + "search_alg": {TYPE: "variant_generator"}, + "parameters": { + ".": { + "space": "choice", + "categories": [ + { + "combiner": { + "type": "tabnet", + "bn_virtual_bs": 256, + }, + "trainer": { + "learning_rate_scaling": "sqrt", + "decay": True, + "decay_steps": 20000, + "decay_rate": 0.8, + "optimizer": {"type": "adam"}, + }, + }, + { + "combiner": { + "type": "concat", + "num_fc_layers": 2, + }, + "trainer": { + "learning_rate_scaling": "linear", + }, + }, + ], + }, + "trainer.learning_rate": {"space": "choice", "categories": [0.7, 0.42]}, + }, + }, + } + + input_features = config[INPUT_FEATURES] + output_features = config[OUTPUT_FEATURES] + rel_path = generate_data(input_features, output_features, csv_filename) + + results = hyperopt( + config, + dataset=rel_path, + output_directory=tmpdir, + experiment_name="test_hyperopt_nested_params", + ) + + results_df = results.experiment_analysis.results_df + assert len(results_df) == 4 + + for _, trial_meta in results_df.iterrows(): + trial_dir = trial_meta["trial_dir"] + trial_config = load_json( + 
os.path.join(trial_dir, "test_hyperopt_nested_params_run", "model", "model_hyperparameters.json") + ) + + assert len(trial_config[INPUT_FEATURES]) == len(config[INPUT_FEATURES]) + assert len(trial_config[OUTPUT_FEATURES]) == len(config[OUTPUT_FEATURES]) + + assert trial_config[COMBINER][TYPE] in {"tabnet", "concat"} + if trial_config[COMBINER][TYPE] == "tabnet": + assert trial_config[COMBINER]["bn_virtual_bs"] == 256 + assert trial_config[TRAINER]["learning_rate_scaling"] == "sqrt" + assert trial_config[TRAINER]["decay"] is True + assert trial_config[TRAINER]["decay_steps"] == 20000 + assert trial_config[TRAINER]["decay_rate"] == 0.8 + assert trial_config[TRAINER]["optimizer"]["type"] == "adam" + else: + assert trial_config[COMBINER]["num_fc_layers"] == 2 + assert trial_config[TRAINER]["learning_rate_scaling"] == "linear" + + assert trial_config[TRAINER]["learning_rate"] in {0.7, 0.42} diff --git a/tests/integration_tests/test_hyperopt_ray.py b/tests/integration_tests/test_hyperopt_ray.py index 44aa559f4ef..8276abf2782 100644 --- a/tests/integration_tests/test_hyperopt_ray.py +++ b/tests/integration_tests/test_hyperopt_ray.py @@ -24,6 +24,7 @@ from ludwig.constants import ACCURACY, TRAINER from ludwig.contribs import MlflowCallback from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME +from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults from ludwig.utils.defaults import merge_with_defaults from tests.integration_tests.utils import category_feature, generate_data, text_feature @@ -32,7 +33,6 @@ import ray from ludwig.hyperopt.execution import get_build_hyperopt_executor - from ludwig.hyperopt.results import RayTuneResults except ImportError: ray = None @@ -282,7 +282,7 @@ def run_hyperopt( ) # check for return results - assert isinstance(hyperopt_results, RayTuneResults) + assert isinstance(hyperopt_results, HyperoptResults) # check for existence of the hyperopt statistics file assert os.path.isfile(os.path.join(tmpdir, experiment_name, HYPEROPT_STATISTICS_FILE_NAME)) diff --git a/tests/integration_tests/test_hyperopt_ray_horovod.py b/tests/integration_tests/test_hyperopt_ray_horovod.py index 00d691d7381..2f9ee370781 100644 --- a/tests/integration_tests/test_hyperopt_ray_horovod.py +++ b/tests/integration_tests/test_hyperopt_ray_horovod.py @@ -24,6 +24,7 @@ from ludwig.callbacks import Callback from ludwig.constants import ACCURACY, TRAINER from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME +from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults from ludwig.utils.defaults import merge_with_defaults from tests.integration_tests.utils import binary_feature, create_data_set_to_use, generate_data, number_feature @@ -41,7 +42,6 @@ from ludwig.backend.ray import RayBackend from ludwig.hyperopt.execution import _get_relative_checkpoints_dir_parts, RayTuneExecutor - from ludwig.hyperopt.results import RayTuneResults except ImportError: ray = None _ray_nightly = False @@ -304,7 +304,7 @@ def run_hyperopt( ) # check for return results - assert isinstance(hyperopt_results, RayTuneResults) + assert isinstance(hyperopt_results, HyperoptResults) # check for existence of the hyperopt statistics file assert os.path.isfile(os.path.join(out_dir, experiment_name, HYPEROPT_STATISTICS_FILE_NAME)) diff --git a/tests/ludwig/hyperopt/test_hyperopt.py b/tests/ludwig/hyperopt/test_hyperopt.py index 20ae9e0d039..76c852bdd11 100644 --- 
a/tests/ludwig/hyperopt/test_hyperopt.py +++ b/tests/ludwig/hyperopt/test_hyperopt.py @@ -1,35 +1,77 @@ import pytest -from ludwig.constants import ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, TYPE -from ludwig.hyperopt.run import get_features_eligible_for_shared_params +from ludwig.constants import INPUT_FEATURES, NAME, OUTPUT_FEATURES, TYPE +from ludwig.hyperopt.utils import substitute_parameters +BASE_CONFIG = { + INPUT_FEATURES: [{NAME: "title", TYPE: "text"}], + OUTPUT_FEATURES: [{NAME: "summary", TYPE: "text"}], +} -def _setup(): - config = { - INPUT_FEATURES: [{NAME: "title", TYPE: "text"}], - OUTPUT_FEATURES: [{NAME: "summary", TYPE: "text"}], - } - return config - -def test_hyperopt_without_encoders_or_decoders(): - config = _setup() - features_eligible_for_shared_params = { - INPUT_FEATURES: get_features_eligible_for_shared_params(config, INPUT_FEATURES), - OUTPUT_FEATURES: get_features_eligible_for_shared_params(config, OUTPUT_FEATURES), - } - assert features_eligible_for_shared_params[INPUT_FEATURES] == {"text": {"title"}} - assert features_eligible_for_shared_params[OUTPUT_FEATURES] == {"text": {"summary"}} - - -@pytest.mark.parametrize("encoder", ["parallel_cnn", "stacked_cnn"]) -def test_hyperopt_default_encoder(encoder: str): - config = _setup() - config[INPUT_FEATURES][0][ENCODER] = {TYPE: encoder} - features_eligible_for_shared_params = get_features_eligible_for_shared_params(config, INPUT_FEATURES) - print(features_eligible_for_shared_params) - if encoder == "parallel_cnn": - assert features_eligible_for_shared_params == {"text": {"title"}} - else: - # When non-default encoder is passed, there should be no features eligible for shared params - assert features_eligible_for_shared_params == {} +@pytest.mark.parametrize( + "parameters, expected", + [ + ( + { + "combiner.type": "tabnet", + "combiner.fc_layers": [{"output_size": 64}, {"output_size": 32}], + "trainer.learning_rate": 0.1, + "trainer.batch_size": 256, + }, + { + **BASE_CONFIG, + "combiner": {"type": "tabnet", "fc_layers": [{"output_size": 64}, {"output_size": 32}]}, + "trainer": {"learning_rate": 0.1, "batch_size": 256}, + }, + ), + ( + { + "title.encoder.type": "bert", + "summary.decoder.reduce_input": "sum", + "trainer.learning_rate": 0.1, + "trainer.batch_size": 256, + }, + { + INPUT_FEATURES: [{NAME: "title", TYPE: "text", "encoder": {"type": "bert"}}], + OUTPUT_FEATURES: [{NAME: "summary", TYPE: "text", "decoder": {"reduce_input": "sum"}}], + "trainer": {"learning_rate": 0.1, "batch_size": 256}, + }, + ), + ( + { + ".": { + "combiner": {"type": "concat", "num_fc_layers": 2}, + "trainer": {"learning_rate_scaling": "linear"}, + }, + "trainer.learning_rate": 0.1, + }, + { + **BASE_CONFIG, + "combiner": {"type": "concat", "num_fc_layers": 2}, + "trainer": {"learning_rate_scaling": "linear", "learning_rate": 0.1}, + }, + ), + ( + { + ".": { + "combiner": {"type": "concat", "num_fc_layers": 2}, + "trainer": {"learning_rate_scaling": "linear"}, + }, + "trainer": { + "learning_rate": 0.1, + "batch_size": 256, + }, + }, + { + **BASE_CONFIG, + "combiner": {"type": "concat", "num_fc_layers": 2}, + "trainer": {"learning_rate_scaling": "linear", "learning_rate": 0.1, "batch_size": 256}, + }, + ), + ], + ids=["flat", "features", "nested", "multi-nested"], +) +def test_substitute_parameters(parameters, expected): + actual_config = substitute_parameters(BASE_CONFIG, parameters) + assert actual_config == expected
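Usage note (illustrative sketch, not part of the patch): with this change, a hyperopt parameter named "." supplies a dict that is merged into the top level of the config, while dotted names such as "trainer.learning_rate" or "title.encoder.type" target individual fields; when parameters conflict, later ones take precedence. The snippet below mirrors the "nested" case in the new test_substitute_parameters unit test, but uses placeholder feature names (cat1, bin1) rather than anything defined in the patch:

from ludwig.hyperopt.utils import substitute_parameters

# Base config with placeholder features (illustrative only).
base_config = {
    "input_features": [{"name": "cat1", "type": "category"}],
    "output_features": [{"name": "bin1", "type": "binary"}],
}

# One sampled trial: "." contributes whole nested blocks, and the dotted
# name then overrides a single trainer field on top of them.
sampled_parameters = {
    ".": {
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "trainer": {"learning_rate_scaling": "linear"},
    },
    "trainer.learning_rate": 0.42,
}

trial_config = substitute_parameters(base_config, sampled_parameters)
# trial_config["combiner"] == {"type": "concat", "num_fc_layers": 2}
# trial_config["trainer"] == {"learning_rate_scaling": "linear", "learning_rate": 0.42}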