ludwig-ai · arnavgarg1 · Jan 21, 2023 · Jan 19, 2023 · Jan 19, 2023 · Jan 20, 2023
@@ -55,6 +55,7 @@
     input_config_registry,
     output_config_registry,
 )
+from ludwig.schema.hyperopt import HyperoptConfig
 from ludwig.schema.optimizers import get_optimizer_cls
 from ludwig.schema.preprocessing import PreprocessingConfig
 from ludwig.schema.split import get_split_cls
@@ -203,6 +204,7 @@ def __init__(self, config_dict: ModelConfigDict):
 
         # ===== Hyperopt =====
         self.hyperopt = upgraded_config_dict.get(HYPEROPT, {})
+
         self._set_hyperopt_defaults()
 
         # Set up default validation metric, which is used for plateau metrics and early stopping.
@@ -511,7 +513,11 @@ def _set_hyperopt_defaults(self):
         if not self.hyperopt:
             return
 
-        scheduler = self.hyperopt.get("executor", {}).get("scheduler")
+        # Convert hyperopt config to hyperopt schema to populate with schema defaults
+        # This fills in missing splits, executor config, search_alg, etc.
+        self.hyperopt = HyperoptConfig.Schema().load(self.hyperopt).to_dict()
+
+        scheduler = self.hyperopt.get("executor", {}).get("scheduler", {})
         if not scheduler:
             return
 

@@ -557,3 +557,34 @@ def test_hyperopt_nested_parameters(csv_filename, tmpdir, ray_cluster_7cpu):
             assert trial_config[TRAINER]["learning_rate_scaling"] == "linear"
 
         assert trial_config[TRAINER]["learning_rate"] in {0.7, 0.42}
+
+
+def test_hyperopt_without_config_defaults(csv_filename, tmpdir, ray_cluster_7cpu):
+    input_features = [category_feature(encoder={"vocab_size": 3})]
+    output_features = [category_feature(decoder={"vocab_size": 3})]
+
+    rel_path = generate_data(input_features, output_features, csv_filename)
+
+    config = {
+        INPUT_FEATURES: input_features,
+        OUTPUT_FEATURES: output_features,
+        COMBINER: {TYPE: "concat"},
+        TRAINER: {"train_steps": 5, "learning_rate": 0.001, BATCH_SIZE: 128},
+        # The hyperopt section deliberately omits search_alg and executor; the run should still succeed.
+        HYPEROPT: {
+            "parameters": {
+                "trainer.learning_rate": {
+                    "lower": 0.0001,
+                    "upper": 0.01,
+                    "space": "loguniform",
+                }
+            },
+            "goal": "minimize",
+            "output_feature": output_features[0]["name"],
+            "validation_metrics": "loss",
+        },
+    }
+    # NOTE(review): the 10 rows asserted below presumably equal the schema-default num_samples -- TODO confirm.
+    experiment_name = f"test_hyperopt_{uuid.uuid4().hex}"
+    hyperopt_results = hyperopt(config, dataset=rel_path, output_directory=tmpdir, experiment_name=experiment_name)
+    assert hyperopt_results.experiment_analysis.results_df.shape[0] == 10