From d8a0d8f1ace6a546d6d1875aa604b84e386c6ee1 Mon Sep 17 00:00:00 2001
From: Travis Addair <tgaddair@gmail.com>
Date: Mon, 10 Oct 2022 17:52:16 -0700
Subject: [PATCH 01/29] Fixed hyperopt trial syncing to remote filesystems for
 Ray 2.0 (#2617)

---
 ludwig/hyperopt/execution.py             | 14 +++---
 ludwig/hyperopt/syncer.py                | 34 +++++++++++++
 ludwig/utils/fs_utils.py                 | 10 ++++
 tests/integration_tests/test_hyperopt.py | 62 +++++++++++-------------
 tests/integration_tests/test_remote.py   | 26 +++-------
 tests/integration_tests/utils.py         | 21 ++++++++
 6 files changed, 109 insertions(+), 58 deletions(-)
 create mode 100644 ludwig/hyperopt/syncer.py

diff --git a/ludwig/hyperopt/execution.py b/ludwig/hyperopt/execution.py
index 04e57990156..90964c1ab2e 100644
--- a/ludwig/hyperopt/execution.py
+++ b/ludwig/hyperopt/execution.py
@@ -44,11 +44,11 @@
 if _ray_200:
     from ray.air import Checkpoint
     from ray.tune.search import SEARCH_ALG_IMPORT
-    from ray.tune.syncer import get_node_to_storage_syncer, SyncConfig
+
+    from ludwig.hyperopt.syncer import RemoteSyncer
 else:
     from ray.ml import Checkpoint
     from ray.tune.suggest import SEARCH_ALG_IMPORT
-    from ray.tune.syncer import get_cloud_sync_client
 
 
 logger = logging.getLogger(__name__)
@@ -778,12 +778,14 @@ def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None):
             )
 
         if has_remote_protocol(output_directory):
-            run_experiment_trial = tune.durable(run_experiment_trial)
-            self.sync_config = tune.SyncConfig(sync_to_driver=False, upload_dir=output_directory)
             if _ray_200:
-                self.sync_client = get_node_to_storage_syncer(SyncConfig(upload_dir=output_directory))
+                self.sync_client = RemoteSyncer()
+                self.sync_config = tune.SyncConfig(upload_dir=output_directory, syncer=self.sync_client)
             else:
-                self.sync_client = get_cloud_sync_client(output_directory)
+                raise ValueError(
+                    "Syncing to remote filesystems with hyperopt is not supported with ray<2.0, "
+                    "please upgrade to ray>=2.0"
+                )
             output_directory = None
         elif self.kubernetes_namespace:
             from ray.tune.integration.kubernetes import KubernetesSyncClient, NamespacedKubernetesSyncer
diff --git a/ludwig/hyperopt/syncer.py b/ludwig/hyperopt/syncer.py
new file mode 100644
index 00000000000..561b3048587
--- /dev/null
+++ b/ludwig/hyperopt/syncer.py
@@ -0,0 +1,34 @@
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+from ray.tune.syncer import _BackgroundSyncer
+
+from ludwig.utils.data_utils import use_credentials
+from ludwig.utils.fs_utils import delete, download, upload
+
+
+class RemoteSyncer(_BackgroundSyncer):
+    def __init__(self, sync_period: float = 300.0, creds: Optional[Dict[str, Any]] = None):
+        super().__init__(sync_period=sync_period)
+        self.creds = creds
+
+    def _sync_up_command(self, local_path: str, uri: str, exclude: Optional[List] = None) -> Tuple[Callable, Dict]:
+        with use_credentials(self.creds):
+            return upload, dict(lpath=local_path, rpath=uri)
+
+    def _sync_down_command(self, uri: str, local_path: str) -> Tuple[Callable, Dict]:
+        with use_credentials(self.creds):
+            return download, dict(rpath=uri, lpath=local_path)
+
+    def _delete_command(self, uri: str) -> Tuple[Callable, Dict]:
+        with use_credentials(self.creds):
+            return delete, dict(url=uri, recursive=True)
+
+    def __reduce__(self):
+        """We need this custom serialization because we can't pickle thread.lock objects that are used by the
+        use_credentials context manager.
+
+        https://docs.ray.io/en/latest/ray-core/objects/serialization.html#customized-serialization
+        """
+        deserializer = RemoteSyncer
+        serialized_data = (self.sync_period, self.creds)
+        return deserializer, serialized_data
diff --git a/ludwig/utils/fs_utils.py b/ludwig/utils/fs_utils.py
index afcf6423afd..116d2429f8a 100644
--- a/ludwig/utils/fs_utils.py
+++ b/ludwig/utils/fs_utils.py
@@ -202,6 +202,16 @@ def delete(url, recursive=False):
     return fs.delete(path, recursive=recursive)
 
 
+def upload(lpath, rpath):
+    fs, path = get_fs_and_path(rpath)
+    pyarrow.fs.copy_files(lpath, path, destination_filesystem=pyarrow.fs.PyFileSystem(pyarrow.fs.FSSpecHandler(fs)))
+
+
+def download(rpath, lpath):
+    fs, path = get_fs_and_path(rpath)
+    pyarrow.fs.copy_files(path, lpath, source_filesystem=pyarrow.fs.PyFileSystem(pyarrow.fs.FSSpecHandler(fs)))
+
+
 def checksum(url):
     fs, path = get_fs_and_path(url)
     return fs.checksum(path)
diff --git a/tests/integration_tests/test_hyperopt.py b/tests/integration_tests/test_hyperopt.py
index 76d717d6ce9..ebadd93ed0e 100644
--- a/tests/integration_tests/test_hyperopt.py
+++ b/tests/integration_tests/test_hyperopt.py
@@ -15,7 +15,8 @@
 import contextlib
 import json
 import os.path
-from typing import Any, Dict, Optional, Tuple, Union
+import uuid
+from typing import Any, Dict, Optional, Tuple
 
 import pytest
 import torch
@@ -39,23 +40,21 @@
 from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME
 from ludwig.hyperopt.results import HyperoptResults
 from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults
+from ludwig.utils import fs_utils
 from ludwig.utils.data_utils import load_json
 from ludwig.utils.defaults import merge_with_defaults
-from tests.integration_tests.utils import category_feature, generate_data, text_feature
+from tests.integration_tests.utils import category_feature, generate_data, private_param, remote_tmpdir, text_feature
 
-try:
-    import ray
+ray = pytest.importorskip("ray")
 
-    from ludwig.hyperopt.execution import get_build_hyperopt_executor
+from ludwig.hyperopt.execution import get_build_hyperopt_executor  # noqa
 
-    _ray113 = version.parse(ray.__version__) > version.parse("1.13")
+_ray200 = version.parse(ray.__version__) >= version.parse("2.0")
 
-except ImportError:
-    ray = None
-    _ray113 = None
+pytestmark = pytest.mark.distributed
 
 
-RANDOM_SEARCH_SIZE = 4
+RANDOM_SEARCH_SIZE = 2
 
 HYPEROPT_CONFIG = {
     "parameters": {
@@ -165,18 +164,6 @@ def _setup_ludwig_config_with_shared_params(dataset_fp: str) -> Tuple[Dict, Any]
     return config, rel_path, num_filters_search_space, embedding_size_search_space, reduce_input_search_space
 
 
-def _get_trial_parameter_value(parameter_key: str, trial_row: str) -> Union[str, None]:
-    """Returns the parameter value from the Ray trial row, which has slightly different column names depending on
-    the version of Ray. Returns None if the parameter key is not found.
-
-    TODO(#2176): There are different key name delimiters depending on Ray version. The delimiter in future versions of
-    Ray (> 1.13) will be '/' instead of '.' Simplify this as Ray is upgraded.
-    """
-    if _ray113:
-        return trial_row[f"config/{parameter_key}"]
-    return trial_row[f"config.{parameter_key}"]
-
-
 @contextlib.contextmanager
 def ray_start(num_cpus: Optional[int] = None, num_gpus: Optional[int] = None):
     res = ray.init(
@@ -198,7 +185,6 @@ def ray_cluster():
         yield
 
 
-@pytest.mark.distributed
 @pytest.mark.parametrize("search_alg", SEARCH_ALGS_FOR_TESTING)
 def test_hyperopt_search_alg(
     search_alg, csv_filename, tmpdir, ray_cluster, validate_output_feature=False, validation_metric=None
@@ -249,7 +235,6 @@ def test_hyperopt_search_alg(
         assert isinstance(path, str)
 
 
-@pytest.mark.distributed
 def test_hyperopt_executor_with_metric(csv_filename, tmpdir, ray_cluster):
     test_hyperopt_search_alg(
         "variant_generator",
@@ -261,7 +246,6 @@ def test_hyperopt_executor_with_metric(csv_filename, tmpdir, ray_cluster):
     )
 
 
-@pytest.mark.distributed
 @pytest.mark.parametrize("scheduler", SCHEDULERS_FOR_TESTING)
 def test_hyperopt_scheduler(
     scheduler, csv_filename, tmpdir, ray_cluster, validate_output_feature=False, validation_metric=None
@@ -316,7 +300,6 @@ def test_hyperopt_scheduler(
         assert isinstance(raytune_results, HyperoptResults)
 
 
-@pytest.mark.distributed
 @pytest.mark.parametrize("search_space", ["random", "grid"])
 def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster):
     input_features = [
@@ -370,14 +353,19 @@ def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster):
         "goal": "minimize",
         "output_feature": output_feature_name,
         "validation_metrics": "loss",
-        "executor": {TYPE: "ray", "num_samples": 1 if search_space == "grid" else RANDOM_SEARCH_SIZE},
+        "executor": {
+            TYPE: "ray",
+            "num_samples": 1 if search_space == "grid" else RANDOM_SEARCH_SIZE,
+            "max_concurrent_trials": 1,
+        },
         "search_alg": {TYPE: "variant_generator"},
     }
 
     # add hyperopt parameter space to the config
     config[HYPEROPT] = hyperopt_configs
 
-    hyperopt_results = hyperopt(config, dataset=rel_path, output_directory=tmpdir, experiment_name="test_hyperopt")
+    experiment_name = f"test_hyperopt_{uuid.uuid4().hex}"
+    hyperopt_results = hyperopt(config, dataset=rel_path, output_directory=tmpdir, experiment_name=experiment_name)
     if search_space == "random":
         assert hyperopt_results.experiment_analysis.results_df.shape[0] == RANDOM_SEARCH_SIZE
     else:
@@ -391,10 +379,21 @@ def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster):
     assert isinstance(hyperopt_results, HyperoptResults)
 
     # check for existence of the hyperopt statistics file
-    assert os.path.isfile(os.path.join(tmpdir, "test_hyperopt", HYPEROPT_STATISTICS_FILE_NAME))
+    assert fs_utils.path_exists(os.path.join(tmpdir, experiment_name, HYPEROPT_STATISTICS_FILE_NAME))
+
+
+@pytest.mark.parametrize("fs_protocol,bucket", [private_param(("s3", "ludwig-tests"))], ids=["s3"])
+def test_hyperopt_sync_remote(fs_protocol, bucket, csv_filename, ray_cluster):
+    with remote_tmpdir(fs_protocol, bucket) as tmpdir:
+        with pytest.raises(ValueError) if not _ray200 else contextlib.nullcontext():
+            test_hyperopt_run_hyperopt(
+                csv_filename,
+                "random",
+                tmpdir,
+                ray_cluster,
+            )
 
 
-@pytest.mark.distributed
 def test_hyperopt_with_feature_specific_parameters(csv_filename, tmpdir, ray_cluster):
     input_features = [
         text_feature(name="utterance", reduce_output="sum"),
@@ -446,7 +445,6 @@ def test_hyperopt_with_feature_specific_parameters(csv_filename, tmpdir, ray_clu
             assert input_feature["encoder"]["embedding_size"] in embedding_size_search_space
 
 
-@pytest.mark.distributed
 def test_hyperopt_old_config(csv_filename, tmpdir, ray_cluster):
     old_config = {
         "ludwig_version": "0.4",
@@ -500,7 +498,6 @@ def test_hyperopt_old_config(csv_filename, tmpdir, ray_cluster):
     hyperopt(old_config, dataset=rel_path, output_directory=tmpdir, experiment_name="test_hyperopt")
 
 
-@pytest.mark.distributed
 def test_hyperopt_nested_parameters(csv_filename, tmpdir, ray_cluster):
     config = {
         INPUT_FEATURES: [
@@ -591,7 +588,6 @@ def test_hyperopt_nested_parameters(csv_filename, tmpdir, ray_cluster):
         assert trial_config[TRAINER]["learning_rate"] in {0.7, 0.42}
 
 
-@pytest.mark.distributed
 def test_hyperopt_grid_search_more_than_one_sample(csv_filename, tmpdir, ray_cluster):
     input_features = [
         text_feature(name="utterance", encoder={"reduce_output": "sum"}),
diff --git a/tests/integration_tests/test_remote.py b/tests/integration_tests/test_remote.py
index 020d755970f..debf1f18004 100644
--- a/tests/integration_tests/test_remote.py
+++ b/tests/integration_tests/test_remote.py
@@ -1,7 +1,4 @@
-import contextlib
 import os
-import tempfile
-import uuid
 
 import pytest
 import yaml
@@ -11,22 +8,13 @@
 from ludwig.constants import TRAINER
 from ludwig.globals import DESCRIPTION_FILE_NAME
 from ludwig.utils import fs_utils
-from tests.integration_tests.utils import category_feature, generate_data, private_param, sequence_feature
-
-
-@contextlib.contextmanager
-def remote_tmpdir(fs_protocol, bucket):
-    if bucket is None:
-        with tempfile.TemporaryDirectory() as tmpdir:
-            yield f"{fs_protocol}://{tmpdir}"
-        return
-
-    prefix = f"tmp_{uuid.uuid4().hex}"
-    tmpdir = f"{fs_protocol}://{bucket}/{prefix}"
-    try:
-        yield tmpdir
-    finally:
-        fs_utils.delete(tmpdir, recursive=True)
+from tests.integration_tests.utils import (
+    category_feature,
+    generate_data,
+    private_param,
+    remote_tmpdir,
+    sequence_feature,
+)
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integration_tests/utils.py b/tests/integration_tests/utils.py
index cc9110bf283..64a9b9b4d48 100644
--- a/tests/integration_tests/utils.py
+++ b/tests/integration_tests/utils.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
+import contextlib
 import logging
 import multiprocessing
 import os
@@ -40,6 +41,7 @@
 from ludwig.experiment import experiment_cli
 from ludwig.features.feature_utils import compute_feature_hash
 from ludwig.trainers.trainer import Trainer
+from ludwig.utils import fs_utils
 from ludwig.utils.data_utils import read_csv, replace_file_extension
 
 logger = logging.getLogger(__name__)
@@ -872,3 +874,22 @@ def filter(stats):
     finally:
         # Remove results/intermediate data saved to disk
         shutil.rmtree(output_dir, ignore_errors=True)
+
+
+@contextlib.contextmanager
+def remote_tmpdir(fs_protocol, bucket):
+    if bucket is None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            yield f"{fs_protocol}://{tmpdir}"
+        return
+
+    prefix = f"tmp_{uuid.uuid4().hex}"
+    tmpdir = f"{fs_protocol}://{bucket}/{prefix}"
+    try:
+        yield tmpdir
+    finally:
+        try:
+            fs_utils.delete(tmpdir, recursive=True)
+        except FileNotFoundError as e:
+            logging.info(f"failed to delete remote tempdir, does not exist: {str(e)}")
+            pass

From c61380a9e5687891cd25847b9d3b250ea20c4edd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 10 Oct 2022 18:10:48 -0700
Subject: [PATCH 02/29] [pre-commit.ci] pre-commit suggestions (#2622)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/asottile/pyupgrade: v2.38.2 → v3.1.0](https://github.com/asottile/pyupgrade/compare/v2.38.2...v3.1.0)
- [github.com/psf/black: 22.8.0 → 22.10.0](https://github.com/psf/black/compare/22.8.0...22.10.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index abd03ca56e5..8261e76b058 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -28,7 +28,7 @@ repos:
       - id: trailing-whitespace
       - id: mixed-line-ending
   - repo: https://github.com/asottile/pyupgrade
-    rev: v2.38.2
+    rev: v3.1.0
     hooks:
       - id: pyupgrade
         args: [--py36-plus]
@@ -47,7 +47,7 @@ repos:
     hooks:
       - id: flake8
   - repo: https://github.com/psf/black
-    rev: 22.8.0
+    rev: 22.10.0
     hooks:
       - id: black
         name: Format code

From 5f2816af82d7f8ed48b1a46549d758417c0931b3 Mon Sep 17 00:00:00 2001
From: Geoffrey Angus <geoffrey@predibase.com>
Date: Tue, 11 Oct 2022 12:23:39 -0500
Subject: [PATCH 03/29] feat: adds `max_batch_size` to auto batch size
 functionality (#2579)

* updates dask version; adds pyarrow schema to to_parquet; adds unit tests

* add non-NaN test for date and vector

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* changed schema from type dict to pa.Schema

* unpin dask entirely

* re-organized ray tests and created linear issues

* cleanup

* revert ray_start change

* typo

* adds batch size tuning ceiling; wip needs to be added to the schema

* first cut at adding the parameter to the config

* Changed param name to be `auto_batch_size_ceiling` in trainer to match schema

* remove unused import

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix test

* rename auto_batch_size to max_batch_size; WIP unify with plateau param

* update test

* ceiling=>max batch size

* unify increase_batch_size_on_plateau_max with max_batch_size

* use self.max_batch_size

* changed max_batch_size max to sys.maxsize

* updated parameter metadata

* typo

* fix test default for max_batch_size

* PR revision

* pr revision: adds backward compatibility hook for batch size

* cleanup

* added one more relevant param to max_batch_size schema

* PR revisions: adding integer max possible batch size

* typo

* tighter requirement for schema type hint

* comment description of constant

* PR revisions

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 ludwig/constants.py                           |  3 ++
 ludwig/schema/metadata/trainer_metadata.py    | 25 ++++++++--------
 ludwig/schema/trainer.py                      | 29 ++++++++++++++-----
 ludwig/trainers/trainer.py                    | 16 +++++++---
 ludwig/utils/backward_compatibility.py        | 24 +++++++++++++--
 .../utils/test_backward_compatibility.py      | 19 ++++++++++++
 tests/ludwig/utils/test_defaults.py           |  3 +-
 7 files changed, 92 insertions(+), 27 deletions(-)

diff --git a/ludwig/constants.py b/ludwig/constants.py
index 365eb62281d..ad69fa4856d 100644
--- a/ludwig/constants.py
+++ b/ludwig/constants.py
@@ -164,6 +164,9 @@
 BATCH_SIZE = "batch_size"
 EVAL_BATCH_SIZE = "eval_batch_size"
 DEFAULT_BATCH_SIZE = 128
+MAX_POSSIBLE_BATCH_SIZE = (
+    1099511627776  # 2^40. Used for `max_batch_size` config param. Not a hard constraint for `batch_size` config param.
+)
 LEARNING_RATE = "learning_rate"
 USE_BIAS = "use_bias"
 BIAS = "bias"
diff --git a/ludwig/schema/metadata/trainer_metadata.py b/ludwig/schema/metadata/trainer_metadata.py
index 11640d7a08a..e82dc505b95 100644
--- a/ludwig/schema/metadata/trainer_metadata.py
+++ b/ludwig/schema/metadata/trainer_metadata.py
@@ -24,6 +24,19 @@
                                      expected_impact=ExpectedImpact.HIGH,
                                      literature_references=None,
                                      internal_only=False),
+      'max_batch_size': ParameterMetadata(ui_display_name='Max Batch Size',
+                                                  default_value_reasoning='Not typically required.',
+                                                  example_value=1024,
+                                                  related_parameters=['batch_size', 'increase_batch_size_on_plateau'],
+                                                  description_implications='Value used to manually limit the batch '
+                                                                           'sizes explored by auto batch size tuning '
+                                                                           'and batch size increasing on plateau.',
+                                                  suggested_values=None,
+                                                  suggested_values_reasoning=None,
+                                                  commonly_used=False,
+                                                  expected_impact=ExpectedImpact.MEDIUM,
+                                                  literature_references=None,
+                                                  internal_only=False),
      'checkpoints_per_epoch': ParameterMetadata(ui_display_name='Checkpoints per epoch',
                                                 default_value_reasoning='Per-epoch behavior, which scales according '
                                                                         'to the '
@@ -361,18 +374,6 @@
                                                          expected_impact=ExpectedImpact.MEDIUM,
                                                          literature_references=None,
                                                          internal_only=False),
-     'increase_batch_size_on_plateau_max': ParameterMetadata(ui_display_name='Batch Size Increase On Plateau: Cap',
-                                                             default_value_reasoning=None,
-                                                             example_value=None,
-                                                             related_parameters=None,
-                                                             other_information=None,
-                                                             description_implications=None,
-                                                             suggested_values=None,
-                                                             suggested_values_reasoning=None,
-                                                             commonly_used=False,
-                                                             expected_impact=ExpectedImpact.MEDIUM,
-                                                             literature_references=None,
-                                                             internal_only=False),
      'increase_batch_size_on_plateau_patience': ParameterMetadata(ui_display_name='Batch Size Increase On Plateau: '
                                                                                   'Patience',
                                                                   default_value_reasoning=None,
diff --git a/ludwig/schema/trainer.py b/ludwig/schema/trainer.py
index 8ba0cf29eeb..fb1a8047af8 100644
--- a/ludwig/schema/trainer.py
+++ b/ludwig/schema/trainer.py
@@ -3,7 +3,16 @@
 
 from marshmallow_dataclass import dataclass
 
-from ludwig.constants import COMBINED, DEFAULT_BATCH_SIZE, LOSS, MODEL_ECD, MODEL_GBM, TRAINING, TYPE
+from ludwig.constants import (
+    COMBINED,
+    DEFAULT_BATCH_SIZE,
+    LOSS,
+    MAX_POSSIBLE_BATCH_SIZE,
+    MODEL_ECD,
+    MODEL_GBM,
+    TRAINING,
+    TYPE,
+)
 from ludwig.schema import utils as schema_utils
 from ludwig.schema.metadata.trainer_metadata import TRAINER_METADATA
 from ludwig.schema.optimizers import (
@@ -158,6 +167,16 @@ class ECDTrainerConfig(BaseTrainerConfig):
         ],
     )
 
+    max_batch_size: int = schema_utils.PositiveInteger(
+        default=MAX_POSSIBLE_BATCH_SIZE,
+        allow_none=True,
+        description=(
+            "Auto batch size tuning and increasing batch size on plateau will be capped at this value. The default "
+            "value is 2^40."
+        ),
+        parameter_metadata=TRAINER_METADATA["max_batch_size"],
+    )
+
     steps_per_checkpoint: int = schema_utils.NonNegativeInteger(
         default=0,
         description=(
@@ -215,7 +234,7 @@ class ECDTrainerConfig(BaseTrainerConfig):
 
     increase_batch_size_on_plateau: int = schema_utils.NonNegativeInteger(
         default=0,
-        description="Number to increase the batch size by on a plateau.",
+        description="The number of times to increase the batch size on a plateau.",
         parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau"],
     )
 
@@ -231,12 +250,6 @@ class ECDTrainerConfig(BaseTrainerConfig):
         parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau_rate"],
     )
 
-    increase_batch_size_on_plateau_max: int = schema_utils.PositiveInteger(
-        default=512,
-        description="Maximum size of the batch.",
-        parameter_metadata=TRAINER_METADATA["increase_batch_size_on_plateau_max"],
-    )
-
     increase_batch_size_eval_metric: str = schema_utils.String(
         default=LOSS,
         description="Which metric to listen on for increasing the batch size.",
diff --git a/ludwig/trainers/trainer.py b/ludwig/trainers/trainer.py
index a0dafe68d05..ac3933ec3ed 100644
--- a/ludwig/trainers/trainer.py
+++ b/ludwig/trainers/trainer.py
@@ -143,6 +143,7 @@ def __init__(
         self.decay_steps = config.decay_steps
         self.staircase = config.staircase
         self.batch_size = config.batch_size
+        self.max_batch_size = config.max_batch_size
         self.eval_batch_size = config.batch_size if config.eval_batch_size is None else config.eval_batch_size
         self.should_shuffle = config.should_shuffle
         self._validation_field = config.validation_field
@@ -159,7 +160,6 @@ def __init__(
         self.increase_batch_size_on_plateau = config.increase_batch_size_on_plateau
         self.increase_batch_size_on_plateau_patience = config.increase_batch_size_on_plateau_patience
         self.increase_batch_size_on_plateau_rate = config.increase_batch_size_on_plateau_rate
-        self.increase_batch_size_on_plateau_max = config.increase_batch_size_on_plateau_max
         self.increase_batch_size_eval_metric = config.increase_batch_size_eval_metric
         self.increase_batch_size_eval_split = config.increase_batch_size_eval_split
         self.learning_rate_warmup_epochs = config.learning_rate_warmup_epochs
@@ -480,7 +480,15 @@ def tune_batch_size(
 
             def _is_valid_batch_size(batch_size):
                 # make sure that batch size is valid (e.g. less than size of ds)
-                return batch_size < len(training_set)
+                is_smaller_than_training_set = batch_size < len(training_set)
+                is_under_max_batch_size = batch_size <= self.max_batch_size
+                is_valid = is_smaller_than_training_set and is_under_max_batch_size
+                if not is_valid:
+                    logger.info(
+                        f"Batch size {batch_size} is invalid, must be smaller than training set size "
+                        f"{len(training_set)} and less than or equal to max batch size {self.max_batch_size}"
+                    )
+                return is_valid
 
             # TODO (ASN) : Circle back on how we want to set default placeholder value
             # Currently, since self.batch_size is originally set to auto, we provide a
@@ -652,7 +660,7 @@ def run_evaluation(
                 self.increase_batch_size_on_plateau,
                 self.increase_batch_size_on_plateau_patience,
                 self.increase_batch_size_on_plateau_rate,
-                self.increase_batch_size_on_plateau_max,
+                self.max_batch_size,
                 self.increase_batch_size_eval_metric,
                 self.increase_batch_size_eval_split,
                 early_stopping_steps,
@@ -1352,7 +1360,7 @@ def increase_batch_size(
                     )
                 ):
                     progress_tracker.batch_size = min(
-                        (increase_batch_size_on_plateau_rate * progress_tracker.batch_size),
+                        int(increase_batch_size_on_plateau_rate * progress_tracker.batch_size),
                         increase_batch_size_on_plateau_max,
                     )
 
diff --git a/ludwig/utils/backward_compatibility.py b/ludwig/utils/backward_compatibility.py
index 5f4fded5ef7..657b465891e 100644
--- a/ludwig/utils/backward_compatibility.py
+++ b/ludwig/utils/backward_compatibility.py
@@ -598,15 +598,35 @@ def upgrade_missing_value_strategy(config: Dict[str, Any]) -> Dict[str, Any]:
     return config
 
 
+@register_config_transformation("0.6", ["trainer"])
+def _upgrade_max_batch_size(trainer: Dict[str, Any]) -> Dict[str, Any]:
+    if "increase_batch_size_on_plateau_max" in trainer:
+        warnings.warn(
+            'Config param "increase_batch_size_on_plateau_max" renamed to "max_batch_size" and will be '
+            "removed in v0.8",
+            DeprecationWarning,
+        )
+        increase_batch_size_on_plateau_max_val = trainer.pop("increase_batch_size_on_plateau_max")
+        if "max_batch_size" in trainer:
+            warnings.warn('"max_batch_size" config param already set. Discarding "increase_batch_size_on_plateau_max".')
+        else:
+            warnings.warn(
+                f'Setting "max_batch_size" config param to "increase_batch_size_on_plateau_max" value '
+                f'({increase_batch_size_on_plateau_max_val}) and discarding "increase_batch_size_on_plateau_max"'
+            )
+            trainer["max_batch_size"] = increase_batch_size_on_plateau_max_val
+    return trainer
+
+
 def upgrade_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]:
     # TODO(travis): stopgap solution, we should make it so we don't need to do this
     # by decoupling config and metadata
     metadata = copy.deepcopy(metadata)
-    _upgrade_metadata_mising_values(metadata)
+    _upgrade_metadata_missing_values(metadata)
     return metadata
 
 
-def _upgrade_metadata_mising_values(metadata: Dict[str, Any]):
+def _upgrade_metadata_missing_values(metadata: Dict[str, Any]):
     for k, v in metadata.items():
         if isinstance(v, dict) and _is_old_missing_value_strategy(v):
             _update_old_missing_value_strategy(v)
diff --git a/tests/ludwig/utils/test_backward_compatibility.py b/tests/ludwig/utils/test_backward_compatibility.py
index fcd388e007b..6db0df0b9d4 100644
--- a/tests/ludwig/utils/test_backward_compatibility.py
+++ b/tests/ludwig/utils/test_backward_compatibility.py
@@ -509,6 +509,25 @@ def test_update_missing_value_strategy(missing_value_strategy: str):
     assert updated_config == expected_config
 
 
+def test_update_increase_batch_size_on_plateau_max():
+    old_valid_config = {
+        "input_features": [{"name": "input_feature_1", "type": "category"}],
+        "output_features": [{"name": "output_feature_1", "type": "category"}],
+        "trainer": {
+            "increase_batch_size_on_plateau_max": 256,
+        },
+    }
+
+    updated_config = upgrade_to_latest_version(old_valid_config)
+    del updated_config["ludwig_version"]
+
+    expected_config = copy.deepcopy(old_valid_config)
+    del expected_config["trainer"]["increase_batch_size_on_plateau_max"]
+    expected_config["trainer"]["max_batch_size"] = 256
+
+    assert updated_config == expected_config
+
+
 def test_old_class_weights_default():
     old_config = {
         "input_features": [
diff --git a/tests/ludwig/utils/test_defaults.py b/tests/ludwig/utils/test_defaults.py
index 73b69a00c72..2ef2a8a9a9a 100644
--- a/tests/ludwig/utils/test_defaults.py
+++ b/tests/ludwig/utils/test_defaults.py
@@ -14,6 +14,7 @@
     FILL_WITH_MODE,
     HYPEROPT,
     INPUT_FEATURES,
+    MAX_POSSIBLE_BATCH_SIZE,
     MISSING_VALUE_STRATEGY,
     MODEL_ECD,
     MODEL_GBM,
@@ -429,6 +430,7 @@ def test_merge_with_defaults():
             "regularization_type": "l2",
             "should_shuffle": True,
             "batch_size": 128,
+            "max_batch_size": MAX_POSSIBLE_BATCH_SIZE,
             "steps_per_checkpoint": 0,
             "checkpoints_per_epoch": 0,
             "reduce_learning_rate_on_plateau": 0.0,
@@ -439,7 +441,6 @@ def test_merge_with_defaults():
             "increase_batch_size_on_plateau": 0,
             "increase_batch_size_on_plateau_patience": 5,
             "increase_batch_size_on_plateau_rate": 2.0,
-            "increase_batch_size_on_plateau_max": 512,
             "increase_batch_size_eval_metric": "loss",
             "increase_batch_size_eval_split": "training",
             "decay": False,

From e832a7ab11a5ae8cc86e7fd6fd5055835face0f9 Mon Sep 17 00:00:00 2001
From: connor-mccorm <97468934+connor-mccorm@users.noreply.github.com>
Date: Tue, 11 Oct 2022 13:03:45 -0700
Subject: [PATCH 04/29] Set commonly used parameters (#2619)

* Set commonly used parameters

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* revert title change

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 ludwig/schema/combiners/utils.py            |   5 +
 ludwig/schema/metadata/combiner_metadata.py |  39 +++--
 ludwig/schema/metadata/decoder_metadata.py  |   2 +-
 ludwig/schema/metadata/encoder_metadata.py  | 184 ++++++++++----------
 ludwig/schema/metadata/feature_metadata.py  |  36 ++--
 ludwig/schema/metadata/trainer_metadata.py  |  18 +-
 6 files changed, 151 insertions(+), 133 deletions(-)

diff --git a/ludwig/schema/combiners/utils.py b/ludwig/schema/combiners/utils.py
index 0093f62b1c9..d1f2edb338f 100644
--- a/ludwig/schema/combiners/utils.py
+++ b/ludwig/schema/combiners/utils.py
@@ -1,4 +1,7 @@
+from ludwig.constants import TYPE
 from ludwig.schema import utils as schema_utils
+from ludwig.schema.metadata.combiner_metadata import COMBINER_METADATA
+from ludwig.schema.metadata.parameter_metadata import convert_metadata_to_json
 from ludwig.utils.registry import Registry
 
 combiner_registry = Registry()
@@ -16,6 +19,7 @@ def get_combiner_jsonschema():
     """Returns a JSON schema structured to only require a `type` key and then conditionally apply a corresponding
     combiner's field constraints."""
     combiner_types = sorted(list(combiner_registry.keys()))
+    parameter_metadata = convert_metadata_to_json(COMBINER_METADATA[TYPE])
     return {
         "type": "object",
         "properties": {
@@ -25,6 +29,7 @@ def get_combiner_jsonschema():
                 "default": "concat",
                 "title": "type",
                 "description": "Select the combiner type.",
+                "parameter_metadata": parameter_metadata,
             },
         },
         "allOf": get_combiner_conds(),
diff --git a/ludwig/schema/metadata/combiner_metadata.py b/ludwig/schema/metadata/combiner_metadata.py
index 1838b878a9d..7f7ebf5519b 100644
--- a/ludwig/schema/metadata/combiner_metadata.py
+++ b/ludwig/schema/metadata/combiner_metadata.py
@@ -1,6 +1,19 @@
 from ludwig.schema.metadata.parameter_metadata import ExpectedImpact, ParameterMetadata
 
 COMBINER_METADATA = {
+    "type": ParameterMetadata(
+        ui_display_name="Combiner Type",
+        default_value_reasoning=None,
+        example_value=None,
+        related_parameters=None,
+        other_information=None,
+        description_implications=None,
+        suggested_values_reasoning=None,
+        commonly_used=True,
+        expected_impact=ExpectedImpact.HIGH,
+        literature_references=None,
+        internal_only=False,
+    ),
     "ComparatorCombiner": {
         "activation": ParameterMetadata(
             ui_display_name="Activation",
@@ -61,7 +74,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -143,7 +156,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -327,7 +340,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -395,7 +408,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -593,7 +606,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -647,7 +660,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -955,7 +968,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1156,7 +1169,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1194,7 +1207,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1272,7 +1285,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -1512,7 +1525,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1550,7 +1563,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1628,7 +1641,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
diff --git a/ludwig/schema/metadata/decoder_metadata.py b/ludwig/schema/metadata/decoder_metadata.py
index c56d51adfb0..2908e91973c 100644
--- a/ludwig/schema/metadata/decoder_metadata.py
+++ b/ludwig/schema/metadata/decoder_metadata.py
@@ -479,7 +479,7 @@
             "but the higher dimensionality can also improve overall quality.",
             suggested_values="128 - 2048",
             suggested_values_reasoning="Try models with smaller or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=None,
             internal_only=False,
diff --git a/ludwig/schema/metadata/encoder_metadata.py b/ludwig/schema/metadata/encoder_metadata.py
index 1148a6d22c0..8cf3539103c 100644
--- a/ludwig/schema/metadata/encoder_metadata.py
+++ b/ludwig/schema/metadata/encoder_metadata.py
@@ -18,7 +18,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -52,7 +52,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -78,7 +78,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -136,7 +136,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -359,7 +359,7 @@
             "typically generalize well. Consider deviating from the default only if the "
             "text in the dataset originates from another domain (e.g. languages other than "
             "English).",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://arxiv.org/abs/1909.11942"],
             internal_only=False,
@@ -583,7 +583,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -603,7 +603,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -658,7 +658,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1008,7 +1008,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1034,7 +1034,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -1147,7 +1147,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -1634,7 +1634,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1654,7 +1654,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -1707,7 +1707,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -2014,7 +2014,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -2057,7 +2057,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -2146,7 +2146,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -2189,7 +2189,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -2321,7 +2321,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -2347,7 +2347,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -2425,7 +2425,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -2620,7 +2620,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -2674,7 +2674,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -2869,7 +2869,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -2918,7 +2918,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -3078,7 +3078,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -3112,7 +3112,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -3254,7 +3254,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -3305,7 +3305,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -3397,7 +3397,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -3417,7 +3417,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -3443,7 +3443,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -3486,7 +3486,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -3792,7 +3792,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -3840,7 +3840,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -4910,7 +4910,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -4936,7 +4936,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -5014,7 +5014,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -5264,7 +5264,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -5290,7 +5290,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -5367,7 +5367,7 @@
             description_implications=None,
             suggested_values="sigmoid, ReLu, tanh",
             suggested_values_reasoning=None,
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=None,
             internal_only=False,
@@ -5388,7 +5388,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -5544,7 +5544,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -5570,7 +5570,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -5648,7 +5648,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -6009,7 +6009,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -6198,7 +6198,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -6608,7 +6608,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -6634,7 +6634,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -6755,7 +6755,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -7137,7 +7137,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -7247,7 +7247,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -7622,7 +7622,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -7648,7 +7648,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -7904,7 +7904,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -7930,7 +7930,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -8022,7 +8022,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -8244,7 +8244,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -8367,7 +8367,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -8844,7 +8844,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -8870,7 +8870,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -8991,7 +8991,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -9421,7 +9421,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -9476,7 +9476,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -9502,7 +9502,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -9578,7 +9578,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -9661,7 +9661,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -9879,7 +9879,7 @@
             description_implications=None,
             suggested_values="sigmoid, ReLu, tanh",
             suggested_values_reasoning=None,
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=None,
             internal_only=False,
@@ -9900,7 +9900,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -10151,7 +10151,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -10177,7 +10177,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -10298,7 +10298,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -10659,7 +10659,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -10685,7 +10685,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -10761,7 +10761,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -10830,7 +10830,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -10951,7 +10951,7 @@
             description_implications=None,
             suggested_values="sigmoid, ReLu, tanh",
             suggested_values_reasoning=None,
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=None,
             internal_only=False,
@@ -10972,7 +10972,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -11173,7 +11173,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -11199,7 +11199,7 @@
             suggested_values="1.6 * sqrt(vocab_size)",
             suggested_values_reasoning="Rule of thumb suggested by a deep learning textbook. Try models with smaller "
             "or larger embedding sizes to observe relative impact.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.MEDIUM,
             literature_references=[
                 "https://developers.google.com/machine-learning/crash-course/embeddings/video-lecture"
@@ -11275,7 +11275,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -11363,7 +11363,7 @@
             "weights are updated. For example, batch normalization standardizes the inputs "
             "to a layer for each mini-batch. Try out different normalizations to see if "
             "that helps with training stability",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=[
                 "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -11684,7 +11684,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -12080,7 +12080,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -12428,7 +12428,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -12497,7 +12497,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -12651,7 +12651,7 @@
             "typically generalize well. Consider deviating from the default only if the "
             "images in the dataset originate from another domain (e.g. medical images, "
             "geospatial data).",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://arxiv.org/abs/2010.11929"],
             internal_only=False,
@@ -12707,7 +12707,7 @@
             suggested_values=[False],
             suggested_values_reasoning="If you have a large amount of data and/or you have data that differs from the "
             "typical distribution, then it might be worth training the model from scratch.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://machinelearningmastery.com/transfer-learning-for-deep-learning/"],
             internal_only=False,
@@ -12757,7 +12757,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -12819,7 +12819,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -13544,7 +13544,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
@@ -13822,7 +13822,7 @@
             suggested_values_reasoning="Tuning dropout is really something to be done when all of the big choices "
             "about architecture have been settled. Consider starting with 0.5 and "
             "adjusting the dropout depending on observed model performance.",
-            commonly_used=True,
+            commonly_used=False,
             expected_impact=ExpectedImpact.HIGH,
             literature_references=["https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html"],
             internal_only=False,
diff --git a/ludwig/schema/metadata/feature_metadata.py b/ludwig/schema/metadata/feature_metadata.py
index 0ac293c5904..894b3ed3f1e 100644
--- a/ludwig/schema/metadata/feature_metadata.py
+++ b/ludwig/schema/metadata/feature_metadata.py
@@ -167,7 +167,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -192,7 +192,7 @@
                 "inputs "
                 "to a layer for each mini-batch. Try out different normalizations to see if "
                 "that helps with training stability",
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.HIGH,
                 literature_references=[
                     "https://machinelearningmastery.com/batch-normalization-for-training-of-deep-neural-networks/"
@@ -289,7 +289,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -396,7 +396,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -463,7 +463,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -561,7 +561,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -614,7 +614,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -764,7 +764,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -893,7 +893,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -918,7 +918,7 @@
                 "feature "
                 "distributions have mean = 0 and std = 1. It’s useful when there are a few "
                 "outliers, but not so extreme that you need clipping.",
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.HIGH,
                 literature_references=[
                     "https://developers.google.com/machine-learning/data-prep/transform/normalization"
@@ -986,7 +986,7 @@
                 suggested_values="Use the lowest value that covers most of your input data. Only increase the value if "
                 "crucial parts of the input data are truncated.",
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.HIGH,
                 literature_references=None,
                 internal_only=False,
@@ -1007,7 +1007,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -1172,7 +1172,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -1280,7 +1280,7 @@
                 suggested_values="Use the lowest value that covers most of your input data. Only increase the value if "
                 "crucial parts of the input data are truncated.",
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.HIGH,
                 literature_references=None,
                 internal_only=False,
@@ -1301,7 +1301,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -1401,7 +1401,7 @@
                 "https://towardsdatascience.com/byte-pair-encoding-the-dark-horse-of-modern"
                 "-nlp-eb36c7df4f10 ). This tokenizer is language-agnostic and more "
                 "sophisticated than the default.",
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.HIGH,
                 literature_references=["https://huggingface.co/course/chapter2/4?fw=pt"],
                 internal_only=False,
@@ -1488,7 +1488,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
@@ -1597,7 +1597,7 @@
                 "high proportion of missing values in the dataset.",
                 suggested_values=None,
                 suggested_values_reasoning=None,
-                commonly_used=True,
+                commonly_used=False,
                 expected_impact=ExpectedImpact.UNKNOWN,
                 literature_references=None,
                 internal_only=False,
diff --git a/ludwig/schema/metadata/trainer_metadata.py b/ludwig/schema/metadata/trainer_metadata.py
index e82dc505b95..d9022d22dcb 100644
--- a/ludwig/schema/metadata/trainer_metadata.py
+++ b/ludwig/schema/metadata/trainer_metadata.py
@@ -84,7 +84,7 @@
                                                                            'a sub-epoch time scale, or every few '
                                                                            'thousand '
                                                                            'steps.',
-                                                commonly_used=True,
+                                                commonly_used=False,
                                                 expected_impact=ExpectedImpact.HIGH,
                                                 literature_references=None,
                                                 internal_only=False),
@@ -119,7 +119,7 @@
                                                            'a learning rate schedule can give better training '
                                                            'performance '
                                                            'and make the model converge faster',
-                                commonly_used=True,
+                                commonly_used=False,
                                 expected_impact=ExpectedImpact.MEDIUM,
                                 literature_references=[
                                     'https://peltarion.com/knowledge-center/documentation/modeling-view/run-a-model'
@@ -181,7 +181,7 @@
                                                                  '`decay_steps`. You can think of `decay_steps` as a '
                                                                  'rate '
                                                                  'of decay for the `decay_rate`.',
-                                      commonly_used=True,
+                                      commonly_used=False,
                                       expected_impact=ExpectedImpact.MEDIUM,
                                       literature_references=None,
                                       internal_only=False),
@@ -217,7 +217,7 @@
                                                                 "is usually a good indicator that there's not much "
                                                                 "more to "
                                                                 'learn.',
-                                     commonly_used=True,
+                                     commonly_used=False,
                                      expected_impact=ExpectedImpact.MEDIUM,
                                      literature_references=None,
                                      internal_only=False),
@@ -309,7 +309,7 @@
                                                                            'computed, so it will still be easy to spot '
                                                                            'signs of overfitting like when the '
                                                                            'training-validation loss curves diverge.',
-                                                commonly_used=True,
+                                                commonly_used=False,
                                                 expected_impact=ExpectedImpact.HIGH,
                                                 literature_references=None,
                                                 internal_only=False),
@@ -548,7 +548,7 @@
                                                                'choice. If you are in need of quick results without '
                                                                'extensive hypertuning, tend towards adaptive gradient '
                                                                'methods like adam or adamw.',
-                                    commonly_used=True,
+                                    commonly_used=False,
                                     expected_impact=ExpectedImpact.HIGH,
                                     literature_references=['https://www.youtube.com/watch?v=mdKjMPmcWjY'],
                                     internal_only=False),
@@ -658,7 +658,7 @@
                                                                            '” with values '
                                                                            'often on a logarithmic scale between 0 and '
                                                                            '0.1, such as 0.1, 0.001, 0.0001, etc.',
-                                                commonly_used=True,
+                                                commonly_used=False,
                                                 expected_impact=ExpectedImpact.HIGH,
                                                 literature_references=[
                                                     'https://developers.google.com/machine-learning/crash-course'
@@ -689,7 +689,7 @@
                                                                        'regularization is not.',
                                               suggested_values='L2',
                                               suggested_values_reasoning=None,
-                                              commonly_used=True,
+                                              commonly_used=False,
                                               expected_impact=ExpectedImpact.HIGH,
                                               literature_references=[
                                                   'https://neptune.ai/blog/fighting-overfitting-with-l1-or-l2'
@@ -802,7 +802,7 @@
                                                                           'a sub-epoch time scale, or every few '
                                                                           'thousand '
                                                                           'steps.',
-                                               commonly_used=True,
+                                               commonly_used=False,
                                                expected_impact=ExpectedImpact.HIGH,
                                                literature_references=None,
                                                internal_only=False),

From 38d922ec3192b6bed4f344b6e305ce296970f7b8 Mon Sep 17 00:00:00 2001
From: connor-mccorm <97468934+connor-mccorm@users.noreply.github.com>
Date: Tue, 11 Oct 2022 21:32:30 -0700
Subject: [PATCH 05/29] Factor out defaults mixin change (#2628)

* Factor out defaults mixin change

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix test and pull in other relevant changes from the config object (CO) PR

* Refactored feature cond changes

* Removed default validation metric from schema since it's not needed until config object refactor

* Fix flake8 lint errors

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 ludwig/features/audio_feature.py              |  2 -
 ludwig/features/bag_feature.py                |  2 -
 ludwig/features/binary_feature.py             |  3 -
 ludwig/features/category_feature.py           |  3 -
 ludwig/features/date_feature.py               |  2 -
 ludwig/features/h3_feature.py                 |  2 -
 ludwig/features/image_feature.py              |  2 -
 ludwig/features/number_feature.py             |  3 -
 ludwig/features/sequence_feature.py           |  3 -
 ludwig/features/set_feature.py                |  3 -
 ludwig/features/text_feature.py               |  3 -
 ludwig/features/timeseries_feature.py         |  2 -
 ludwig/features/vector_feature.py             |  3 -
 ludwig/schema/features/audio_feature.py       | 24 +++---
 ludwig/schema/features/bag_feature.py         | 24 +++---
 ludwig/schema/features/binary_feature.py      | 79 ++++++++++--------
 ludwig/schema/features/category_feature.py    | 73 ++++++++++-------
 ludwig/schema/features/date_feature.py        | 24 +++---
 ludwig/schema/features/h3_feature.py          | 24 +++---
 ludwig/schema/features/image_feature.py       | 24 +++---
 ludwig/schema/features/number_feature.py      | 62 +++++++++-----
 ludwig/schema/features/sequence_feature.py    | 58 ++++++++-----
 ludwig/schema/features/set_feature.py         | 77 +++++++++++-------
 ludwig/schema/features/text_feature.py        | 61 ++++++++++----
 ludwig/schema/features/timeseries_feature.py  | 26 +++---
 ludwig/schema/features/utils.py               | 34 +++-----
 ludwig/schema/features/vector_feature.py      | 81 +++++++++++--------
 ludwig/schema/metadata/parameter_metadata.py  |  3 +
 ludwig/schema/utils.py                        | 19 +++--
 .../schema/test_validate_config_misc.py       |  8 +-
 tests/ludwig/utils/test_defaults.py           |  6 +-
 31 files changed, 428 insertions(+), 312 deletions(-)

diff --git a/ludwig/features/audio_feature.py b/ludwig/features/audio_feature.py
index d72a91b58de..f3c45af918e 100644
--- a/ludwig/features/audio_feature.py
+++ b/ludwig/features/audio_feature.py
@@ -36,7 +36,6 @@
 from ludwig.features.base_feature import BaseFeatureMixin
 from ludwig.features.sequence_feature import SequenceInputFeature
 from ludwig.schema.features.audio_feature import AudioInputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature
 from ludwig.utils.audio_utils import (
     calculate_mean,
     calculate_var,
@@ -435,7 +434,6 @@ def _get_max_length_feature(preprocessing_parameters, sampling_rate_in_hz, audio
             raise ValueError(f"{feature_type} is not recognized.")
 
 
-@register_input_feature(AUDIO)
 class AudioInputFeature(AudioFeatureMixin, SequenceInputFeature):
     def __init__(self, input_feature_config: Union[AudioInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
diff --git a/ludwig/features/bag_feature.py b/ludwig/features/bag_feature.py
index 86e118928bb..814d5ec79f5 100644
--- a/ludwig/features/bag_feature.py
+++ b/ludwig/features/bag_feature.py
@@ -25,7 +25,6 @@
 from ludwig.features.feature_utils import set_str_to_idx
 from ludwig.features.set_feature import _SetPreprocessing
 from ludwig.schema.features.bag_feature import BagInputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature
 from ludwig.utils.misc_utils import set_default_value, set_default_values
 from ludwig.utils.strings_utils import create_vocabulary
 
@@ -86,7 +85,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(BAG)
 class BagInputFeature(BagFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[BagInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
diff --git a/ludwig/features/binary_feature.py b/ludwig/features/binary_feature.py
index 03287a58d5e..1caaa7c82f5 100644
--- a/ludwig/features/binary_feature.py
+++ b/ludwig/features/binary_feature.py
@@ -43,7 +43,6 @@
 )
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
 from ludwig.schema.features.binary_feature import BinaryInputFeatureConfig, BinaryOutputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature, register_output_feature
 from ludwig.utils import calibration, output_feature_utils, strings_utils
 from ludwig.utils.eval_utils import (
     average_precision_score,
@@ -214,7 +213,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(BINARY)
 class BinaryInputFeature(BinaryFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[BinaryInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
@@ -273,7 +271,6 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
         return _BinaryPreprocessing(metadata)
 
 
-@register_output_feature(BINARY)
 class BinaryOutputFeature(BinaryFeatureMixin, OutputFeature):
     metric_functions = {LOSS: None, ACCURACY: None, ROC_AUC: None}
     default_validation_metric = ROC_AUC
diff --git a/ludwig/features/category_feature.py b/ludwig/features/category_feature.py
index 1972e021351..66ac6e15383 100644
--- a/ludwig/features/category_feature.py
+++ b/ludwig/features/category_feature.py
@@ -44,7 +44,6 @@
 )
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
 from ludwig.schema.features.category_feature import CategoryInputFeatureConfig, CategoryOutputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature, register_output_feature
 from ludwig.utils import calibration, output_feature_utils
 from ludwig.utils.eval_utils import ConfusionMatrix
 from ludwig.utils.math_utils import int_type, softmax
@@ -190,7 +189,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(CATEGORY)
 class CategoryInputFeature(CategoryFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[CategoryInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
@@ -251,7 +249,6 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
         return _CategoryPreprocessing(metadata)
 
 
-@register_output_feature(CATEGORY)
 class CategoryOutputFeature(CategoryFeatureMixin, OutputFeature):
     metric_functions = {LOSS: None, ACCURACY: None, HITS_AT_K: None}
     default_validation_metric = ACCURACY
diff --git a/ludwig/features/date_feature.py b/ludwig/features/date_feature.py
index f861d66023a..c39c770c81b 100644
--- a/ludwig/features/date_feature.py
+++ b/ludwig/features/date_feature.py
@@ -24,7 +24,6 @@
 from ludwig.constants import COLUMN, DATE, ENCODER, PROC_COLUMN, TIED, TYPE
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature
 from ludwig.schema.features.date_feature import DateInputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature
 from ludwig.utils.date_utils import create_vector_from_datetime_obj
 from ludwig.utils.misc_utils import set_default_value, set_default_values
 from ludwig.utils.types import DataFrame, TorchscriptPreprocessingInput
@@ -112,7 +111,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(DATE)
 class DateInputFeature(DateFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[DateInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
diff --git a/ludwig/features/h3_feature.py b/ludwig/features/h3_feature.py
index b6c6bfa9519..d03a4e3d600 100644
--- a/ludwig/features/h3_feature.py
+++ b/ludwig/features/h3_feature.py
@@ -21,7 +21,6 @@
 from ludwig.constants import COLUMN, ENCODER, H3, PROC_COLUMN, TIED, TYPE
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature
 from ludwig.schema.features.h3_feature import H3InputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature
 from ludwig.utils.h3_util import h3_to_components
 from ludwig.utils.misc_utils import set_default_value, set_default_values
 from ludwig.utils.types import TorchscriptPreprocessingInput
@@ -109,7 +108,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(H3)
 class H3InputFeature(H3FeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[H3InputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
diff --git a/ludwig/features/image_feature.py b/ludwig/features/image_feature.py
index 917c1a80b89..f574bfc4de0 100644
--- a/ludwig/features/image_feature.py
+++ b/ludwig/features/image_feature.py
@@ -47,7 +47,6 @@
 from ludwig.data.cache.types import wrap
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature
 from ludwig.schema.features.image_feature import ImageInputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature
 from ludwig.utils.data_utils import get_abs_path
 from ludwig.utils.dataframe_utils import is_dask_series_or_df
 from ludwig.utils.fs_utils import has_remote_protocol, upload_h5
@@ -479,7 +478,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(IMAGE)
 class ImageInputFeature(ImageFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[ImageInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
diff --git a/ludwig/features/number_feature.py b/ludwig/features/number_feature.py
index be82a80fcfd..d37b4ac4652 100644
--- a/ludwig/features/number_feature.py
+++ b/ludwig/features/number_feature.py
@@ -48,7 +48,6 @@
 )
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
 from ludwig.schema.features.number_feature import NumberInputFeatureConfig, NumberOutputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature, register_output_feature
 from ludwig.utils import output_feature_utils
 from ludwig.utils.misc_utils import get_from_registry, set_default_value, set_default_values
 from ludwig.utils.types import TorchscriptPreprocessingInput
@@ -293,7 +292,6 @@ def normalize(series: pd.Series) -> pd.Series:
         return proc_df
 
 
-@register_input_feature(NUMBER)
 class NumberInputFeature(NumberFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[NumberInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
@@ -351,7 +349,6 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
         return _NumberPreprocessing(metadata)
 
 
-@register_output_feature(NUMBER)
 class NumberOutputFeature(NumberFeatureMixin, OutputFeature):
     metric_functions = {
         LOSS: None,
diff --git a/ludwig/features/sequence_feature.py b/ludwig/features/sequence_feature.py
index b1e7d9af72c..deb2e46db3f 100644
--- a/ludwig/features/sequence_feature.py
+++ b/ludwig/features/sequence_feature.py
@@ -48,7 +48,6 @@
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
 from ludwig.features.feature_utils import compute_sequence_probability, compute_token_probabilities
 from ludwig.schema.features.sequence_feature import SequenceInputFeatureConfig, SequenceOutputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature, register_output_feature
 from ludwig.utils import output_feature_utils
 from ludwig.utils.math_utils import softmax
 from ludwig.utils.misc_utils import set_default_value, set_default_values
@@ -259,7 +258,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(SEQUENCE)
 class SequenceInputFeature(SequenceFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[SequenceInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
@@ -314,7 +312,6 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
         return _SequencePreprocessing(metadata)
 
 
-@register_output_feature(SEQUENCE)
 class SequenceOutputFeature(SequenceFeatureMixin, OutputFeature):
     metric_functions = {
         LOSS: None,
diff --git a/ludwig/features/set_feature.py b/ludwig/features/set_feature.py
index 0936328fd5b..2808a676d5c 100644
--- a/ludwig/features/set_feature.py
+++ b/ludwig/features/set_feature.py
@@ -42,7 +42,6 @@
 from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
 from ludwig.features.feature_utils import set_str_to_idx
 from ludwig.schema.features.set_feature import SetInputFeatureConfig, SetOutputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature, register_output_feature
 from ludwig.utils import output_feature_utils
 from ludwig.utils.misc_utils import set_default_value, set_default_values
 from ludwig.utils.strings_utils import create_vocabulary, UNKNOWN_SYMBOL
@@ -210,7 +209,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(SET)
 class SetInputFeature(SetFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[SetInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
@@ -260,7 +258,6 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
         return _SetPreprocessing(metadata)
 
 
-@register_output_feature(SET)
 class SetOutputFeature(SetFeatureMixin, OutputFeature):
     metric_functions = {LOSS: None, JACCARD: None}
     default_validation_metric = JACCARD
diff --git a/ludwig/features/text_feature.py b/ludwig/features/text_feature.py
index e6210319af9..4996e193158 100644
--- a/ludwig/features/text_feature.py
+++ b/ludwig/features/text_feature.py
@@ -49,7 +49,6 @@
     SequenceOutputFeature,
 )
 from ludwig.schema.features.text_feature import TextInputFeatureConfig, TextOutputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature, register_output_feature
 from ludwig.utils.math_utils import softmax
 from ludwig.utils.misc_utils import set_default_value, set_default_values
 from ludwig.utils.strings_utils import build_sequence_matrix, create_vocabulary, SpecialSymbol, UNKNOWN_SYMBOL
@@ -181,7 +180,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(TEXT)
 class TextInputFeature(TextFeatureMixin, SequenceInputFeature):
     def __init__(self, input_feature_config: Union[TextInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
@@ -246,7 +244,6 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:
         return _SequencePreprocessing(metadata)
 
 
-@register_output_feature(TEXT)
 class TextOutputFeature(TextFeatureMixin, SequenceOutputFeature):
     metric_functions = {LOSS: None, TOKEN_ACCURACY: None, LAST_ACCURACY: None, PERPLEXITY: None, EDIT_DISTANCE: None}
     default_validation_metric = LOSS
diff --git a/ludwig/features/timeseries_feature.py b/ludwig/features/timeseries_feature.py
index 703373c2975..8eda71178b5 100644
--- a/ludwig/features/timeseries_feature.py
+++ b/ludwig/features/timeseries_feature.py
@@ -23,7 +23,6 @@
 from ludwig.features.base_feature import BaseFeatureMixin
 from ludwig.features.sequence_feature import SequenceInputFeature
 from ludwig.schema.features.timeseries_feature import TimeseriesInputFeatureConfig
-from ludwig.schema.features.utils import register_input_feature
 from ludwig.utils.misc_utils import set_default_value, set_default_values
 from ludwig.utils.tokenizers import get_tokenizer_from_registry, TORCHSCRIPT_COMPATIBLE_TOKENIZERS
 from ludwig.utils.types import TorchscriptPreprocessingInput
@@ -173,7 +172,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(TIMESERIES)
 class TimeseriesInputFeature(TimeseriesFeatureMixin, SequenceInputFeature):
     def __init__(self, input_feature_config: Union[TimeseriesInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
diff --git a/ludwig/features/vector_feature.py b/ludwig/features/vector_feature.py
index 9ec8bdd229d..a0a9da90871 100644
--- a/ludwig/features/vector_feature.py
+++ b/ludwig/features/vector_feature.py
@@ -42,7 +42,6 @@
     VECTOR,
 )
 from ludwig.features.base_feature import InputFeature, OutputFeature, PredictModule
-from ludwig.schema.features.utils import register_input_feature, register_output_feature
 from ludwig.schema.features.vector_feature import VectorInputFeatureConfig, VectorOutputFeatureConfig
 from ludwig.utils import output_feature_utils
 from ludwig.utils.misc_utils import set_default_value, set_default_values
@@ -146,7 +145,6 @@ def add_feature_data(
         return proc_df
 
 
-@register_input_feature(VECTOR)
 class VectorInputFeature(VectorFeatureMixin, InputFeature):
     def __init__(self, input_feature_config: Union[VectorInputFeatureConfig, Dict], encoder_obj=None, **kwargs):
         input_feature_config = self.load_config(input_feature_config)
@@ -195,7 +193,6 @@ def get_schema_cls():
         return VectorInputFeatureConfig
 
 
-@register_output_feature(VECTOR)
 class VectorOutputFeature(VectorFeatureMixin, OutputFeature):
     metric_functions = {LOSS: None, ERROR: None, MEAN_SQUARED_ERROR: None, MEAN_ABSOLUTE_ERROR: None, R2: None}
     default_validation_metric = MEAN_SQUARED_ERROR
diff --git a/ludwig/schema/features/audio_feature.py b/ludwig/schema/features/audio_feature.py
index 9587a380fd0..797b21e07ff 100644
--- a/ludwig/schema/features/audio_feature.py
+++ b/ludwig/schema/features/audio_feature.py
@@ -1,19 +1,20 @@
 from marshmallow_dataclass import dataclass
 
 from ludwig.constants import AUDIO
-from ludwig.schema import utils as schema_utils
 from ludwig.schema.encoders.base import BaseEncoderConfig
 from ludwig.schema.encoders.utils import EncoderDataclassField
 from ludwig.schema.features.base import BaseInputFeatureConfig
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry
+from ludwig.schema.features.utils import input_config_registry, input_mixin_registry
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(AUDIO)
+@input_mixin_registry.register(AUDIO)
 @dataclass
-class AudioInputFeatureConfig(BaseInputFeatureConfig):
-    """AudioFeatureInputFeature is a dataclass that configures the parameters used for an audio input feature."""
+class AudioInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """AudioInputFeatureConfigMixin is a dataclass that configures the parameters used in both the audio input
+    feature and the audio global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=AUDIO)
 
@@ -22,9 +23,10 @@ class AudioInputFeatureConfig(BaseInputFeatureConfig):
         default="parallel_cnn",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
+
+@input_config_registry.register(AUDIO)
+@dataclass(repr=False)
+class AudioInputFeatureConfig(BaseInputFeatureConfig, AudioInputFeatureConfigMixin):
+    """AudioInputFeatureConfig is a dataclass that configures the parameters used for an audio input feature."""
+
+    pass
diff --git a/ludwig/schema/features/bag_feature.py b/ludwig/schema/features/bag_feature.py
index 0e3f4bb9c48..7dedebbf658 100644
--- a/ludwig/schema/features/bag_feature.py
+++ b/ludwig/schema/features/bag_feature.py
@@ -1,19 +1,20 @@
 from marshmallow_dataclass import dataclass
 
 from ludwig.constants import BAG
-from ludwig.schema import utils as schema_utils
 from ludwig.schema.encoders.base import BaseEncoderConfig
 from ludwig.schema.encoders.utils import EncoderDataclassField
 from ludwig.schema.features.base import BaseInputFeatureConfig
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry
+from ludwig.schema.features.utils import input_config_registry, input_mixin_registry
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(BAG)
+@input_mixin_registry.register(BAG)
 @dataclass
-class BagInputFeatureConfig(BaseInputFeatureConfig):
-    """BagInputFeatureConfig is a dataclass that configures the parameters used for a bag input feature."""
+class BagInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """BagInputFeatureConfigMixin is a dataclass that configures the parameters used in both the bag input feature
+    and the bag global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=BAG)
 
@@ -22,9 +23,10 @@ class BagInputFeatureConfig(BaseInputFeatureConfig):
         default="embed",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
+
+@input_config_registry.register(BAG)
+@dataclass(repr=False)
+class BagInputFeatureConfig(BaseInputFeatureConfig, BagInputFeatureConfigMixin):
+    """BagInputFeatureConfig is a dataclass that configures the parameters used for a bag input feature."""
+
+    pass
diff --git a/ludwig/schema/features/binary_feature.py b/ludwig/schema/features/binary_feature.py
index 0252c25bba1..99785fa5b9c 100644
--- a/ludwig/schema/features/binary_feature.py
+++ b/ludwig/schema/features/binary_feature.py
@@ -11,13 +11,20 @@
 from ludwig.schema.features.loss.utils import LossDataclassField
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry, output_config_registry
+from ludwig.schema.features.utils import (
+    input_config_registry,
+    input_mixin_registry,
+    output_config_registry,
+    output_mixin_registry,
+)
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(BINARY)
+@input_mixin_registry.register(BINARY)
 @dataclass
-class BinaryInputFeatureConfig(BaseInputFeatureConfig):
-    """BinaryInputFeatureConfig is a dataclass that configures the parameters used for a binary input feature."""
+class BinaryInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """BinaryInputFeatureConfigMixin is a dataclass that configures the parameters used in both the binary input
+    feature and the binary global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=BINARY)
 
@@ -26,43 +33,40 @@ class BinaryInputFeatureConfig(BaseInputFeatureConfig):
         default="passthrough",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
 
+@input_config_registry.register(BINARY)
+@dataclass(repr=False)
+class BinaryInputFeatureConfig(BaseInputFeatureConfig, BinaryInputFeatureConfigMixin):
+    """BinaryInputFeatureConfig is a dataclass that configures the parameters used for a binary input feature."""
 
-@output_config_registry.register(BINARY)
-@dataclass
-class BinaryOutputFeatureConfig(BaseOutputFeatureConfig):
-    """BinaryOutputFeatureConfig is a dataclass that configures the parameters used for a binary output feature."""
+    pass
 
-    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="binary_output")
 
-    loss: BaseLossConfig = LossDataclassField(
-        feature_type=BINARY,
-        default=BINARY_WEIGHTED_CROSS_ENTROPY,
-    )
+@output_mixin_registry.register(BINARY)
+@dataclass
+class BinaryOutputFeatureConfigMixin(BaseMarshmallowConfig):
+    """BinaryOutputFeatureConfigMixin is a dataclass that configures the parameters used in both the binary output
+    feature and the binary global defaults section of the Ludwig Config."""
 
     decoder: BaseDecoderConfig = DecoderDataclassField(
         feature_type=BINARY,
         default="regressor",
     )
 
-    threshold: float = schema_utils.FloatRange(
-        default=0.5,
-        min=0,
-        max=1,
-        description="The threshold used to convert output probabilities to predictions. Predicted probabilities greater"
-        "than or equal to threshold are mapped to True.",
+    loss: BaseLossConfig = LossDataclassField(
+        feature_type=BINARY,
+        default=BINARY_WEIGHTED_CROSS_ENTROPY,
     )
 
-    reduce_input: str = schema_utils.ReductionOptions(
-        default="sum",
-        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
-        "dimension (second if you count the batch dimension)",
+
+@output_config_registry.register(BINARY)
+@dataclass(repr=False)
+class BinaryOutputFeatureConfig(BaseOutputFeatureConfig, BinaryOutputFeatureConfigMixin):
+    """BinaryOutputFeatureConfig is a dataclass that configures the parameters used for a binary output feature."""
+
+    calibration: bool = schema_utils.Boolean(
+        default=False,
+        description="Calibrate the model's output probabilities using temperature scaling.",
     )
 
     dependencies: list = schema_utils.List(
@@ -70,12 +74,23 @@ class BinaryOutputFeatureConfig(BaseOutputFeatureConfig):
         description="List of input features that this feature depends on.",
     )
 
+    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="binary_output")
+
     reduce_dependencies: str = schema_utils.ReductionOptions(
         default="sum",
         description="How to reduce the dependencies of the output feature.",
     )
 
-    calibration: bool = schema_utils.Boolean(
-        default=False,
-        description="Calibrate the model's output probabilities using temperature scaling.",
+    reduce_input: str = schema_utils.ReductionOptions(
+        default="sum",
+        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
+        "dimension (second if you count the batch dimension)",
+    )
+
+    threshold: float = schema_utils.FloatRange(
+        default=0.5,
+        min=0,
+        max=1,
+        description="The threshold used to convert output probabilities to predictions. Predicted probabilities greater"
+        " than or equal to threshold are mapped to True.",
     )
diff --git a/ludwig/schema/features/category_feature.py b/ludwig/schema/features/category_feature.py
index c9ce624e1a5..0cacb01edc3 100644
--- a/ludwig/schema/features/category_feature.py
+++ b/ludwig/schema/features/category_feature.py
@@ -11,14 +11,20 @@
 from ludwig.schema.features.loss.utils import LossDataclassField
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry, output_config_registry
+from ludwig.schema.features.utils import (
+    input_config_registry,
+    input_mixin_registry,
+    output_config_registry,
+    output_mixin_registry,
+)
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(CATEGORY)
+@input_mixin_registry.register(CATEGORY)
 @dataclass
-class CategoryInputFeatureConfig(BaseInputFeatureConfig):
-    """CategoryInputFeatureConfig is a dataclass that configures the parameters used for a category input
-    feature."""
+class CategoryInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """CategoryInputFeatureConfigMixin is a dataclass that configures the parameters used in both the category
+    input feature and the category global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=CATEGORY)
 
@@ -27,36 +33,42 @@ class CategoryInputFeatureConfig(BaseInputFeatureConfig):
         default="dense",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
 
+@input_config_registry.register(CATEGORY)
+@dataclass(repr=False)
+class CategoryInputFeatureConfig(BaseInputFeatureConfig, CategoryInputFeatureConfigMixin):
+    """CategoryInputFeatureConfig is a dataclass that configures the parameters used for a category input
+    feature."""
+
+    pass
 
-@output_config_registry.register(CATEGORY)
+
+@output_mixin_registry.register(CATEGORY)
 @dataclass
-class CategoryOutputFeatureConfig(BaseOutputFeatureConfig):
-    """CategoryOutputFeatureConfig is a dataclass that configures the parameters used for a category output
-    feature."""
+class CategoryOutputFeatureConfigMixin(BaseMarshmallowConfig):
+    """CategoryOutputFeatureConfigMixin is a dataclass that configures the parameters used in both the category
+    output feature and the category global defaults section of the Ludwig Config."""
 
-    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="category_output")
+    decoder: BaseDecoderConfig = DecoderDataclassField(
+        feature_type=CATEGORY,
+        default="classifier",
+    )
 
     loss: BaseLossConfig = LossDataclassField(
         feature_type=CATEGORY,
         default=SOFTMAX_CROSS_ENTROPY,
     )
 
-    decoder: BaseDecoderConfig = DecoderDataclassField(
-        feature_type=CATEGORY,
-        default="classifier",
-    )
 
-    reduce_input: str = schema_utils.ReductionOptions(
-        default="sum",
-        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
-        "dimension (second if you count the batch dimension)",
+@output_config_registry.register(CATEGORY)
+@dataclass(repr=False)
+class CategoryOutputFeatureConfig(BaseOutputFeatureConfig, CategoryOutputFeatureConfigMixin):
+    """CategoryOutputFeatureConfig is a dataclass that configures the parameters used for a category output
+    feature."""
+
+    calibration: bool = schema_utils.Boolean(
+        default=False,
+        description="Calibrate the model's output probabilities using temperature scaling.",
     )
 
     dependencies: list = schema_utils.List(
@@ -64,19 +76,22 @@ class CategoryOutputFeatureConfig(BaseOutputFeatureConfig):
         description="List of input features that this feature depends on.",
     )
 
+    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="category_output")
+
     reduce_dependencies: str = schema_utils.ReductionOptions(
         default="sum",
         description="How to reduce the dependencies of the output feature.",
     )
 
+    reduce_input: str = schema_utils.ReductionOptions(
+        default="sum",
+        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
+        "dimension (second if you count the batch dimension)",
+    )
+
     top_k: int = schema_utils.NonNegativeInteger(
         default=3,
         description="Determines the parameter k, the number of categories to consider when computing the top_k "
         "measure. It computes accuracy but considering as a match if the true category appears in the "
         "first k predicted categories ranked by decoder's confidence.",
     )
-
-    calibration: bool = schema_utils.Boolean(
-        default=False,
-        description="Calibrate the model's output probabilities using temperature scaling.",
-    )
diff --git a/ludwig/schema/features/date_feature.py b/ludwig/schema/features/date_feature.py
index 153e63788dd..ab6d83cd354 100644
--- a/ludwig/schema/features/date_feature.py
+++ b/ludwig/schema/features/date_feature.py
@@ -1,19 +1,20 @@
 from marshmallow_dataclass import dataclass
 
 from ludwig.constants import DATE
-from ludwig.schema import utils as schema_utils
 from ludwig.schema.encoders.base import BaseEncoderConfig
 from ludwig.schema.encoders.utils import EncoderDataclassField
 from ludwig.schema.features.base import BaseInputFeatureConfig
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry
+from ludwig.schema.features.utils import input_config_registry, input_mixin_registry
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(DATE)
+@input_mixin_registry.register(DATE)
 @dataclass
-class DateInputFeatureConfig(BaseInputFeatureConfig):
-    """DateInputFeature is a dataclass that configures the parameters used for a date input feature."""
+class DateInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """DateInputFeatureConfigMixin is a dataclass that configures the parameters used in both the date input
+    feature and the date global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=DATE)
 
@@ -22,9 +23,10 @@ class DateInputFeatureConfig(BaseInputFeatureConfig):
         default="embed",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
+
+@input_config_registry.register(DATE)
+@dataclass(repr=False)
+class DateInputFeatureConfig(BaseInputFeatureConfig, DateInputFeatureConfigMixin):
+    """DateInputFeatureConfig is a dataclass that configures the parameters used for a date input feature."""
+
+    pass
diff --git a/ludwig/schema/features/h3_feature.py b/ludwig/schema/features/h3_feature.py
index 2335e61bbb3..b20ecece071 100644
--- a/ludwig/schema/features/h3_feature.py
+++ b/ludwig/schema/features/h3_feature.py
@@ -1,19 +1,20 @@
 from marshmallow_dataclass import dataclass
 
 from ludwig.constants import H3
-from ludwig.schema import utils as schema_utils
 from ludwig.schema.encoders.base import BaseEncoderConfig
 from ludwig.schema.encoders.utils import EncoderDataclassField
 from ludwig.schema.features.base import BaseInputFeatureConfig
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry
+from ludwig.schema.features.utils import input_config_registry, input_mixin_registry
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(H3)
+@input_mixin_registry.register(H3)
 @dataclass
-class H3InputFeatureConfig(BaseInputFeatureConfig):
-    """H3InputFeatureConfig is a dataclass that configures the parameters used for an h3 input feature."""
+class H3InputFeatureConfigMixin(BaseMarshmallowConfig):
+    """H3InputFeatureConfigMixin is a dataclass that configures the parameters used in both the h3 input feature
+    and the h3 global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=H3)
 
@@ -22,9 +23,10 @@ class H3InputFeatureConfig(BaseInputFeatureConfig):
         default="embed",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
+
+@input_config_registry.register(H3)
+@dataclass(repr=False)
+class H3InputFeatureConfig(BaseInputFeatureConfig, H3InputFeatureConfigMixin):
+    """H3InputFeatureConfig is a dataclass that configures the parameters used for an h3 input feature."""
+
+    pass
diff --git a/ludwig/schema/features/image_feature.py b/ludwig/schema/features/image_feature.py
index 2e8bd9ec971..ba5d0688f40 100644
--- a/ludwig/schema/features/image_feature.py
+++ b/ludwig/schema/features/image_feature.py
@@ -1,19 +1,20 @@
 from marshmallow_dataclass import dataclass
 
 from ludwig.constants import IMAGE
-from ludwig.schema import utils as schema_utils
 from ludwig.schema.encoders.base import BaseEncoderConfig
 from ludwig.schema.encoders.utils import EncoderDataclassField
 from ludwig.schema.features.base import BaseInputFeatureConfig
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry
+from ludwig.schema.features.utils import input_config_registry, input_mixin_registry
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(IMAGE)
+@input_mixin_registry.register(IMAGE)
 @dataclass
-class ImageInputFeatureConfig(BaseInputFeatureConfig):
-    """ImageInputFeatureConfig is a dataclass that configures the parameters used for an image input feature."""
+class ImageInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """ImageInputFeatureConfigMixin is a dataclass that configures the parameters used in both the image input
+    feature and the image global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=IMAGE)
 
@@ -22,9 +23,10 @@ class ImageInputFeatureConfig(BaseInputFeatureConfig):
         default="stacked_cnn",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
+
+@input_config_registry.register(IMAGE)
+@dataclass(repr=False)
+class ImageInputFeatureConfig(BaseInputFeatureConfig, ImageInputFeatureConfigMixin):
+    """ImageInputFeatureConfig is a dataclass that configures the parameters used for an image input feature."""
+
+    pass
diff --git a/ludwig/schema/features/number_feature.py b/ludwig/schema/features/number_feature.py
index 37417f0d673..2e2e6aa25e0 100644
--- a/ludwig/schema/features/number_feature.py
+++ b/ludwig/schema/features/number_feature.py
@@ -13,13 +13,20 @@
 from ludwig.schema.features.loss.utils import LossDataclassField
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry, output_config_registry
+from ludwig.schema.features.utils import (
+    input_config_registry,
+    input_mixin_registry,
+    output_config_registry,
+    output_mixin_registry,
+)
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(NUMBER)
+@input_mixin_registry.register(NUMBER)
 @dataclass
-class NumberInputFeatureConfig(BaseInputFeatureConfig):
-    """NumberInputFeatureConfig is a dataclass that configures the parameters used for a number input feature."""
+class NumberInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """NumberInputFeatureConfigMixin is a dataclass that configures the parameters used in both the number input
+    feature and the number global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=NUMBER)
 
@@ -29,19 +36,37 @@ class NumberInputFeatureConfig(BaseInputFeatureConfig):
     )
 
 
-@output_config_registry.register(NUMBER)
+@input_config_registry.register(NUMBER)
+@dataclass(repr=False)
+class NumberInputFeatureConfig(BaseInputFeatureConfig, NumberInputFeatureConfigMixin):
+    """NumberInputFeatureConfig is a dataclass that configures the parameters used for a number input feature."""
+
+    pass
+
+
+@output_mixin_registry.register(NUMBER)
 @dataclass
-class NumberOutputFeatureConfig(BaseOutputFeatureConfig):
-    """NumberOutputFeatureConfig is a dataclass that configures the parameters used for a category output
-    feature."""
+class NumberOutputFeatureConfigMixin(BaseMarshmallowConfig):
+    """NumberOutputFeatureConfigMixin is a dataclass that configures the parameters used in both the number output
+    feature and the number global defaults section of the Ludwig Config."""
 
-    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="number_output")
+    decoder: BaseDecoderConfig = DecoderDataclassField(
+        feature_type=NUMBER,
+        default="regressor",
+    )
 
     loss: BaseLossConfig = LossDataclassField(
         feature_type=NUMBER,
         default=MEAN_SQUARED_ERROR,
     )
 
+
+@output_config_registry.register(NUMBER)
+@dataclass(repr=False)
+class NumberOutputFeatureConfig(BaseOutputFeatureConfig, NumberOutputFeatureConfigMixin):
+    """NumberOutputFeatureConfig is a dataclass that configures the parameters used for a number output
+    feature."""
+
     clip: Union[List[int], Tuple[int]] = schema_utils.FloatRangeTupleDataclassField(
         n=2,
         default=None,
@@ -51,17 +76,6 @@ class NumberOutputFeatureConfig(BaseOutputFeatureConfig):
         description="Clip the predicted output to the specified range.",
     )
 
-    decoder: BaseDecoderConfig = DecoderDataclassField(
-        feature_type=NUMBER,
-        default="regressor",
-    )
-
-    reduce_input: str = schema_utils.ReductionOptions(
-        default="sum",
-        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
-        "dimension (second if you count the batch dimension)",
-    )
-
     dependencies: list = schema_utils.List(
         default=[],
         description="List of input features that this feature depends on.",
@@ -71,3 +85,11 @@ class NumberOutputFeatureConfig(BaseOutputFeatureConfig):
         default="sum",
         description="How to reduce the dependencies of the output feature.",
     )
+
+    reduce_input: str = schema_utils.ReductionOptions(
+        default="sum",
+        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
+        "dimension (second if you count the batch dimension)",
+    )
+
+    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="number_output")
diff --git a/ludwig/schema/features/sequence_feature.py b/ludwig/schema/features/sequence_feature.py
index 5e07552de4f..66edd61ba04 100644
--- a/ludwig/schema/features/sequence_feature.py
+++ b/ludwig/schema/features/sequence_feature.py
@@ -11,14 +11,20 @@
 from ludwig.schema.features.loss.utils import LossDataclassField
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry, output_config_registry
+from ludwig.schema.features.utils import (
+    input_config_registry,
+    input_mixin_registry,
+    output_config_registry,
+    output_mixin_registry,
+)
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(SEQUENCE)
+@input_mixin_registry.register(SEQUENCE)
 @dataclass
-class SequenceInputFeatureConfig(BaseInputFeatureConfig):
-    """SequenceInputFeatureConfig is a dataclass that configures the parameters used for a sequence input
-    feature."""
+class SequenceInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """SequenceInputFeatureConfigMixin is a dataclass that configures the parameters used in both the sequence
+    input feature and the sequence global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=SEQUENCE)
 
@@ -28,36 +34,52 @@ class SequenceInputFeatureConfig(BaseInputFeatureConfig):
     )
 
 
-@output_config_registry.register(SEQUENCE)
-@dataclass
-class SequenceOutputFeatureConfig(BaseOutputFeatureConfig):
-    """SequenceOutputFeatureConfig is a dataclass that configures the parameters used for a sequence output
+@input_config_registry.register(SEQUENCE)
+@dataclass(repr=False)
+class SequenceInputFeatureConfig(BaseInputFeatureConfig, SequenceInputFeatureConfigMixin):
+    """SequenceInputFeatureConfig is a dataclass that configures the parameters used for a sequence input
     feature."""
 
-    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="sequence_output")
+    pass
 
-    loss: BaseLossConfig = LossDataclassField(
-        feature_type=SEQUENCE,
-        default=SEQUENCE_SOFTMAX_CROSS_ENTROPY,
-    )
+
+@output_mixin_registry.register(SEQUENCE)
+@dataclass
+class SequenceOutputFeatureConfigMixin(BaseMarshmallowConfig):
+    """SequenceOutputFeatureConfigMixin is a dataclass that configures the parameters used in both the sequence
+    output feature and the sequence global defaults section of the Ludwig Config."""
 
     decoder: BaseDecoderConfig = DecoderDataclassField(
         feature_type=SEQUENCE,
         default="generator",
     )
 
-    reduce_input: str = schema_utils.ReductionOptions(
-        default="sum",
-        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
-        "dimension (second if you count the batch dimension)",
+    loss: BaseLossConfig = LossDataclassField(
+        feature_type=SEQUENCE,
+        default=SEQUENCE_SOFTMAX_CROSS_ENTROPY,
     )
 
+
+@output_config_registry.register(SEQUENCE)
+@dataclass(repr=False)
+class SequenceOutputFeatureConfig(BaseOutputFeatureConfig, SequenceOutputFeatureConfigMixin):
+    """SequenceOutputFeatureConfig is a dataclass that configures the parameters used for a sequence output
+    feature."""
+
     dependencies: list = schema_utils.List(
         default=[],
         description="List of input features that this feature depends on.",
     )
 
+    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="sequence_output")
+
     reduce_dependencies: str = schema_utils.ReductionOptions(
         default="sum",
         description="How to reduce the dependencies of the output feature.",
     )
+
+    reduce_input: str = schema_utils.ReductionOptions(
+        default="sum",
+        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
+        "dimension (second if you count the batch dimension)",
+    )
diff --git a/ludwig/schema/features/set_feature.py b/ludwig/schema/features/set_feature.py
index 0e644cbfbfd..7abd9eb8db6 100644
--- a/ludwig/schema/features/set_feature.py
+++ b/ludwig/schema/features/set_feature.py
@@ -11,13 +11,20 @@
 from ludwig.schema.features.loss.utils import LossDataclassField
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry, output_config_registry
+from ludwig.schema.features.utils import (
+    input_config_registry,
+    input_mixin_registry,
+    output_config_registry,
+    output_mixin_registry,
+)
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(SET)
+@input_mixin_registry.register(SET)
 @dataclass
-class SetInputFeatureConfig(BaseInputFeatureConfig):
-    """SetInputFeatureConfig is a dataclass that configures the parameters used for a set input feature."""
+class SetInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """SetInputFeatureConfigMixin is a dataclass that configures the parameters used in both the set input feature
+    and the set global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=SET)
 
@@ -26,51 +33,59 @@ class SetInputFeatureConfig(BaseInputFeatureConfig):
         default="embed",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
 
+@input_config_registry.register(SET)
+@dataclass(repr=False)
+class SetInputFeatureConfig(BaseInputFeatureConfig, SetInputFeatureConfigMixin):
+    """SetInputFeatureConfig is a dataclass that configures the parameters used for a set input feature."""
 
-@output_config_registry.register(SET)
-@dataclass
-class SetOutputFeatureConfig(BaseOutputFeatureConfig):
-    """SetOutputFeatureConfig is a dataclass that configures the parameters used for a set output feature."""
+    pass
 
-    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="set_output")
 
-    loss: BaseLossConfig = LossDataclassField(
-        feature_type=SET,
-        default=SIGMOID_CROSS_ENTROPY,
-    )
-
-    threshold: float = schema_utils.FloatRange(
-        default=0.5,
-        min=0,
-        max=1,
-        description="The threshold used to convert output probabilities to predictions. Tokens with predicted"
-        "probabilities greater than or equal to threshold are predicted to be in the output set (True).",
-    )
+@output_mixin_registry.register(SET)
+@dataclass
+class SetOutputFeatureConfigMixin(BaseMarshmallowConfig):
+    """SetOutputFeatureConfigMixin is a dataclass that configures the parameters used in both the set output
+    feature and the set global defaults section of the Ludwig Config."""
 
     decoder: BaseDecoderConfig = DecoderDataclassField(
         feature_type=SET,
         default="classifier",
     )
 
-    reduce_input: str = schema_utils.ReductionOptions(
-        default="sum",
-        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
-        "dimension (second if you count the batch dimension)",
+    loss: BaseLossConfig = LossDataclassField(
+        feature_type=SET,
+        default=SIGMOID_CROSS_ENTROPY,
     )
 
+
+@output_config_registry.register(SET)
+@dataclass(repr=False)
+class SetOutputFeatureConfig(BaseOutputFeatureConfig, SetOutputFeatureConfigMixin):
+    """SetOutputFeatureConfig is a dataclass that configures the parameters used for a set output feature."""
+
     dependencies: list = schema_utils.List(
         default=[],
         description="List of input features that this feature depends on.",
     )
 
+    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="set_output")
+
     reduce_dependencies: str = schema_utils.ReductionOptions(
         default="sum",
         description="How to reduce the dependencies of the output feature.",
     )
+
+    reduce_input: str = schema_utils.ReductionOptions(
+        default="sum",
+        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
+        "dimension (second if you count the batch dimension)",
+    )
+
+    threshold: float = schema_utils.FloatRange(
+        default=0.5,
+        min=0,
+        max=1,
+        description="The threshold used to convert output probabilities to predictions. Tokens with predicted"
+        " probabilities greater than or equal to threshold are predicted to be in the output set (True).",
+    )
diff --git a/ludwig/schema/features/text_feature.py b/ludwig/schema/features/text_feature.py
index 60dacf7a93d..51b7bcae604 100644
--- a/ludwig/schema/features/text_feature.py
+++ b/ludwig/schema/features/text_feature.py
@@ -11,13 +11,20 @@
 from ludwig.schema.features.loss.utils import LossDataclassField
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry, output_config_registry
+from ludwig.schema.features.utils import (
+    input_config_registry,
+    input_mixin_registry,
+    output_config_registry,
+    output_mixin_registry,
+)
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(TEXT)
+@input_mixin_registry.register(TEXT)
 @dataclass
-class TextInputFeatureConfig(BaseInputFeatureConfig):
-    """TextInputFeatureConfig is a dataclass that configures the parameters used for a text input feature."""
+class TextInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """TextInputFeatureConfigMixin is a dataclass that configures the parameters used in both the text input
+    feature and the text global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=TEXT)
 
@@ -27,27 +34,41 @@ class TextInputFeatureConfig(BaseInputFeatureConfig):
     )
 
 
-@output_config_registry.register(TEXT)
+@input_config_registry.register(TEXT)
+@dataclass(repr=False)
+class TextInputFeatureConfig(BaseInputFeatureConfig, TextInputFeatureConfigMixin):
+    """TextInputFeatureConfig is a dataclass that configures the parameters used for a text input feature."""
+
+    pass
+
+
+@output_mixin_registry.register(TEXT)
 @dataclass
-class TextOutputFeatureConfig(BaseOutputFeatureConfig):
-    """TextOutputFeatureConfig is a dataclass that configures the parameters used for a text output feature."""
+class TextOutputFeatureConfigMixin(BaseMarshmallowConfig):
+    """TextOutputFeatureConfigMixin is a dataclass that configures the parameters used in both the text output
+    feature and the text global defaults section of the Ludwig Config."""
 
-    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="text_output")
+    decoder: BaseDecoderConfig = DecoderDataclassField(
+        feature_type=TEXT,
+        default="generator",
+    )
 
     loss: BaseLossConfig = LossDataclassField(
         feature_type=TEXT,
         default=SEQUENCE_SOFTMAX_CROSS_ENTROPY,
     )
 
-    decoder: BaseDecoderConfig = DecoderDataclassField(
-        feature_type=TEXT,
-        default="generator",
-    )
 
-    reduce_input: str = schema_utils.ReductionOptions(
-        default="sum",
-        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
-        "dimension (second if you count the batch dimension)",
+@output_config_registry.register(TEXT)
+@dataclass(repr=False)
+class TextOutputFeatureConfig(BaseOutputFeatureConfig, TextOutputFeatureConfigMixin):
+    """TextOutputFeatureConfig is a dataclass that configures the parameters used for a text output feature."""
+
+    class_similarities: list = schema_utils.List(
+        list,
+        default=None,
+        description="If not null this parameter is a c x c matrix in the form of a list of lists that contains the "
+        "mutual similarity of classes. It is used if `class_similarities_temperature` is greater than 0. ",
     )
 
     dependencies: list = schema_utils.List(
@@ -55,7 +76,15 @@ class TextOutputFeatureConfig(BaseOutputFeatureConfig):
         description="List of input features that this feature depends on.",
     )
 
+    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="text_output")
+
     reduce_dependencies: str = schema_utils.ReductionOptions(
         default="sum",
         description="How to reduce the dependencies of the output feature.",
     )
+
+    reduce_input: str = schema_utils.ReductionOptions(
+        default="sum",
+        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
+        "dimension (second if you count the batch dimension)",
+    )
diff --git a/ludwig/schema/features/timeseries_feature.py b/ludwig/schema/features/timeseries_feature.py
index b5f8b8187e0..1ffc34a6680 100644
--- a/ludwig/schema/features/timeseries_feature.py
+++ b/ludwig/schema/features/timeseries_feature.py
@@ -1,20 +1,20 @@
 from marshmallow_dataclass import dataclass
 
 from ludwig.constants import TIMESERIES
-from ludwig.schema import utils as schema_utils
 from ludwig.schema.encoders.base import BaseEncoderConfig
 from ludwig.schema.encoders.utils import EncoderDataclassField
 from ludwig.schema.features.base import BaseInputFeatureConfig
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry
+from ludwig.schema.features.utils import input_config_registry, input_mixin_registry
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(TIMESERIES)
+@input_mixin_registry.register(TIMESERIES)
 @dataclass
-class TimeseriesInputFeatureConfig(BaseInputFeatureConfig):
-    """TimeseriesInputFeatureConfig is a dataclass that configures the parameters used for a timeseries input
-    feature."""
+class TimeseriesInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """TimeseriesInputFeatureConfigMixin is a dataclass that configures the parameters used in both the timeseries
+    input feature and the timeseries global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=TIMESERIES)
 
@@ -23,9 +23,11 @@ class TimeseriesInputFeatureConfig(BaseInputFeatureConfig):
         default="parallel_cnn",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
+
+@input_config_registry.register(TIMESERIES)
+@dataclass(repr=False)
+class TimeseriesInputFeatureConfig(BaseInputFeatureConfig, TimeseriesInputFeatureConfigMixin):
+    """TimeseriesInputFeatureConfig is a dataclass that configures the parameters used for a timeseries input
+    feature."""
+
+    pass
diff --git a/ludwig/schema/features/utils.py b/ludwig/schema/features/utils.py
index f60a4776f49..09bd4a78830 100644
--- a/ludwig/schema/features/utils.py
+++ b/ludwig/schema/features/utils.py
@@ -1,26 +1,18 @@
 from ludwig.schema import utils as schema_utils
 from ludwig.utils.registry import Registry
 
-input_type_registry = Registry()
 input_config_registry = Registry()
-output_type_registry = Registry()
+input_mixin_registry = Registry()
 output_config_registry = Registry()
+output_mixin_registry = Registry()
 
 
-def register_input_feature(name: str):
-    def wrap(cls):
-        input_type_registry[name] = cls
-        return cls
+def get_input_feature_cls(name: str):
+    return input_config_registry[name]
 
-    return wrap
 
-
-def register_output_feature(name: str):
-    def wrap(cls):
-        output_type_registry[name] = cls
-        return cls
-
-    return wrap
+def get_output_feature_cls(name: str):
+    return output_config_registry[name]
 
 
 def get_input_feature_jsonschema():
@@ -29,7 +21,7 @@ def get_input_feature_jsonschema():
 
     Returns: JSON Schema
     """
-    input_feature_types = sorted(list(input_type_registry.keys()))
+    input_feature_types = sorted(list(input_config_registry.keys()))
     return {
         "type": "array",
         "items": {
@@ -57,11 +49,10 @@ def get_input_feature_conds():
 
     Returns: List of JSON clauses
     """
-    input_feature_types = sorted(list(input_type_registry.keys()))
+    input_feature_types = sorted(list(input_config_registry.keys()))
     conds = []
     for feature_type in input_feature_types:
-        feature_cls = input_type_registry[feature_type]
-        schema_cls = feature_cls.get_schema_cls()
+        schema_cls = get_input_feature_cls(feature_type)
         feature_schema = schema_utils.unload_jsonschema_from_marshmallow_class(schema_cls)
         feature_props = feature_schema["properties"]
         feature_cond = schema_utils.create_cond({"type": feature_type}, feature_props)
@@ -75,7 +66,7 @@ def get_output_feature_jsonschema():
 
     Returns: JSON Schema
     """
-    output_feature_types = sorted(list(output_type_registry.keys()))
+    output_feature_types = sorted(list(output_config_registry.keys()))
     return {
         "type": "array",
         "items": {
@@ -103,11 +94,10 @@ def get_output_feature_conds():
 
     Returns: List of JSON clauses
     """
-    output_feature_types = sorted(list(output_type_registry.keys()))
+    output_feature_types = sorted(list(output_config_registry.keys()))
     conds = []
     for feature_type in output_feature_types:
-        feature_cls = output_type_registry[feature_type]
-        schema_cls = feature_cls.get_schema_cls()
+        schema_cls = get_output_feature_cls(feature_type)
         feature_schema = schema_utils.unload_jsonschema_from_marshmallow_class(schema_cls)
         feature_props = feature_schema["properties"]
         feature_cond = schema_utils.create_cond({"type": feature_type}, feature_props)
diff --git a/ludwig/schema/features/vector_feature.py b/ludwig/schema/features/vector_feature.py
index 515e4028568..e0d3c111836 100644
--- a/ludwig/schema/features/vector_feature.py
+++ b/ludwig/schema/features/vector_feature.py
@@ -11,13 +11,20 @@
 from ludwig.schema.features.loss.utils import LossDataclassField
 from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
 from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField
-from ludwig.schema.features.utils import input_config_registry, output_config_registry
+from ludwig.schema.features.utils import (
+    input_config_registry,
+    input_mixin_registry,
+    output_config_registry,
+    output_mixin_registry,
+)
+from ludwig.schema.utils import BaseMarshmallowConfig
 
 
-@input_config_registry.register(VECTOR)
+@input_mixin_registry.register(VECTOR)
 @dataclass
-class VectorInputFeatureConfig(BaseInputFeatureConfig):
-    """VectorInputFeatureConfig is a dataclass that configures the parameters used for a vector input feature."""
+class VectorInputFeatureConfigMixin(BaseMarshmallowConfig):
+    """VectorInputFeatureConfigMixin is a dataclass that configures the parameters used in both the vector input
+    feature and the vector global defaults section of the Ludwig Config."""
 
     preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type=VECTOR)
 
@@ -26,36 +33,24 @@ class VectorInputFeatureConfig(BaseInputFeatureConfig):
         default="dense",
     )
 
-    tied: str = schema_utils.String(
-        default=None,
-        allow_none=True,
-        description="Name of input feature to tie the weights of the encoder with.  It needs to be the name of a "
-        "feature of the same type and with the same encoder parameters.",
-    )
-
 
-@output_config_registry.register(VECTOR)
-@dataclass
-class VectorOutputFeatureConfig(BaseOutputFeatureConfig):
-    """VectorOutputFeatureConfig is a dataclass that configures the parameters used for a vector output feature."""
+@input_config_registry.register(VECTOR)
+@dataclass(repr=False)
+class VectorInputFeatureConfig(BaseInputFeatureConfig, VectorInputFeatureConfigMixin):
+    """VectorInputFeatureConfig is a dataclass that configures the parameters used for a vector input feature."""
 
-    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="vector_output")
+    pass
 
-    reduce_input: str = schema_utils.ReductionOptions(
-        default=None,
-        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
-        "dimension (second if you count the batch dimension)",
-    )
 
-    reduce_dependencies: str = schema_utils.ReductionOptions(
-        default=None,
-        description="How to reduce the dependencies of the output feature.",
-    )
+@output_mixin_registry.register(VECTOR)
+@dataclass
+class VectorOutputFeatureConfigMixin(BaseMarshmallowConfig):
+    """VectorOutputFeatureConfigMixin is a dataclass that configures the parameters used in both the vector output
+    feature and the vector global defaults section of the Ludwig Config."""
 
-    vector_size: int = schema_utils.PositiveInteger(
-        default=None,
-        allow_none=True,
-        description="The size of the vector. If None, the vector size will be inferred from the data.",
+    decoder: BaseDecoderConfig = DecoderDataclassField(
+        feature_type=VECTOR,
+        default="projector",
     )
 
     loss: BaseLossConfig = LossDataclassField(
@@ -63,18 +58,38 @@ class VectorOutputFeatureConfig(BaseOutputFeatureConfig):
         default=MEAN_SQUARED_ERROR,
     )
 
-    decoder: BaseDecoderConfig = DecoderDataclassField(
-        feature_type=VECTOR,
-        default="projector",
-    )
+
+@output_config_registry.register(VECTOR)
+@dataclass(repr=False)
+class VectorOutputFeatureConfig(BaseOutputFeatureConfig, VectorOutputFeatureConfigMixin):
+    """VectorOutputFeatureConfig is a dataclass that configures the parameters used for a vector output feature."""
 
     dependencies: list = schema_utils.List(
         default=[],
         description="List of input features that this feature depends on.",
     )
 
+    preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="vector_output")
+
+    reduce_dependencies: str = schema_utils.ReductionOptions(
+        default=None,
+        description="How to reduce the dependencies of the output feature.",
+    )
+
+    reduce_input: str = schema_utils.ReductionOptions(
+        default=None,
+        description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
+        "dimension (second if you count the batch dimension)",
+    )
+
     softmax: bool = schema_utils.Boolean(
         default=False,
         description="Determines whether to apply a softmax at the end of the decoder. This is useful for predicting a "
         "vector of values that sum up to 1 and can be interpreted as probabilities.",
     )
+
+    vector_size: int = schema_utils.PositiveInteger(
+        default=None,
+        allow_none=True,
+        description="The size of the vector. If None, the vector size will be inferred from the data.",
+    )
diff --git a/ludwig/schema/metadata/parameter_metadata.py b/ludwig/schema/metadata/parameter_metadata.py
index 12a17ffc5e1..f52fe71c342 100644
--- a/ludwig/schema/metadata/parameter_metadata.py
+++ b/ludwig/schema/metadata/parameter_metadata.py
@@ -73,3 +73,6 @@ def convert_metadata_to_json(pm: ParameterMetadata):
     a string repr that is improperly parsed.
     """
     return json.loads(pm.to_json())
+
+
+INTERNAL_ONLY = ParameterMetadata(internal_only=True)
diff --git a/ludwig/schema/utils.py b/ludwig/schema/utils.py
index 7503fea13d4..d931ba511f1 100644
--- a/ludwig/schema/utils.py
+++ b/ludwig/schema/utils.py
@@ -101,11 +101,11 @@ def assert_is_a_marshmallow_class(cls):
     ), f"Expected marshmallow class, but `{cls}` does not have the necessary `Schema` attribute."
 
 
-def unload_jsonschema_from_marshmallow_class(mclass) -> TDict:
+def unload_jsonschema_from_marshmallow_class(mclass, additional_properties: bool = True) -> TDict:
     """Helper method to directly get a marshmallow class's JSON schema without extra wrapping props."""
     assert_is_a_marshmallow_class(mclass)
     schema = js().dump(mclass.Schema())["definitions"][mclass.__name__]
-    schema["additionalProperties"] = True
+    schema["additionalProperties"] = additional_properties
     return schema
 
 
@@ -457,8 +457,15 @@ def FloatRange(
     )
 
 
-def Dict(default: Union[None, TDict] = None, description: str = "", parameter_metadata: ParameterMetadata = None):
+def Dict(
+    default: Union[None, TDict] = None,
+    allow_none: bool = True,
+    description: str = "",
+    parameter_metadata: ParameterMetadata = None,
+):
     """Returns a dataclass field with marshmallow metadata enforcing input must be a dict."""
+    allow_none = allow_none or default is None
+
     if default is not None:
         try:
             assert isinstance(default, dict)
@@ -469,7 +476,7 @@ def Dict(default: Union[None, TDict] = None, description: str = "", parameter_me
         metadata={
             "marshmallow_field": fields.Dict(
                 fields.String(),
-                allow_none=True,
+                allow_none=allow_none,
                 load_default=default,
                 dump_default=default,
                 metadata={
@@ -483,7 +490,7 @@ def Dict(default: Union[None, TDict] = None, description: str = "", parameter_me
 
 
 def List(
-    list_type: Union[Type[str], Type[int], Type[float]] = str,
+    list_type: Union[Type[str], Type[int], Type[float], Type[list]] = str,
     default: Union[None, TList[Any]] = None,
     description: str = "",
     allow_none: bool = True,
@@ -503,6 +510,8 @@ def List(
         field_type = fields.Integer()
     elif list_type is float:
         field_type = fields.Float()
+    elif list_type is list:
+        field_type = fields.List(fields.Float())
     else:
         raise ValueError(f"Invalid list type: `{list_type}`")
 
diff --git a/tests/ludwig/schema/test_validate_config_misc.py b/tests/ludwig/schema/test_validate_config_misc.py
index 5a1a5f936c1..42f8ac5c797 100644
--- a/tests/ludwig/schema/test_validate_config_misc.py
+++ b/tests/ludwig/schema/test_validate_config_misc.py
@@ -1,7 +1,7 @@
 import pytest
 from jsonschema.exceptions import ValidationError
 
-from ludwig.constants import TRAINER
+from ludwig.constants import DECODER, ENCODER, LOSS, PREPROCESSING, TRAINER
 from ludwig.features.audio_feature import AudioFeatureMixin
 from ludwig.features.bag_feature import BagFeatureMixin
 from ludwig.features.binary_feature import BinaryFeatureMixin
@@ -256,9 +256,11 @@ def test_defaults_schema():
     schema = DefaultsConfig()
     assert schema.binary.decoder.type == "regressor"
     assert schema.binary.encoder.type == "passthrough"
-
-    assert schema.category.top_k == 3
     assert schema.category.encoder.dropout == 0.0
+    assert ENCODER in schema.category.to_dict()
+    assert PREPROCESSING in schema.category.to_dict()
+    assert DECODER in schema.category.to_dict()
+    assert LOSS in schema.category.to_dict()
 
 
 def test_validate_defaults_schema():
diff --git a/tests/ludwig/utils/test_defaults.py b/tests/ludwig/utils/test_defaults.py
index 2ef2a8a9a9a..96856117776 100644
--- a/tests/ludwig/utils/test_defaults.py
+++ b/tests/ludwig/utils/test_defaults.py
@@ -372,7 +372,7 @@ def test_merge_with_defaults():
                 "name": "number_output_feature",
                 "column": "number_output_feature",
                 "proc_column": "number_output_feature_mZFLky",
-                "loss": {"type": "mean_squared_error", "weight": 1},
+                "loss": {"type": "mean_squared_error", "weight": 1.0},
                 "decoder": {
                     "type": "regressor",
                     "fc_layers": None,
@@ -396,8 +396,8 @@ def test_merge_with_defaults():
                 "reduce_dependencies": "sum",
                 "preprocessing": {
                     "missing_value_strategy": "drop_row",
-                    "fill_value": 0,
-                    "computed_fill_value": 0,
+                    "fill_value": 0.0,
+                    "computed_fill_value": 0.0,
                     "normalization": None,
                 },
                 "input_size": None,

From f092b2160789587c87794582a183fc2309d25e87 Mon Sep 17 00:00:00 2001
From: connor-mccorm <97468934+connor-mccorm@users.noreply.github.com>
Date: Tue, 11 Oct 2022 21:34:04 -0700
Subject: [PATCH 06/29] Add type to custom combiner (#2627)

---
 tests/integration_tests/test_custom_components.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/integration_tests/test_custom_components.py b/tests/integration_tests/test_custom_components.py
index 46fd7151ec7..59dbbfa89be 100644
--- a/tests/integration_tests/test_custom_components.py
+++ b/tests/integration_tests/test_custom_components.py
@@ -33,6 +33,9 @@
 
 @dataclass
 class CustomTestCombinerConfig(BaseCombinerConfig):
+
+    type: str = "custom_combiner"
+
     foo: bool = schema_utils.Boolean(default=False, description="")
 
 

From 6aa400eeccbe31f1a00ff4151900a5d89cf20d26 Mon Sep 17 00:00:00 2001
From: Arnav Garg <106701836+arnavgarg1@users.noreply.github.com>
Date: Wed, 12 Oct 2022 13:33:22 -0700
Subject: [PATCH 07/29] Remove hyperopt from config when running train through
 cli (#2631)

* remove hyperopt from config when running train through cli

* Add comment with todo

* added link to issue
---
 ludwig/train.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ludwig/train.py b/ludwig/train.py
index f731551b2d9..629b7ee2999 100644
--- a/ludwig/train.py
+++ b/ludwig/train.py
@@ -157,6 +157,10 @@ def train_cli(
     if HYPEROPT in config:
         if not query_yes_no(HYPEROPT_WARNING + CONTINUE_PROMPT):
             exit(1)
+        # Stop gap: remove hyperopt from the config to prevent interference with training step sizes
+        # TODO: https://github.com/ludwig-ai/ludwig/issues/2633
+        # Need to investigate why the presence of hyperopt in the config interferes with training step sizes
+        config.pop(HYPEROPT)
 
     if model_load_path:
         model = LudwigModel.load(

From 6342f7a64dd99ddfb2254421d182b00548a5826d Mon Sep 17 00:00:00 2001
From: Arnav Garg <106701836+arnavgarg1@users.noreply.github.com>
Date: Wed, 12 Oct 2022 15:32:18 -0700
Subject: [PATCH 08/29] Ensure resource availability for ray datasets workloads
 when running on cpu clusters (#2524)

---
 ludwig/backend/base.py                        | 11 ++-
 ludwig/backend/horovod.py                     |  5 +
 ludwig/backend/ray.py                         | 40 +++++++-
 ludwig/constants.py                           |  4 +
 ludwig/hyperopt/run.py                        | 94 +++++++------------
 ludwig/hyperopt/utils.py                      | 75 ++++++++++++++-
 tests/integration_tests/test_hyperopt.py      | 13 ++-
 tests/integration_tests/test_hyperopt_ray.py  | 22 ++---
 .../test_hyperopt_ray_horovod.py              | 21 +++--
 tests/ludwig/backend/test_ray.py              | 63 +++++++++++++
 10 files changed, 262 insertions(+), 86 deletions(-)

diff --git a/ludwig/backend/base.py b/ludwig/backend/base.py
index 64e93c0390c..96666390524 100644
--- a/ludwig/backend/base.py
+++ b/ludwig/backend/base.py
@@ -17,7 +17,7 @@
 from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor
 from contextlib import contextmanager
-from typing import Callable, Optional, Union
+from typing import Any, Callable, Dict, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -107,6 +107,10 @@ def num_nodes(self) -> int:
     def get_available_resources(self) -> Resources:
         raise NotImplementedError()
 
+    @abstractmethod
+    def max_concurrent_trials(self, hyperopt_config: Dict[str, Any]) -> Union[int, None]:
+        raise NotImplementedError()
+
 
 class LocalPreprocessingMixin:
     @property
@@ -197,3 +201,8 @@ def num_nodes(self) -> int:
 
     def get_available_resources(self) -> Resources:
         return Resources(cpus=psutil.cpu_count(), gpus=torch.cuda.device_count())
+
+    def max_concurrent_trials(self, hyperopt_config: Dict[str, Any]) -> Union[int, None]:
+        # Every trial will be run with Pandas and NO Ray Datasets. Allow Ray Tune to use all the
+        # trial resources it wants, because there is no Ray Datasets process to compete with it for CPUs.
+        return None
diff --git a/ludwig/backend/horovod.py b/ludwig/backend/horovod.py
index c82fe55cd82..4cc27fa0a2b 100644
--- a/ludwig/backend/horovod.py
+++ b/ludwig/backend/horovod.py
@@ -15,6 +15,7 @@
 # ==============================================================================
 
 import time
+from typing import Any, Dict, Union
 
 import psutil
 import torch
@@ -83,3 +84,7 @@ def get_available_resources(self) -> Resources:
         gpus = self._horovod.allreduce(gpus, op=self._horovod.Sum).item()
 
         return Resources(cpus=cpus, gpus=gpus)
+
+    def max_concurrent_trials(self, hyperopt_config: Dict[str, Any]) -> Union[int, None]:
+        # Return None since there is no Ray component
+        return None
diff --git a/ludwig/backend/ray.py b/ludwig/backend/ray.py
index 77951631c9c..f21a3423eb3 100644
--- a/ludwig/backend/ray.py
+++ b/ludwig/backend/ray.py
@@ -43,7 +43,16 @@
 
 from ludwig.backend.base import Backend, RemoteTrainingMixin
 from ludwig.backend.datasource import BinaryIgnoreNoneTypeDatasource
-from ludwig.constants import MODEL_ECD, MODEL_GBM, NAME, PREPROCESSING, PROC_COLUMN, TYPE
+from ludwig.constants import (
+    CPU_RESOURCES_PER_TRIAL,
+    EXECUTOR,
+    MODEL_ECD,
+    MODEL_GBM,
+    NAME,
+    PREPROCESSING,
+    PROC_COLUMN,
+    TYPE,
+)
 from ludwig.data.dataframe.base import DataFrameEngine
 from ludwig.data.dataset.ray import _SCALAR_TYPES, cast_as_tensor_dtype, RayDataset, RayDatasetManager, RayDatasetShard
 from ludwig.models.base import BaseModel
@@ -1008,6 +1017,35 @@ def get_available_resources(self) -> Resources:
         resources = ray.cluster_resources()
         return Resources(cpus=resources.get("CPU", 0), gpus=resources.get("GPU", 0))
 
+    def max_concurrent_trials(self, hyperopt_config: Dict[str, Any]) -> Union[int, None]:
+        cpus_per_trial = hyperopt_config[EXECUTOR].get(CPU_RESOURCES_PER_TRIAL, 1)
+        num_cpus_available = self.get_available_resources().cpus
+
+        # Trials that request no CPUs cannot compete with Ray Datasets tasks, which are CPU bound
+        if cpus_per_trial == 0:
+            return None
+
+        if num_cpus_available < 2:
+            logger.warning(
+                "At least 2 CPUs are required for hyperopt when using a RayBackend, but only found "
+                f"{num_cpus_available}. If you are not using an auto-scaling Ray cluster, your hyperopt "
+                "trials may hang."
+            )
+
+        # Ray requires at least 1 free CPU to ensure trials don't stall
+        max_possible_trials = int(num_cpus_available // cpus_per_trial) - 1
+
+        # Users may be using an autoscaling cluster, so return None
+        if max_possible_trials < 1:
+            logger.warning(
+                f"Hyperopt trials will request {cpus_per_trial} CPUs in addition to CPUs needed for Ray Datasets, "
+                f"but only {num_cpus_available} CPUs are currently available. If you are not using an auto-scaling "
+                "Ray cluster, your hyperopt trials may hang."
+            )
+            return None
+
+        return max_possible_trials
+
 
 def initialize_ray():
     if not ray.is_initialized():
diff --git a/ludwig/constants.py b/ludwig/constants.py
index ad69fa4856d..b2b1ce3e6d7 100644
--- a/ludwig/constants.py
+++ b/ludwig/constants.py
@@ -142,6 +142,9 @@
 SEARCH_ALG = "search_alg"
 SCHEDULER = "scheduler"
 PARAMETERS = "parameters"
+MAX_CONCURRENT_TRIALS = "max_concurrent_trials"
+CPU_RESOURCES_PER_TRIAL = "cpu_resources_per_trial"
+GPU_RESOURCES_PER_TRIAL = "gpu_resources_per_trial"
 GOAL = "goal"
 GRID_SEARCH = "grid_search"
 
@@ -182,6 +185,7 @@
 REDUCE_INPUT = "reduce_input"
 REDUCE_DEPENDENCIES = "reduce_dependencies"
 
+BACKEND = "backend"
 COMBINER = "combiner"
 
 ENCODER = "encoder"
diff --git a/ludwig/hyperopt/run.py b/ludwig/hyperopt/run.py
index 7c024e3854c..e25372fd891 100644
--- a/ludwig/hyperopt/run.py
+++ b/ludwig/hyperopt/run.py
@@ -1,9 +1,8 @@
 import copy
 import logging
 import os
-import warnings
 from pprint import pformat
-from typing import Any, Dict, List, Optional, Union
+from typing import List, Optional, Union
 
 import pandas as pd
 import yaml
@@ -13,19 +12,20 @@
 from ludwig.backend import Backend, initialize_backend, LocalBackend
 from ludwig.callbacks import Callback
 from ludwig.constants import (
+    AUTO,
     COMBINED,
     EXECUTOR,
     GOAL,
-    GRID_SEARCH,
     HYPEROPT,
     LOSS,
+    MAX_CONCURRENT_TRIALS,
     METRIC,
-    MINIMIZE,
     NAME,
+    NUM_SAMPLES,
     OUTPUT_FEATURES,
+    PARAMETERS,
     PREPROCESSING,
-    RAY,
-    SPACE,
+    SEARCH_ALG,
     SPLIT,
     TEST,
     TRAINING,
@@ -35,12 +35,18 @@
 from ludwig.data.split import get_splitter
 from ludwig.features.feature_registries import output_type_registry
 from ludwig.hyperopt.results import HyperoptResults
-from ludwig.hyperopt.utils import print_hyperopt_results, save_hyperopt_stats, should_tune_preprocessing
+from ludwig.hyperopt.utils import (
+    log_warning_if_all_grid_type_parameters,
+    print_hyperopt_results,
+    save_hyperopt_stats,
+    should_tune_preprocessing,
+    update_hyperopt_params_with_defaults,
+)
 from ludwig.utils.backward_compatibility import upgrade_to_latest_version
 from ludwig.utils.dataset_utils import generate_dataset_statistics
 from ludwig.utils.defaults import default_random_seed, merge_with_defaults
 from ludwig.utils.fs_utils import makedirs, open_file
-from ludwig.utils.misc_utils import get_class_attributes, get_from_registry, set_default_value, set_default_values
+from ludwig.utils.misc_utils import get_from_registry
 
 try:
     from ludwig.backend.ray import RayBackend
@@ -211,23 +217,33 @@ def hyperopt(
 
     hyperopt_config = config[HYPEROPT]
 
+    # Explicitly default to a local backend to avoid picking up Ray or Horovod
+    # backend from the environment.
+    backend = backend or config_dict.get("backend") or "local"
+    backend = initialize_backend(backend)
+
     update_hyperopt_params_with_defaults(hyperopt_config)
 
+    # Check if all parameters use a grid search space and log a warning if needed
+    log_warning_if_all_grid_type_parameters(hyperopt_config[PARAMETERS], hyperopt_config[EXECUTOR].get(NUM_SAMPLES))
+
+    # Infer max concurrent trials
+    if hyperopt_config[EXECUTOR].get(MAX_CONCURRENT_TRIALS) == AUTO:
+        hyperopt_config[EXECUTOR][MAX_CONCURRENT_TRIALS] = backend.max_concurrent_trials(hyperopt_config)
+        logger.info(f"Set max_concurrent_trials to {hyperopt_config[EXECUTOR][MAX_CONCURRENT_TRIALS]}")
+
     # Print hyperopt config
-    logger.info("Hyperopt config")
+    logger.info("Hyperopt Config")
     logger.info(pformat(hyperopt_config, indent=4))
     logger.info("\n")
 
-    search_alg = hyperopt_config["search_alg"]
+    search_alg = hyperopt_config[SEARCH_ALG]
     executor = hyperopt_config[EXECUTOR]
-    parameters = hyperopt_config["parameters"]
-    split = hyperopt_config["split"]
+    parameters = hyperopt_config[PARAMETERS]
+    split = hyperopt_config[SPLIT]
     output_feature = hyperopt_config["output_feature"]
-    metric = hyperopt_config["metric"]
-    goal = hyperopt_config["goal"]
-
-    # Check if all features are grid type parameters and log UserWarning if needed
-    log_warning_if_all_grid_type_parameters(parameters, executor.get("num_samples"))
+    metric = hyperopt_config[METRIC]
+    goal = hyperopt_config[GOAL]
 
     ######################
     # check validity of output_feature / metric/ split combination
@@ -294,10 +310,6 @@ def hyperopt(
         parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor
     )
 
-    # Explicitly default to a local backend to avoid picking up Ray or Horovod
-    # backend from the environment.
-    backend = backend or config_dict.get("backend") or "local"
-    backend = initialize_backend(backend)
     if not (
         isinstance(backend, LocalBackend)
         or (isinstance(hyperopt_executor, RayTuneExecutor) and isinstance(backend, RayBackend))
@@ -399,43 +411,3 @@ def hyperopt(
     logger.info("Finished hyperopt")
 
     return hyperopt_results
-
-
-def log_warning_if_all_grid_type_parameters(hyperopt_parameter_config: Dict[str, Any], num_samples: int = 1) -> None:
-    """Logs warning if all parameters have a grid type search space and num_samples > 1 since this will result in
-    duplicate trials being created."""
-    if num_samples == 1:
-        return
-
-    total_grid_search_trials = 1
-
-    for _, param_info in hyperopt_parameter_config.items():
-        if param_info.get(SPACE, None) != GRID_SEARCH:
-            return
-        total_grid_search_trials *= len(param_info.get("values", []))
-
-    num_duplicate_trials = (total_grid_search_trials * num_samples) - total_grid_search_trials
-    warnings.warn(
-        "All hyperopt parameters in Ludwig config are using grid_search space, but number of samples "
-        f"({num_samples}) is greater than 1. This will result in {num_duplicate_trials} duplicate trials being "
-        "created. Consider setting `num_samples` to 1 in the hyperopt executor to prevent trial duplication.",
-        RuntimeWarning,
-    )
-
-
-def update_hyperopt_params_with_defaults(hyperopt_params):
-    from ludwig.hyperopt.execution import executor_registry
-
-    set_default_value(hyperopt_params, EXECUTOR, {})
-    set_default_value(hyperopt_params, SPLIT, VALIDATION)
-    set_default_value(hyperopt_params, "output_feature", COMBINED)
-    set_default_value(hyperopt_params, METRIC, LOSS)
-    set_default_value(hyperopt_params, GOAL, MINIMIZE)
-
-    set_default_values(hyperopt_params[EXECUTOR], {TYPE: RAY, "num_samples": 1})
-    executor = get_from_registry(hyperopt_params[EXECUTOR][TYPE], executor_registry)
-    executor_defaults = {k: v for k, v in executor.__dict__.items() if k in get_class_attributes(executor)}
-    set_default_values(
-        hyperopt_params[EXECUTOR],
-        executor_defaults,
-    )
diff --git a/ludwig/hyperopt/utils.py b/ludwig/hyperopt/utils.py
index d04ae170d65..c351adf2258 100644
--- a/ludwig/hyperopt/utils.py
+++ b/ludwig/hyperopt/utils.py
@@ -3,13 +3,42 @@
 import json
 import logging
 import os
+import warnings
 from typing import Any, Dict
 
-from ludwig.constants import HYPEROPT, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PARAMETERS, PREPROCESSING
+from ludwig.constants import (
+    AUTO,
+    COMBINED,
+    EXECUTOR,
+    GOAL,
+    GRID_SEARCH,
+    HYPEROPT,
+    INPUT_FEATURES,
+    LOSS,
+    MAX_CONCURRENT_TRIALS,
+    METRIC,
+    MINIMIZE,
+    NAME,
+    NUM_SAMPLES,
+    OUTPUT_FEATURES,
+    PARAMETERS,
+    PREPROCESSING,
+    RAY,
+    SPACE,
+    SPLIT,
+    TYPE,
+    VALIDATION,
+)
 from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME
 from ludwig.hyperopt.results import HyperoptResults, TrialResults
 from ludwig.utils.data_utils import save_json
-from ludwig.utils.misc_utils import merge_dict
+from ludwig.utils.misc_utils import (
+    get_class_attributes,
+    get_from_registry,
+    merge_dict,
+    set_default_value,
+    set_default_values,
+)
 from ludwig.utils.print_utils import print_boxed
 
 logger = logging.getLogger(__name__)
@@ -141,3 +170,45 @@ def substitute_parameters(
     config = feature_dict_to_list(config)
 
     return config
+
+
+def log_warning_if_all_grid_type_parameters(hyperopt_parameter_config: Dict[str, Any], num_samples: int = 1) -> None:
+    """Logs warning if all parameters have a grid type search space and num_samples > 1 since this will result in
+    duplicate trials being created."""
+    if num_samples == 1:
+        return
+
+    total_grid_search_trials = 1
+
+    for _, param_info in hyperopt_parameter_config.items():
+        if param_info.get(SPACE, None) != GRID_SEARCH:
+            return
+        total_grid_search_trials *= len(param_info.get("values", []))
+
+    num_duplicate_trials = (total_grid_search_trials * num_samples) - total_grid_search_trials
+    warnings.warn(
+        "All hyperopt parameters in Ludwig config are using grid_search space, but number of samples "
+        f"({num_samples}) is greater than 1. This will result in {num_duplicate_trials} duplicate trials being "
+        "created. Consider setting `num_samples` to 1 in the hyperopt executor to prevent trial duplication.",
+        RuntimeWarning,
+    )
+
+
+def update_hyperopt_params_with_defaults(hyperopt_params: Dict[str, Any]) -> None:
+    """Updates user's Ludwig config with default hyperopt parameters."""
+    from ludwig.hyperopt.execution import executor_registry
+
+    set_default_value(hyperopt_params, EXECUTOR, {})
+    set_default_value(hyperopt_params, SPLIT, VALIDATION)
+    set_default_value(hyperopt_params, "output_feature", COMBINED)
+    set_default_value(hyperopt_params, METRIC, LOSS)
+    set_default_value(hyperopt_params, GOAL, MINIMIZE)
+
+    set_default_values(hyperopt_params[EXECUTOR], {TYPE: RAY, NUM_SAMPLES: 1, MAX_CONCURRENT_TRIALS: AUTO})
+
+    executor = get_from_registry(hyperopt_params[EXECUTOR][TYPE], executor_registry)
+    executor_defaults = {k: v for k, v in executor.__dict__.items() if k in get_class_attributes(executor)}
+    set_default_values(
+        hyperopt_params[EXECUTOR],
+        executor_defaults,
+    )
diff --git a/tests/integration_tests/test_hyperopt.py b/tests/integration_tests/test_hyperopt.py
index ebadd93ed0e..0bef8408cd6 100644
--- a/tests/integration_tests/test_hyperopt.py
+++ b/tests/integration_tests/test_hyperopt.py
@@ -22,14 +22,17 @@
 import torch
 from packaging import version
 
+from ludwig.backend import initialize_backend
 from ludwig.constants import (
     ACCURACY,
+    AUTO,
     CATEGORY,
     COMBINER,
     EXECUTOR,
     GRID_SEARCH,
     HYPEROPT,
     INPUT_FEATURES,
+    MAX_CONCURRENT_TRIALS,
     NAME,
     OUTPUT_FEATURES,
     RAY,
@@ -39,7 +42,8 @@
 )
 from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME
 from ludwig.hyperopt.results import HyperoptResults
-from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults
+from ludwig.hyperopt.run import hyperopt
+from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults
 from ludwig.utils import fs_utils
 from ludwig.utils.data_utils import load_json
 from ludwig.utils.defaults import merge_with_defaults
@@ -214,6 +218,10 @@ def test_hyperopt_search_alg(
 
     update_hyperopt_params_with_defaults(hyperopt_config)
 
+    backend = initialize_backend("local")
+    if hyperopt_config[EXECUTOR].get(MAX_CONCURRENT_TRIALS) == AUTO:
+        hyperopt_config[EXECUTOR][MAX_CONCURRENT_TRIALS] = backend.max_concurrent_trials(hyperopt_config)
+
     parameters = hyperopt_config["parameters"]
     split = hyperopt_config["split"]
     output_feature = hyperopt_config["output_feature"]
@@ -276,7 +284,10 @@ def test_hyperopt_scheduler(
     if validation_metric:
         hyperopt_config["validation_metric"] = validation_metric
 
+    backend = initialize_backend("local")
     update_hyperopt_params_with_defaults(hyperopt_config)
+    if hyperopt_config[EXECUTOR].get(MAX_CONCURRENT_TRIALS) == AUTO:
+        hyperopt_config[EXECUTOR][MAX_CONCURRENT_TRIALS] = backend.max_concurrent_trials(hyperopt_config)
 
     parameters = hyperopt_config["parameters"]
     split = hyperopt_config["split"]
diff --git a/tests/integration_tests/test_hyperopt_ray.py b/tests/integration_tests/test_hyperopt_ray.py
index cfdb553e12f..5a280e165fe 100644
--- a/tests/integration_tests/test_hyperopt_ray.py
+++ b/tests/integration_tests/test_hyperopt_ray.py
@@ -21,12 +21,14 @@
 import pytest
 from mlflow.tracking import MlflowClient
 
+from ludwig.backend import initialize_backend
 from ludwig.callbacks import Callback
-from ludwig.constants import ACCURACY, TRAINER
+from ludwig.constants import ACCURACY, AUTO, EXECUTOR, MAX_CONCURRENT_TRIALS, TRAINER
 from ludwig.contribs import MlflowCallback
 from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME
 from ludwig.hyperopt.results import HyperoptResults
-from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults
+from ludwig.hyperopt.run import hyperopt
+from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults
 from ludwig.utils.defaults import merge_with_defaults
 from tests.integration_tests.utils import category_feature, generate_data, text_feature
 
@@ -149,7 +151,10 @@ def run_hyperopt_executor(
     if validation_metric:
         hyperopt_config["validation_metric"] = validation_metric
 
+    backend = initialize_backend("local")
     update_hyperopt_params_with_defaults(hyperopt_config)
+    if hyperopt_config[EXECUTOR].get(MAX_CONCURRENT_TRIALS) == AUTO:
+        hyperopt_config[EXECUTOR][MAX_CONCURRENT_TRIALS] = backend.max_concurrent_trials(hyperopt_config)
 
     parameters = hyperopt_config["parameters"]
     if search_alg.get("type", "") == "bohb":
@@ -162,17 +167,13 @@ def run_hyperopt_executor(
     metric = hyperopt_config["metric"]
     goal = hyperopt_config["goal"]
     search_alg = hyperopt_config["search_alg"]
+    executor = hyperopt_config["executor"]
 
     hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
         parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor
     )
 
-    hyperopt_executor.execute(
-        config,
-        dataset=rel_path,
-        output_directory=tmpdir,
-        backend="local",
-    )
+    hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir, backend=backend)
 
 
 @pytest.mark.distributed
@@ -204,7 +205,6 @@ def test_hyperopt_run_hyperopt(csv_filename, backend, tmpdir, ray_cluster_4cpu):
         text_feature(name="utterance", encoder={"cell_type": "lstm", "reduce_output": "sum"}),
         category_feature(encoder={"vocab_size": 2}, reduce_input="sum"),
     ]
-
     output_features = [category_feature(decoder={"vocab_size": 2}, reduce_input="sum")]
 
     rel_path = generate_data(input_features, output_features, csv_filename)
@@ -237,8 +237,8 @@ def test_hyperopt_run_hyperopt(csv_filename, backend, tmpdir, ray_cluster_4cpu):
         "executor": {
             "type": "ray",
             "num_samples": 2,
-            "cpu_resources_per_trial": 1,
-            "max_concurrent_trials": 2,
+            "cpu_resources_per_trial": 2,
+            "max_concurrent_trials": "auto",
         },
         "search_alg": {"type": "variant_generator"},
     }
diff --git a/tests/integration_tests/test_hyperopt_ray_horovod.py b/tests/integration_tests/test_hyperopt_ray_horovod.py
index 2f9ee370781..40b39741245 100644
--- a/tests/integration_tests/test_hyperopt_ray_horovod.py
+++ b/tests/integration_tests/test_hyperopt_ray_horovod.py
@@ -22,10 +22,11 @@
 
 from ludwig.api import LudwigModel
 from ludwig.callbacks import Callback
-from ludwig.constants import ACCURACY, TRAINER
+from ludwig.constants import ACCURACY, AUTO, EXECUTOR, MAX_CONCURRENT_TRIALS, TRAINER
 from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME
 from ludwig.hyperopt.results import HyperoptResults
-from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults
+from ludwig.hyperopt.run import hyperopt
+from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults
 from ludwig.utils.defaults import merge_with_defaults
 from tests.integration_tests.utils import binary_feature, create_data_set_to_use, generate_data, number_feature
 
@@ -71,8 +72,8 @@ def mock_storage_client(path):
             "lower": 0.001,
             "upper": 0.1,
         },
-        "combiner.num_fc_layers": {"space": "randint", "lower": 2, "upper": 6},
-        "combiner.num_steps": {"space": "grid_search", "values": [3, 4, 5]},
+        "combiner.num_fc_layers": {"space": "randint", "lower": 1, "upper": 3},
+        "combiner.num_steps": {"space": "grid_search", "values": [1, 2, 3]},
     },
     "goal": "minimize",
 }
@@ -122,7 +123,7 @@ def _get_config(search_alg, executor):
         "input_features": input_features,
         "output_features": output_features,
         "combiner": {"type": "concat", "num_fc_layers": 2},
-        TRAINER: {"epochs": 2, "learning_rate": 0.001},
+        TRAINER: {"epochs": 1, "learning_rate": 0.001},
         "hyperopt": {
             **HYPEROPT_CONFIG,
             "executor": executor,
@@ -181,7 +182,10 @@ def run_hyperopt_executor(
     if validation_metric:
         hyperopt_config["validation_metric"] = validation_metric
 
+    backend = RayBackend(**RAY_BACKEND_KWARGS)
     update_hyperopt_params_with_defaults(hyperopt_config)
+    if hyperopt_config[EXECUTOR].get(MAX_CONCURRENT_TRIALS) == AUTO:
+        hyperopt_config[EXECUTOR][MAX_CONCURRENT_TRIALS] = backend.max_concurrent_trials(hyperopt_config)
 
     parameters = hyperopt_config["parameters"]
     if search_alg.get("type", "") == "bohb":
@@ -196,7 +200,6 @@ def run_hyperopt_executor(
     search_alg = hyperopt_config["search_alg"]
 
     # preprocess
-    backend = RayBackend(**RAY_BACKEND_KWARGS)
     model = LudwigModel(config=config, backend=backend)
     training_set, validation_set, test_set, training_set_metadata = model.preprocess(
         dataset=dataset_parquet,
@@ -204,7 +207,7 @@ def run_hyperopt_executor(
 
     # hyperopt
     hyperopt_executor = MockRayTuneExecutor(
-        parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor
+        parameters, output_feature, metric, goal, split, search_alg=search_alg, **hyperopt_config[EXECUTOR]
     )
     hyperopt_executor.mock_path = os.path.join(ray_mock_dir, "bucket")
 
@@ -273,8 +276,8 @@ def test_hyperopt_run_hyperopt(csv_filename, ray_mock_dir, ray_cluster_7cpu):
                 "lower": 0.001,
                 "upper": 0.1,
             },
-            output_feature_name + ".output_size": {"space": "randint", "lower": 2, "upper": 32},
-            output_feature_name + ".num_fc_layers": {"space": "randint", "lower": 2, "upper": 6},
+            output_feature_name + ".output_size": {"space": "randint", "lower": 2, "upper": 8},
+            output_feature_name + ".num_fc_layers": {"space": "randint", "lower": 1, "upper": 3},
         },
         "goal": "minimize",
         "output_feature": output_feature_name,
diff --git a/tests/ludwig/backend/test_ray.py b/tests/ludwig/backend/test_ray.py
index cff37083d35..1b3b6b30416 100644
--- a/tests/ludwig/backend/test_ray.py
+++ b/tests/ludwig/backend/test_ray.py
@@ -10,7 +10,9 @@
 from ray.train.constants import TRAIN_ENABLE_WORKER_SPREAD_ENV  # noqa
 from ray.train.horovod import HorovodConfig  # noqa
 
+from ludwig.backend import initialize_backend  # noqa
 from ludwig.backend.ray import get_trainer_kwargs, spread_env  # noqa
+from ludwig.constants import AUTO, EXECUTOR, MAX_CONCURRENT_TRIALS, RAY  # noqa
 
 # Mark the entire module as distributed
 pytestmark = pytest.mark.distributed
@@ -126,3 +128,64 @@ def test_spread_env(trainer_kwargs, current_env_value, expected_env_value):
         os.environ[TRAIN_ENABLE_WORKER_SPREAD_ENV] = prev_env
     elif TRAIN_ENABLE_WORKER_SPREAD_ENV in os.environ:
         del os.environ[TRAIN_ENABLE_WORKER_SPREAD_ENV]
+
+
+@pytest.mark.distributed
+@pytest.mark.parametrize(
+    "hyperopt_config_old, hyperopt_config_expected",
+    [
+        (  # If max_concurrent_trials is None, it should be left as None in the updated config
+            {
+                "parameters": {"trainer.learning_rate": {"space": "choice", "values": [0.001, 0.01, 0.1]}},
+                "executor": {"num_samples": 4, "cpu_resources_per_trial": 1, "max_concurrent_trials": None},
+            },
+            {
+                "parameters": {"trainer.learning_rate": {"space": "choice", "values": [0.001, 0.01, 0.1]}},
+                "executor": {"num_samples": 4, "cpu_resources_per_trial": 1, "max_concurrent_trials": None},
+            },
+        ),
+        (  # If max_concurrent_trials is auto, it should resolve to 3 on the 4-CPU cluster (num_samples == num_cpus)
+            {
+                "parameters": {"trainer.learning_rate": {"space": "choice", "values": [0.001, 0.01, 0.1]}},
+                "executor": {"num_samples": 4, "cpu_resources_per_trial": 1, "max_concurrent_trials": "auto"},
+            },
+            {
+                "parameters": {"trainer.learning_rate": {"space": "choice", "values": [0.001, 0.01, 0.1]}},
+                "executor": {"num_samples": 4, "cpu_resources_per_trial": 1, "max_concurrent_trials": 3},
+            },
+        ),
+        (  # Even though num_samples is set to 4, this will actually result in 9 trials. We should correctly set
+            # max_concurrent_trials to 3
+            {
+                "parameters": {
+                    "trainer.learning_rate": {"space": "grid_search", "values": [0.001, 0.01, 0.1]},
+                    "combiner.num_fc_layers": {"space": "grid_search", "values": [1, 2, 3]},
+                },
+                "executor": {"num_samples": 4, "cpu_resources_per_trial": 1, "max_concurrent_trials": "auto"},
+            },
+            {
+                "parameters": {
+                    "trainer.learning_rate": {"space": "grid_search", "values": [0.001, 0.01, 0.1]},
+                    "combiner.num_fc_layers": {"space": "grid_search", "values": [1, 2, 3]},
+                },
+                "executor": {"num_samples": 4, "cpu_resources_per_trial": 1, "max_concurrent_trials": 3},
+            },
+        ),
+        (  # Ensure user config value (1) is respected if it is passed in
+            {
+                "parameters": {"trainer.learning_rate": {"space": "choice", "values": [0.001, 0.01, 0.1]}},
+                "executor": {"num_samples": 4, "cpu_resources_per_trial": 1, "max_concurrent_trials": 1},
+            },
+            {
+                "parameters": {"trainer.learning_rate": {"space": "choice", "values": [0.001, 0.01, 0.1]}},
+                "executor": {"num_samples": 4, "cpu_resources_per_trial": 1, "max_concurrent_trials": 1},
+            },
+        ),
+    ],
+    ids=["none", "auto", "auto_with_large_num_trials", "1"],
+)
+def test_set_max_concurrent_trials(hyperopt_config_old, hyperopt_config_expected, ray_cluster_4cpu):
+    backend = initialize_backend(RAY)
+    if hyperopt_config_old[EXECUTOR].get(MAX_CONCURRENT_TRIALS) == AUTO:
+        hyperopt_config_old[EXECUTOR][MAX_CONCURRENT_TRIALS] = backend.max_concurrent_trials(hyperopt_config_old)
+    assert hyperopt_config_old == hyperopt_config_expected

From f000e73ea1d3a28d32575c066437a26db10607c7 Mon Sep 17 00:00:00 2001
From: Arnav Garg <106701836+arnavgarg1@users.noreply.github.com>
Date: Wed, 12 Oct 2022 16:34:40 -0700
Subject: [PATCH 09/29] Fix OOM with ray nightly horovod test and speed up
 tests to prevent timeout (#2599)

* reducing scope of parameters in horovod tests

* reduce num_steps

* remove skip on executor nightly test

* more simplification

* revert to epochs

* revert to epochs

* add conditional check for num epochs

* add comment

* test oom and speed up

* uncomment tests

* use ray 2.0 checks

* add comment explaining max concurrent trials

* more optimizations

* fix ray version flag
---
 .../test_hyperopt_ray_horovod.py              | 61 ++++++++-----------
 1 file changed, 27 insertions(+), 34 deletions(-)

diff --git a/tests/integration_tests/test_hyperopt_ray_horovod.py b/tests/integration_tests/test_hyperopt_ray_horovod.py
index 40b39741245..f37d609db8d 100644
--- a/tests/integration_tests/test_hyperopt_ray_horovod.py
+++ b/tests/integration_tests/test_hyperopt_ray_horovod.py
@@ -33,10 +33,8 @@
 try:
     import ray
 
-    # Ray nightly version is always set to 3.0.0.dev0
-    _ray_nightly = version.parse(ray.__version__) >= version.parse("3.0.0.dev0")
-    _ray_114 = version.parse(ray.__version__) >= version.parse("1.14")
-    if _ray_114:
+    _ray_200 = version.parse(ray.__version__) > version.parse("1.13")
+    if _ray_200:
         from ray.tune.syncer import get_node_to_storage_syncer, SyncConfig
     else:
         from ray.tune.syncer import get_sync_client
@@ -45,11 +43,8 @@
     from ludwig.hyperopt.execution import _get_relative_checkpoints_dir_parts, RayTuneExecutor
 except ImportError:
     ray = None
-    _ray_nightly = False
     RayTuneExecutor = object
 
-# Ray mocks
-
 # Dummy sync templates
 LOCAL_SYNC_TEMPLATE = "echo {source}/ {target}/"
 LOCAL_DELETE_TEMPLATE = "echo {target}"
@@ -58,7 +53,7 @@
 def mock_storage_client(path):
     """Mocks storage client that treats a local dir as durable storage."""
     os.makedirs(path, exist_ok=True)
-    if _ray_114:
+    if _ray_200:
         syncer = get_node_to_storage_syncer(SyncConfig(upload_dir=path))
     else:
         syncer = get_sync_client(LOCAL_SYNC_TEMPLATE, LOCAL_DELETE_TEMPLATE)
@@ -72,8 +67,7 @@ def mock_storage_client(path):
             "lower": 0.001,
             "upper": 0.1,
         },
-        "combiner.num_fc_layers": {"space": "randint", "lower": 1, "upper": 3},
-        "combiner.num_steps": {"space": "grid_search", "values": [1, 2, 3]},
+        "combiner.output_size": {"space": "grid_search", "values": [4, 8]},
     },
     "goal": "minimize",
 }
@@ -84,21 +78,10 @@ def mock_storage_client(path):
         "executor": {"type": "ray", "num_samples": 2, "cpu_resources_per_trial": 1},
         "search_alg": {"type": "variant_generator"},
     },
-    # TODO(shreya): Uncomment when https://github.com/ludwig-ai/ludwig/issues/2039 is fixed.
-    # {
-    #     "type": "ray",
-    #     "num_samples": 1,
-    #     "scheduler": {
-    #         "type": "async_hyperband",
-    #         "time_attr": "training_iteration",
-    #         "reduction_factor": 2,
-    #         "dynamic_resource_allocation": True,
-    #     },
-    # },
     {
         "executor": {
             "type": "ray",
-            "num_samples": 3,
+            "num_samples": 2,
             "scheduler": {
                 "type": "hb_bohb",
                 "time_attr": "training_iteration",
@@ -108,6 +91,17 @@ def mock_storage_client(path):
         },
         "search_alg": {"type": "bohb"},
     },
+    # TODO(shreya): Uncomment when https://github.com/ludwig-ai/ludwig/issues/2039 is fixed.
+    # {
+    #     "type": "ray",
+    #     "num_samples": 1,
+    #     "scheduler": {
+    #         "type": "async_hyperband",
+    #         "time_attr": "training_iteration",
+    #         "reduction_factor": 2,
+    #         "dynamic_resource_allocation": True,
+    #     },
+    # },
 ]
 
 
@@ -116,14 +110,17 @@ def mock_storage_client(path):
 
 
 def _get_config(search_alg, executor):
-    input_features = [number_feature(), number_feature()]
+    input_features = [number_feature()]
     output_features = [binary_feature()]
 
+    # Bohb causes training failures when num epochs is 1
+    num_epochs = 1 if search_alg["type"] == "variant_generator" else 2
+
     return {
         "input_features": input_features,
         "output_features": output_features,
-        "combiner": {"type": "concat", "num_fc_layers": 2},
-        TRAINER: {"epochs": 1, "learning_rate": 0.001},
+        "combiner": {"type": "concat"},
+        TRAINER: {"epochs": num_epochs, "learning_rate": 0.001},
         "hyperopt": {
             **HYPEROPT_CONFIG,
             "executor": executor,
@@ -190,7 +187,7 @@ def run_hyperopt_executor(
     parameters = hyperopt_config["parameters"]
     if search_alg.get("type", "") == "bohb":
         # bohb does not support grid_search search space
-        del parameters["combiner.num_steps"]
+        del parameters["combiner.output_size"]
         hyperopt_config["parameters"] = parameters
 
     split = hyperopt_config["split"]
@@ -224,9 +221,8 @@ def run_hyperopt_executor(
     )
 
 
-@pytest.mark.skipif(_ray_nightly, reason="https://github.com/ludwig-ai/ludwig/issues/2451")
 @pytest.mark.distributed
-@pytest.mark.parametrize("scenario", SCENARIOS)
+@pytest.mark.parametrize("scenario", SCENARIOS, ids=["variant_generator", "bohb"])
 def test_hyperopt_executor(scenario, csv_filename, ray_mock_dir, ray_cluster_7cpu):
     search_alg = scenario["search_alg"]
     executor = scenario["executor"]
@@ -237,8 +233,6 @@ def test_hyperopt_executor(scenario, csv_filename, ray_mock_dir, ray_cluster_7cp
 @pytest.mark.distributed
 def test_hyperopt_executor_with_metric(csv_filename, ray_mock_dir, ray_cluster_7cpu):
     run_hyperopt_executor(
-        # {"type": "ray", "num_samples": 2},
-        # {"type": "ray"},
         {"type": "variant_generator"},  # search_alg
         {"type": "ray", "num_samples": 2},  # executor
         csv_filename,
@@ -252,7 +246,7 @@ def test_hyperopt_executor_with_metric(csv_filename, ray_mock_dir, ray_cluster_7
 @pytest.mark.distributed
 @patch("ludwig.hyperopt.execution.RayTuneExecutor", MockRayTuneExecutor)
 def test_hyperopt_run_hyperopt(csv_filename, ray_mock_dir, ray_cluster_7cpu):
-    input_features = [number_feature(), number_feature()]
+    input_features = [number_feature()]
     output_features = [binary_feature()]
 
     csv_filename = os.path.join(ray_mock_dir, "dataset.csv")
@@ -262,8 +256,8 @@ def test_hyperopt_run_hyperopt(csv_filename, ray_mock_dir, ray_cluster_7cpu):
     config = {
         "input_features": input_features,
         "output_features": output_features,
-        "combiner": {"type": "concat", "num_fc_layers": 2},
-        TRAINER: {"epochs": 4, "learning_rate": 0.001},
+        "combiner": {"type": "concat"},
+        TRAINER: {"epochs": 1, "learning_rate": 0.001},
         "backend": {"type": "ray", **RAY_BACKEND_KWARGS},
     }
 
@@ -277,7 +271,6 @@ def test_hyperopt_run_hyperopt(csv_filename, ray_mock_dir, ray_cluster_7cpu):
                 "upper": 0.1,
             },
             output_feature_name + ".output_size": {"space": "randint", "lower": 2, "upper": 8},
-            output_feature_name + ".num_fc_layers": {"space": "randint", "lower": 1, "upper": 3},
         },
         "goal": "minimize",
         "output_feature": output_feature_name,

From 4cf9eed3c3adf5de577620c7edbc9f0f09c19699 Mon Sep 17 00:00:00 2001
From: Joppe Geluykens <joppe@predibase.com>
Date: Thu, 13 Oct 2022 10:18:23 +0200
Subject: [PATCH 10/29] [explain] add API annotations (#2635)

---
 ludwig/explain/captum.py      | 2 ++
 ludwig/explain/explainer.py   | 2 ++
 ludwig/explain/explanation.py | 3 +++
 ludwig/explain/gbm.py         | 2 ++
 4 files changed, 9 insertions(+)

diff --git a/ludwig/explain/captum.py b/ludwig/explain/captum.py
index c48bbe7233e..85cf76c4885 100644
--- a/ludwig/explain/captum.py
+++ b/ludwig/explain/captum.py
@@ -7,6 +7,7 @@
 from torch.autograd import Variable
 
 from ludwig.api import LudwigModel
+from ludwig.api_annotations import PublicAPI
 from ludwig.data.preprocessing import preprocess_for_prediction
 from ludwig.explain.explainer import Explainer
 from ludwig.explain.explanation import Explanation
@@ -107,6 +108,7 @@ def get_input_tensors(model: LudwigModel, input_set: pd.DataFrame) -> List[Varia
     return data_to_predict
 
 
+@PublicAPI(stability="experimental")
 class IntegratedGradientsExplainer(Explainer):
     def explain(self) -> Tuple[List[Explanation], List[float]]:
         """Explain the model's predictions using Integrated Gradients.
diff --git a/ludwig/explain/explainer.py b/ludwig/explain/explainer.py
index b4287d97cc5..b338bfed1d2 100644
--- a/ludwig/explain/explainer.py
+++ b/ludwig/explain/explainer.py
@@ -4,11 +4,13 @@
 import pandas as pd
 
 from ludwig.api import LudwigModel
+from ludwig.api_annotations import DeveloperAPI
 from ludwig.constants import BINARY, CATEGORY, TYPE
 from ludwig.explain.explanation import Explanation
 from ludwig.explain.util import prepare_data
 
 
+@DeveloperAPI
 class Explainer(metaclass=ABCMeta):
     def __init__(self, model: LudwigModel, inputs_df: pd.DataFrame, sample_df: pd.DataFrame, target: str):
         """Constructor for the explainer.
diff --git a/ludwig/explain/explanation.py b/ludwig/explain/explanation.py
index c22e66e0930..eef447ae6f6 100644
--- a/ludwig/explain/explanation.py
+++ b/ludwig/explain/explanation.py
@@ -4,6 +4,8 @@
 import numpy as np
 import numpy.typing as npt
 
+from ludwig.api_annotations import PublicAPI
+
 
 @dataclass
 class LabelExplanation:
@@ -13,6 +15,7 @@ class LabelExplanation:
     feature_attributions: npt.NDArray[np.float64]
 
 
+@PublicAPI(stability="experimental")
 @dataclass
 class Explanation:
     """Stores the explanations for a single row of input data.
diff --git a/ludwig/explain/gbm.py b/ludwig/explain/gbm.py
index cc801a5289f..c83c7b5afd4 100644
--- a/ludwig/explain/gbm.py
+++ b/ludwig/explain/gbm.py
@@ -1,10 +1,12 @@
 from typing import List, Tuple
 
+from ludwig.api_annotations import PublicAPI
 from ludwig.explain.explainer import Explainer
 from ludwig.explain.explanation import Explanation
 from ludwig.models.gbm import GBM
 
 
+@PublicAPI(stability="experimental")
 class GBMExplainer(Explainer):
     def explain(self) -> Tuple[List[Explanation], List[float]]:
         """Explain the model's predictions. Uses the feature importances from the model.

From b168ca1aa5763bb38e4318a72ab86b29b63bb9de Mon Sep 17 00:00:00 2001
From: Travis Addair <tgaddair@gmail.com>
Date: Thu, 13 Oct 2022 09:39:52 -0700
Subject: [PATCH 11/29] Added storage backend API to allow injecting dynamic
 credentials (#2630)

---
 .github/workflows/pytest.yml                  | 10 ++-
 ludwig/backend/base.py                        | 15 +++-
 ludwig/backend/utils/__init__.py              |  0
 ludwig/backend/utils/storage.py               | 69 +++++++++++++++
 ludwig/data/cache/manager.py                  | 10 +--
 ludwig/data/preprocessing.py                  | 16 ++--
 ludwig/hyperopt/execution.py                  |  2 +-
 ludwig/hyperopt/run.py                        | 17 ++--
 ludwig/hyperopt/syncer.py                     | 21 +++--
 ludwig/utils/backward_compatibility.py        | 15 ++++
 ludwig/utils/fs_utils.py                      |  3 -
 requirements.txt                              |  2 +-
 requirements_test.txt                         |  2 +
 tests/README.md                               | 29 ++++++
 tests/docker-compose.yml                      | 16 ++++
 tests/integration_tests/test_cache_manager.py | 18 +---
 tests/integration_tests/test_hyperopt.py      | 35 ++++++--
 tests/integration_tests/test_remote.py        | 88 ++++++++++---------
 tests/integration_tests/utils.py              | 19 +++-
 .../utils/test_backward_compatibility.py      | 11 +++
 20 files changed, 291 insertions(+), 107 deletions(-)
 create mode 100644 ludwig/backend/utils/__init__.py
 create mode 100644 ludwig/backend/utils/storage.py
 create mode 100644 tests/README.md
 create mode 100644 tests/docker-compose.yml

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 6529758de28..43c57b8dde7 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -44,10 +44,16 @@ jobs:
       NEUROPOD_VERISON: "0.3.0-rc6"
       TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }}
       RAY_VERSION: ${{ matrix.ray-version }}
-      AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
 
     name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}
+    services:
+      minio:
+        image: fclairamb/minio-github-actions
+        env:
+          MINIO_ACCESS_KEY: minio
+          MINIO_SECRET_KEY: minio123
+        ports:
+          - 9000:9000
 
     timeout-minutes: 80
     steps:
diff --git a/ludwig/backend/base.py b/ludwig/backend/base.py
index 96666390524..186ec57006b 100644
--- a/ludwig/backend/base.py
+++ b/ludwig/backend/base.py
@@ -24,6 +24,7 @@
 import psutil
 import torch
 
+from ludwig.backend.utils.storage import StorageManager
 from ludwig.data.cache.manager import CacheManager
 from ludwig.data.dataframe.pandas import PANDAS
 from ludwig.data.dataset.base import DatasetManager
@@ -42,17 +43,23 @@ def __init__(
         self,
         dataset_manager: DatasetManager,
         cache_dir: Optional[str] = None,
-        cache_credentials: Optional[Union[str, dict]] = None,
+        credentials: Optional[Dict[str, Dict[str, Any]]] = None,
     ):
+        credentials = credentials or {}
         self._dataset_manager = dataset_manager
-        self._cache_manager = CacheManager(self._dataset_manager, cache_dir, cache_credentials)
+        self._storage_manager = StorageManager(**credentials)
+        self._cache_manager = CacheManager(self._dataset_manager, cache_dir)
 
     @property
-    def cache(self):
+    def storage(self) -> StorageManager:
+        return self._storage_manager
+
+    @property
+    def cache(self) -> CacheManager:
         return self._cache_manager
 
     @property
-    def dataset_manager(self):
+    def dataset_manager(self) -> DatasetManager:
         return self._dataset_manager
 
     @abstractmethod
diff --git a/ludwig/backend/utils/__init__.py b/ludwig/backend/utils/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/ludwig/backend/utils/storage.py b/ludwig/backend/utils/storage.py
new file mode 100644
index 00000000000..3f02a6861b4
--- /dev/null
+++ b/ludwig/backend/utils/storage.py
@@ -0,0 +1,69 @@
+import contextlib
+from typing import Any, Dict, Optional, Union
+
+from ludwig.utils import data_utils
+
+CredInputs = Optional[Union[str, Dict[str, Any]]]
+
+
+DEFAULTS = "defaults"
+ARTIFACTS = "artifacts"
+DATASETS = "datasets"
+CACHE = "cache"
+
+
+class Storage:
+    def __init__(self, creds: Optional[Dict[str, Any]]):
+        self._creds = creds
+
+    @contextlib.contextmanager
+    def use_credentials(self):
+        with data_utils.use_credentials(self._creds):
+            yield
+
+    @property
+    def credentials(self) -> Optional[Dict[str, Any]]:
+        return self._creds
+
+
+class StorageManager:
+    def __init__(
+        self,
+        defaults: CredInputs = None,
+        artifacts: CredInputs = None,
+        datasets: CredInputs = None,
+        cache: CredInputs = None,
+    ):
+        defaults = load_creds(defaults)
+        cred_inputs = {
+            DEFAULTS: defaults,
+            ARTIFACTS: load_creds(artifacts),
+            DATASETS: load_creds(datasets),
+            CACHE: load_creds(cache),
+        }
+
+        self.storages = {k: Storage(v if v is not None else defaults) for k, v in cred_inputs.items()}
+
+    @property
+    def defaults(self) -> Storage:
+        return self.storages[DEFAULTS]
+
+    @property
+    def artifacts(self) -> Storage:
+        """TODO(travis): Currently used for hyperopt, but should be used for all outputs."""
+        return self.storages[ARTIFACTS]
+
+    @property
+    def datasets(self) -> Storage:
+        """TODO(travis): Should be used to read in datasets."""
+        return self.storages[DATASETS]
+
+    @property
+    def cache(self) -> Storage:
+        return self.storages[CACHE]
+
+
+def load_creds(cred: CredInputs) -> Optional[Dict[str, Any]]:
+    """Resolve credentials: a str is passed to `data_utils.load_json`; a dict or None is returned unchanged."""
+    # None must pass through untouched so StorageManager can fall back to its defaults.
+    return data_utils.load_json(cred) if isinstance(cred, str) else cred
diff --git a/ludwig/data/cache/manager.py b/ludwig/data/cache/manager.py
index f60c34ac002..d51141401d2 100644
--- a/ludwig/data/cache/manager.py
+++ b/ludwig/data/cache/manager.py
@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import Optional, Union
+from typing import Optional
 
 from ludwig.constants import CHECKSUM, META, TEST, TRAINING, VALIDATION
 from ludwig.data.cache.types import alphanum, CacheableDataset
@@ -87,13 +87,9 @@ def __init__(
         self,
         dataset_manager: DatasetManager,
         cache_dir: Optional[str] = None,
-        cache_credentials: Optional[Union[str, dict]] = None,
     ):
         self._dataset_manager = dataset_manager
         self._cache_dir = cache_dir
-        if isinstance(cache_credentials, str):
-            cache_credentials = data_utils.load_json(cache_credentials)
-        self._cache_credentials = cache_credentials
 
     def get_dataset_cache(
         self,
@@ -151,7 +147,3 @@ def can_cache(self, skip_save_processed_input: bool) -> bool:
     @property
     def data_format(self) -> str:
         return self._dataset_manager.data_format
-
-    @property
-    def credentials(self) -> Optional[dict]:
-        return self._cache_credentials
diff --git a/ludwig/data/preprocessing.py b/ludwig/data/preprocessing.py
index e4b44c17911..fae3d15d4f1 100644
--- a/ludwig/data/preprocessing.py
+++ b/ludwig/data/preprocessing.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import contextlib
 import logging
 import warnings
 from abc import ABC, abstractmethod
@@ -98,7 +99,6 @@
     SPSS_FORMATS,
     STATA_FORMATS,
     TSV_FORMATS,
-    use_credentials,
 )
 from ludwig.utils.defaults import default_preprocessing_parameters, default_random_seed
 from ludwig.utils.fs_utils import file_lock, path_exists
@@ -1575,7 +1575,7 @@ def preprocess_for_training(
         test_set = test_set.unwrap() if test_set is not None else None
 
         if data_format in CACHEABLE_FORMATS:
-            with use_credentials(backend.cache.credentials):
+            with backend.storage.cache.use_credentials():
                 cache_results = cache.get()
                 if cache_results is not None:
                     valid, *cache_values = cache_results
@@ -1602,7 +1602,7 @@ def preprocess_for_training(
         data_format_processor = get_from_registry(data_format, data_format_preprocessor_registry)
 
         if cached or data_format == "hdf5":
-            with use_credentials(backend.cache.credentials):
+            with backend.storage.cache.use_credentials():
                 # Always interpret hdf5 files as preprocessed, even if missing from the cache
                 processed = data_format_processor.prepare_processed_data(
                     features,
@@ -1637,14 +1637,14 @@ def preprocess_for_training(
 
             # cache the dataset
             if backend.cache.can_cache(skip_save_processed_input):
-                with use_credentials(backend.cache.credentials):
+                with backend.storage.cache.use_credentials():
                     logger.debug("cache processed data")
                     processed = cache.put(*processed)
                     # set cached=True to ensure credentials are used correctly below
                     cached = True
             training_set, test_set, validation_set, training_set_metadata = processed
 
-        with use_credentials(backend.cache.credentials if cached else None):
+        with backend.storage.cache.use_credentials() if cached else contextlib.nullcontext():
             logger.debug("create training dataset")
             training_dataset = backend.dataset_manager.create(training_set, config, training_set_metadata)
             if not len(training_set):
@@ -1913,7 +1913,7 @@ def preprocess_for_prediction(
 
     training_set = test_set = validation_set = None
     if data_format in CACHEABLE_FORMATS and split != FULL:
-        with use_credentials(backend.cache.credentials):
+        with backend.storage.cache.use_credentials():
             cache_results = cache.get()
             if cache_results is not None:
                 valid, *cache_values = cache_results
@@ -1929,7 +1929,7 @@ def preprocess_for_prediction(
 
     data_format_processor = get_from_registry(data_format, data_format_preprocessor_registry)
     if cached:
-        with use_credentials(backend.cache.credentials):
+        with backend.storage.cache.use_credentials():
             processed = data_format_processor.prepare_processed_data(
                 features,
                 dataset=dataset,
@@ -1967,7 +1967,7 @@ def preprocess_for_prediction(
         "output_features": output_features,
     }
 
-    with use_credentials(backend.cache.credentials if cached else None):
+    with backend.storage.cache.use_credentials() if cached else contextlib.nullcontext():
         dataset = backend.dataset_manager.create(
             dataset,
             config,
diff --git a/ludwig/hyperopt/execution.py b/ludwig/hyperopt/execution.py
index 90964c1ab2e..a8e50e398f7 100644
--- a/ludwig/hyperopt/execution.py
+++ b/ludwig/hyperopt/execution.py
@@ -779,7 +779,7 @@ def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None):
 
         if has_remote_protocol(output_directory):
             if _ray_200:
-                self.sync_client = RemoteSyncer()
+                self.sync_client = RemoteSyncer(creds=backend.storage.artifacts.credentials)
                 self.sync_config = tune.SyncConfig(upload_dir=output_directory, syncer=self.sync_client)
             else:
                 raise ValueError(
diff --git a/ludwig/hyperopt/run.py b/ludwig/hyperopt/run.py
index e25372fd891..b502d1e0e54 100644
--- a/ludwig/hyperopt/run.py
+++ b/ludwig/hyperopt/run.py
@@ -393,16 +393,17 @@ def hyperopt(
         print_hyperopt_results(hyperopt_results)
 
         if not skip_save_hyperopt_statistics:
-            results_directory = os.path.join(output_directory, experiment_name)
-            makedirs(results_directory, exist_ok=True)
+            with backend.storage.artifacts.use_credentials():
+                results_directory = os.path.join(output_directory, experiment_name)
+                makedirs(results_directory, exist_ok=True)
 
-            hyperopt_stats = {
-                "hyperopt_config": hyperopt_config,
-                "hyperopt_results": [t.to_dict() for t in hyperopt_results.ordered_trials],
-            }
+                hyperopt_stats = {
+                    "hyperopt_config": hyperopt_config,
+                    "hyperopt_results": [t.to_dict() for t in hyperopt_results.ordered_trials],
+                }
 
-            save_hyperopt_stats(hyperopt_stats, results_directory)
-            logger.info(f"Hyperopt stats saved to: {results_directory}")
+                save_hyperopt_stats(hyperopt_stats, results_directory)
+                logger.info(f"Hyperopt stats saved to: {results_directory}")
 
     for callback in callbacks or []:
         callback.on_hyperopt_end(experiment_name)
diff --git a/ludwig/hyperopt/syncer.py b/ludwig/hyperopt/syncer.py
index 561b3048587..940b0fc4830 100644
--- a/ludwig/hyperopt/syncer.py
+++ b/ludwig/hyperopt/syncer.py
@@ -12,16 +12,25 @@ def __init__(self, sync_period: float = 300.0, creds: Optional[Dict[str, Any]] =
         self.creds = creds
 
     def _sync_up_command(self, local_path: str, uri: str, exclude: Optional[List] = None) -> Tuple[Callable, Dict]:
-        with use_credentials(self.creds):
-            return upload, dict(lpath=local_path, rpath=uri)
+        def upload_cmd(*args, **kwargs):
+            with use_credentials(self.creds):
+                return upload(*args, **kwargs)
+
+        return upload_cmd, dict(lpath=local_path, rpath=uri)
 
     def _sync_down_command(self, uri: str, local_path: str) -> Tuple[Callable, Dict]:
-        with use_credentials(self.creds):
-            return download, dict(rpath=uri, lpath=local_path)
+        def download_cmd(*args, **kwargs):
+            with use_credentials(self.creds):
+                return download(*args, **kwargs)
+
+        return download_cmd, dict(rpath=uri, lpath=local_path)
 
     def _delete_command(self, uri: str) -> Tuple[Callable, Dict]:
-        with use_credentials(self.creds):
-            return delete, dict(url=uri, recursive=True)
+        def delete_cmd(*args, **kwargs):
+            with use_credentials(self.creds):
+                return delete(*args, **kwargs)
+
+        return delete_cmd, dict(url=uri, recursive=True)
 
     def __reduce__(self):
         """We need this custom serialization because we can't pickle thread.lock objects that are used by the
diff --git a/ludwig/utils/backward_compatibility.py b/ludwig/utils/backward_compatibility.py
index 657b465891e..f127648ca35 100644
--- a/ludwig/utils/backward_compatibility.py
+++ b/ludwig/utils/backward_compatibility.py
@@ -161,6 +161,21 @@ def _traverse_dicts(config: Any, f: Callable[[Dict], None]):
             _traverse_dicts(v, f)
 
 
+@register_config_transformation("0.6", "backend")
+def _update_backend_cache_credentials(backend: Dict[str, Any]) -> Dict[str, Any]:
+    """Move legacy `cache_credentials` to `credentials.cache` in place; the legacy key is removed even if ignored."""
+    if "cache_credentials" in backend:
+        cache_creds = backend.pop("cache_credentials")
+        credentials = backend.setdefault("credentials", {})
+        if "cache" in credentials:
+            warnings.warn("`cache` already found in `backend.credentials`, ignoring `cache_credentials`")
+        else:
+            msg = "`backend.cache_credentials` has been renamed `backend.credentials.cache`"
+            warnings.warn(msg, DeprecationWarning)
+            credentials["cache"] = cache_creds
+    return backend
+
+
 @register_config_transformation("0.6", ["output_features"])
 def update_class_weights_in_features(feature: Dict[str, Any]) -> Dict[str, Any]:
     if LOSS in feature:
diff --git a/ludwig/utils/fs_utils.py b/ludwig/utils/fs_utils.py
index 116d2429f8a..1dec5d9ff9f 100644
--- a/ludwig/utils/fs_utils.py
+++ b/ludwig/utils/fs_utils.py
@@ -192,9 +192,6 @@ def copy(src, tgt, recursive=False):
 def makedirs(url, exist_ok=False):
     fs, path = get_fs_and_path(url)
     fs.makedirs(path, exist_ok=exist_ok)
-    if not path_exists(url):
-        with fsspec.open(url, mode="wb"):
-            pass
 
 
 def delete(url, recursive=False):
diff --git a/requirements.txt b/requirements.txt
index 135ccdd8ffb..125b06ac62e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,7 +17,7 @@ absl-py
 kaggle
 requests
 tables
-fsspec[http]<2022.8
+fsspec[http]
 dataclasses-json
 jsonschema>=4.5.0,<4.7
 marshmallow
diff --git a/requirements_test.txt b/requirements_test.txt
index 939106f0574..72fec125842 100644
--- a/requirements_test.txt
+++ b/requirements_test.txt
@@ -40,3 +40,5 @@ scikit-optimize
 
 # search_alg: zoopt
 zoopt
+
+s3fs>=2022.8.2
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 00000000000..a80af0a330e
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,29 @@
+# Test Guide
+
+These instructions assume your current working directory (CWD) is the Ludwig repo root.
+
+## Basic
+
+```bash
+pytest -vs tests
+```
+
+## Private Tests
+
+These tests connect to external services such as remote filesystems (MinIO / S3); the services can be run locally using Docker.
+
+```bash
+# prepare test services
+docker-compose -f tests/docker-compose.yml up
+
+# run all tests
+RUN_PRIVATE=1 pytest -vs tests
+```
+
+## Slow Tests
+
+These tests are very slow, and should typically be run on GPU machines.
+
+```bash
+RUN_SLOW=1 pytest -vs tests
+```
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
new file mode 100644
index 00000000000..611725c554c
--- /dev/null
+++ b/tests/docker-compose.yml
@@ -0,0 +1,16 @@
+version: '3'
+
+services:
+  minio:
+    image: 'minio/minio:latest'
+    volumes:
+      - minio_storage:/data
+    ports:
+      - 9000:9000
+      - 9001:9001
+    environment:
+      - MINIO_ACCESS_KEY=minio
+      - MINIO_SECRET_KEY=minio123
+    command: server --console-address ":9001" /data
+volumes:
+  minio_storage:
diff --git a/tests/integration_tests/test_cache_manager.py b/tests/integration_tests/test_cache_manager.py
index 93722b8adc8..747fe400eed 100644
--- a/tests/integration_tests/test_cache_manager.py
+++ b/tests/integration_tests/test_cache_manager.py
@@ -1,4 +1,3 @@
-import json
 import os
 from pathlib import Path
 
@@ -24,22 +23,7 @@ def change_test_dir(tmpdir, monkeypatch):
 def test_cache_dataset(use_cache_dir, use_split, use_df, tmpdir, change_test_dir):
     dataset_manager = PandasDatasetManager(backend=LocalTestBackend())
     cache_dir = os.path.join(tmpdir, "cache") if use_cache_dir else None
-
-    creds_fname = os.path.join(tmpdir, "credentials.json")
-    creds = {
-        "s3": {
-            "client_kwargs": {
-                "endpoint_url": "http://localhost:9000",
-                "aws_access_key_id": "test",
-                "aws_secret_access_key": "test",
-            }
-        }
-    }
-    with open(creds_fname, "w") as f:
-        json.dump(creds, f)
-
-    manager = CacheManager(dataset_manager, cache_dir=cache_dir, cache_credentials=creds_fname)
-    assert manager.credentials == creds
+    manager = CacheManager(dataset_manager, cache_dir=cache_dir)
 
     config = {
         "input_features": [sequence_feature(encoder={"reduce_output": "sum"})],
diff --git a/tests/integration_tests/test_hyperopt.py b/tests/integration_tests/test_hyperopt.py
index 0bef8408cd6..1ab7c214511 100644
--- a/tests/integration_tests/test_hyperopt.py
+++ b/tests/integration_tests/test_hyperopt.py
@@ -45,9 +45,16 @@
 from ludwig.hyperopt.run import hyperopt
 from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults
 from ludwig.utils import fs_utils
-from ludwig.utils.data_utils import load_json
+from ludwig.utils.data_utils import load_json, use_credentials
 from ludwig.utils.defaults import merge_with_defaults
-from tests.integration_tests.utils import category_feature, generate_data, private_param, remote_tmpdir, text_feature
+from tests.integration_tests.utils import (
+    category_feature,
+    generate_data,
+    minio_test_creds,
+    private_param,
+    remote_tmpdir,
+    text_feature,
+)
 
 ray = pytest.importorskip("ray")
 
@@ -311,8 +318,7 @@ def test_hyperopt_scheduler(
         assert isinstance(raytune_results, HyperoptResults)
 
 
-@pytest.mark.parametrize("search_space", ["random", "grid"])
-def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster):
+def _run_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, backend, ray_cluster):
     input_features = [
         text_feature(name="utterance", encoder={"reduce_output": "sum"}),
         category_feature(encoder={"vocab_size": 3}),
@@ -327,6 +333,7 @@ def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster):
         OUTPUT_FEATURES: output_features,
         COMBINER: {TYPE: "concat", "num_fc_layers": 2},
         TRAINER: {"epochs": 2, "learning_rate": 0.001},
+        "backend": backend,
     }
 
     output_feature_name = output_features[0][NAME]
@@ -390,17 +397,33 @@ def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster):
     assert isinstance(hyperopt_results, HyperoptResults)
 
     # check for existence of the hyperopt statistics file
-    assert fs_utils.path_exists(os.path.join(tmpdir, experiment_name, HYPEROPT_STATISTICS_FILE_NAME))
+    with use_credentials(minio_test_creds()):
+        assert fs_utils.path_exists(os.path.join(tmpdir, experiment_name, HYPEROPT_STATISTICS_FILE_NAME))
+        for trial in hyperopt_results.experiment_analysis.trials:
+            assert fs_utils.path_exists(os.path.join(tmpdir, experiment_name, f"trial_{trial.trial_id}"))
+
+
+@pytest.mark.parametrize("search_space", ["random", "grid"])
+def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster):
+    _run_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, "local", ray_cluster)
 
 
 @pytest.mark.parametrize("fs_protocol,bucket", [private_param(("s3", "ludwig-tests"))], ids=["s3"])
 def test_hyperopt_sync_remote(fs_protocol, bucket, csv_filename, ray_cluster):
+    backend = {
+        "type": "local",
+        "credentials": {
+            "artifacts": minio_test_creds(),
+        },
+    }
+
     with remote_tmpdir(fs_protocol, bucket) as tmpdir:
         with pytest.raises(ValueError) if not _ray200 else contextlib.nullcontext():
-            test_hyperopt_run_hyperopt(
+            _run_hyperopt_run_hyperopt(
                 csv_filename,
                 "random",
                 tmpdir,
+                backend,
                 ray_cluster,
             )
 
diff --git a/tests/integration_tests/test_remote.py b/tests/integration_tests/test_remote.py
index debf1f18004..b8ccd34c41e 100644
--- a/tests/integration_tests/test_remote.py
+++ b/tests/integration_tests/test_remote.py
@@ -8,9 +8,11 @@
 from ludwig.constants import TRAINER
 from ludwig.globals import DESCRIPTION_FILE_NAME
 from ludwig.utils import fs_utils
+from ludwig.utils.data_utils import use_credentials
 from tests.integration_tests.utils import (
     category_feature,
     generate_data,
+    minio_test_creds,
     private_param,
     remote_tmpdir,
     sequence_feature,
@@ -25,54 +27,60 @@
     ],
 )
 @pytest.mark.parametrize(
-    "fs_protocol,bucket", [("file", None), private_param(("s3", "ludwig-tests"))], ids=["file", "s3"]
+    "fs_protocol,bucket,creds",
+    [("file", None, None), private_param(("s3", "ludwig-tests", minio_test_creds()))],
+    ids=["file", "s3"],
 )
-def test_remote_training_set(csv_filename, fs_protocol, bucket, backend, ray_cluster_2cpu):
+def test_remote_training_set(csv_filename, fs_protocol, bucket, creds, backend, ray_cluster_2cpu):
     with remote_tmpdir(fs_protocol, bucket) as tmpdir:
-        input_features = [sequence_feature(encoder={"reduce_output": "sum"})]
-        output_features = [category_feature(decoder={"vocab_size": 2}, reduce_input="sum")]
+        with use_credentials(creds):
+            input_features = [sequence_feature(encoder={"reduce_output": "sum"})]
+            output_features = [category_feature(decoder={"vocab_size": 2}, reduce_input="sum")]
 
-        train_csv = os.path.join(tmpdir, "training.csv")
-        val_csv = os.path.join(tmpdir, "validation.csv")
-        test_csv = os.path.join(tmpdir, "test.csv")
+            train_csv = os.path.join(tmpdir, "training.csv")
+            val_csv = os.path.join(tmpdir, "validation.csv")
+            test_csv = os.path.join(tmpdir, "test.csv")
 
-        local_csv = generate_data(input_features, output_features, csv_filename)
-        fs_utils.upload_file(local_csv, train_csv)
-        fs_utils.copy(train_csv, val_csv)
-        fs_utils.copy(train_csv, test_csv)
+            local_csv = generate_data(input_features, output_features, csv_filename)
+            fs_utils.upload_file(local_csv, train_csv)
+            fs_utils.copy(train_csv, val_csv)
+            fs_utils.copy(train_csv, test_csv)
 
-        config = {
-            "input_features": input_features,
-            "output_features": output_features,
-            "combiner": {"type": "concat", "output_size": 14},
-            TRAINER: {"epochs": 2},
-        }
+            config = {
+                "input_features": input_features,
+                "output_features": output_features,
+                "combiner": {"type": "concat", "output_size": 14},
+                TRAINER: {"epochs": 2},
+            }
 
-        config_path = os.path.join(tmpdir, "config.yaml")
-        with fs_utils.open_file(config_path, "w") as f:
-            yaml.dump(config, f)
+            config_path = os.path.join(tmpdir, "config.yaml")
+            with fs_utils.open_file(config_path, "w") as f:
+                yaml.dump(config, f)
 
-        backend_config = {
-            "type": backend,
-        }
-        backend = initialize_backend(backend_config)
+            backend_config = {
+                "type": backend,
+            }
+            backend = initialize_backend(backend_config)
 
-        output_directory = os.path.join(tmpdir, "output")
-        model = LudwigModel(config_path, backend=backend)
-        _, _, output_run_directory = model.train(
-            training_set=train_csv, validation_set=val_csv, test_set=test_csv, output_directory=output_directory
-        )
+            output_directory = os.path.join(tmpdir, "output")
+            model = LudwigModel(config_path, backend=backend)
+            _, _, output_run_directory = model.train(
+                training_set=train_csv, validation_set=val_csv, test_set=test_csv, output_directory=output_directory
+            )
 
-        assert os.path.join(output_directory, "api_experiment_run") == output_run_directory
-        assert fs_utils.path_exists(os.path.join(output_run_directory, DESCRIPTION_FILE_NAME))
-        assert fs_utils.path_exists(os.path.join(output_run_directory, "training_statistics.json"))
-        assert fs_utils.path_exists(os.path.join(output_run_directory, "model"))
-        assert fs_utils.path_exists(os.path.join(output_run_directory, "model", "model_weights"))
+            assert os.path.join(output_directory, "api_experiment_run") == output_run_directory
+            assert fs_utils.path_exists(os.path.join(output_run_directory, DESCRIPTION_FILE_NAME))
+            assert fs_utils.path_exists(os.path.join(output_run_directory, "training_statistics.json"))
+            assert fs_utils.path_exists(os.path.join(output_run_directory, "model"))
+            assert fs_utils.path_exists(os.path.join(output_run_directory, "model", "model_weights"))
 
-        model.predict(dataset=test_csv, output_directory=output_directory)
+            model.predict(dataset=test_csv, output_directory=output_directory)
 
-        # Train again, this time the cache will be used
-        # Resume from the remote output directory
-        model.train(
-            training_set=train_csv, validation_set=val_csv, test_set=test_csv, model_resume_path=output_run_directory
-        )
+            # Train again, this time the cache will be used
+            # Resume from the remote output directory
+            model.train(
+                training_set=train_csv,
+                validation_set=val_csv,
+                test_set=test_csv,
+                model_resume_path=output_run_directory,
+            )
diff --git a/tests/integration_tests/utils.py b/tests/integration_tests/utils.py
index 64a9b9b4d48..1cb720cc046 100644
--- a/tests/integration_tests/utils.py
+++ b/tests/integration_tests/utils.py
@@ -42,7 +42,7 @@
 from ludwig.features.feature_utils import compute_feature_hash
 from ludwig.trainers.trainer import Trainer
 from ludwig.utils import fs_utils
-from ludwig.utils.data_utils import read_csv, replace_file_extension
+from ludwig.utils.data_utils import read_csv, replace_file_extension, use_credentials
 
 logger = logging.getLogger(__name__)
 
@@ -886,10 +886,25 @@ def remote_tmpdir(fs_protocol, bucket):
     prefix = f"tmp_{uuid.uuid4().hex}"
     tmpdir = f"{fs_protocol}://{bucket}/{prefix}"
     try:
+        with use_credentials(minio_test_creds()):
+            fs_utils.makedirs(f"{fs_protocol}://{bucket}", exist_ok=True)
         yield tmpdir
     finally:
         try:
-            fs_utils.delete(tmpdir, recursive=True)
+            with use_credentials(minio_test_creds()):
+                fs_utils.delete(tmpdir, recursive=True)
         except FileNotFoundError as e:
             logging.info(f"failed to delete remote tempdir, does not exist: {str(e)}")
             pass
+
+
+def minio_test_creds():
+    return {
+        "s3": {
+            "client_kwargs": {
+                "endpoint_url": os.environ.get("LUDWIG_MINIO_ENDPOINT", "http://localhost:9000"),
+                "aws_access_key_id": os.environ.get("LUDWIG_MINIO_ACCESS_KEY", "minio"),
+                "aws_secret_access_key": os.environ.get("LUDWIG_MINIO_SECRET_KEY", "minio123"),
+            }
+        }
+    }
diff --git a/tests/ludwig/utils/test_backward_compatibility.py b/tests/ludwig/utils/test_backward_compatibility.py
index 6db0df0b9d4..4c64ca655c0 100644
--- a/tests/ludwig/utils/test_backward_compatibility.py
+++ b/tests/ludwig/utils/test_backward_compatibility.py
@@ -23,6 +23,7 @@
 from ludwig.schema import validate_config
 from ludwig.schema.trainer import ECDTrainerConfig
 from ludwig.utils.backward_compatibility import (
+    _update_backend_cache_credentials,
     _upgrade_encoder_decoder_params,
     _upgrade_feature,
     _upgrade_preprocessing_split,
@@ -657,3 +658,13 @@ def test_upgrade_model_progress_already_valid():
 
     unchanged_model_progress = upgrade_model_progress(valid_model_progress)
     assert unchanged_model_progress == valid_model_progress
+
+
+def test_cache_credentials_backward_compatibility():
+    # From v0.6.3.
+    creds = {"s3": {"client_kwargs": {}}}
+    backend = {"type": "local", "cache_dir": "/foo/bar", "cache_credentials": creds}
+
+    _update_backend_cache_credentials(backend)
+
+    assert backend == {"type": "local", "cache_dir": "/foo/bar", "credentials": {"cache": creds}}

From 5f63ce7d527b86c6d19f6aec1b4a454d83a8aa28 Mon Sep 17 00:00:00 2001
From: Justin <justinxzhao@gmail.com>
Date: Thu, 13 Oct 2022 13:58:19 -0400
Subject: [PATCH 12/29] Update version to 0.7.dev (#2625)

---
 ludwig/globals.py | 2 +-
 setup.py          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ludwig/globals.py b/ludwig/globals.py
index 3049bd04384..3c6dbbdfb7b 100644
--- a/ludwig/globals.py
+++ b/ludwig/globals.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 # ==============================================================================
 
-LUDWIG_VERSION = "0.6.dev"
+LUDWIG_VERSION = "0.7.dev"
 
 MODEL_WEIGHTS_FILE_NAME = "model_weights"
 MODEL_HYPERPARAMETERS_FILE_NAME = "model_hyperparameters.json"
diff --git a/setup.py b/setup.py
index c98c2c44fe0..2854316cc52 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@
 
 setup(
     name="ludwig",
-    version="0.6.dev",
+    version="0.7.dev",
     description="Declarative machine learning: End-to-end machine learning pipelines using data-driven configurations.",
     long_description=long_description,
     long_description_content_type="text/markdown",

From 9600d59079d7fc55657f1c7abb1fe793cd19df47 Mon Sep 17 00:00:00 2001
From: Travis Addair <tgaddair@gmail.com>
Date: Thu, 13 Oct 2022 11:24:30 -0700
Subject: [PATCH 13/29] Unpin Ray nightly in CI (#2614)

---
 .github/workflows/pytest.yml        |  4 ++--
 ludwig/__init__.py                  |  3 +++
 ludwig/utils/visualization_utils.py | 15 ++++++++-------
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 43c57b8dde7..327c4a58b8e 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -114,10 +114,10 @@ jobs:
           if [ "$MARKERS" == "distributed" ]; then
             if [ "$RAY_VERSION" == "nightly" ]; then
               # NOTE: hardcoded for python 3.9 on Linux
-              # pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl
+              pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl
 
               # NOTE: Pinned Ray nightly version to September 20, 2022 to get tests to pass
-              pip install https://s3-us-west-2.amazonaws.com/ray-wheels/master/fa182d3c9e478ef4c169ccf7459764768996110f/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl
+              # pip install https://s3-us-west-2.amazonaws.com/ray-wheels/master/fa182d3c9e478ef4c169ccf7459764768996110f/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl
             else
               pip install ray==$RAY_VERSION
             fi
diff --git a/ludwig/__init__.py b/ludwig/__init__.py
index fd334b88605..227355abdb4 100644
--- a/ludwig/__init__.py
+++ b/ludwig/__init__.py
@@ -18,3 +18,6 @@
 from ludwig.globals import LUDWIG_VERSION as __version__  # noqa
 
 logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
+
+# Disable annoying message about NUMEXPR_MAX_THREADS
+logging.getLogger("numexpr").setLevel(logging.WARNING)
diff --git a/ludwig/utils/visualization_utils.py b/ludwig/utils/visualization_utils.py
index 95865e726ad..0e5050cb668 100644
--- a/ludwig/utils/visualization_utils.py
+++ b/ludwig/utils/visualization_utils.py
@@ -15,7 +15,6 @@
 # ==============================================================================
 import copy
 import logging
-import sys
 from collections import Counter
 from sys import platform
 
@@ -32,7 +31,10 @@
     import matplotlib as mpl
 
     if platform == "darwin":  # OS X
-        mpl.use("TkAgg")
+        try:
+            mpl.use("TkAgg")
+        except ModuleNotFoundError:
+            logging.warning("Unable to set TkAgg backend for matplotlib. Your Python may not be configured for Tk")
     import matplotlib.patches as patches
     import matplotlib.path as path
     import matplotlib.patheffects as PathEffects
@@ -41,13 +43,12 @@
     from matplotlib import ticker
     from matplotlib.lines import Line2D
     from mpl_toolkits.mplot3d import Axes3D
-except ImportError:
-    logger.error(
-        " matplotlib or seaborn are not installed. "
+except ImportError as e:
+    raise RuntimeError(
+        "matplotlib or seaborn are not installed. "
         "In order to install all visualization dependencies run "
         "pip install ludwig[viz]"
-    )
-    sys.exit(-1)
+    ) from e
 
 INT_QUANTILES = 10
 FLOAT_QUANTILES = 10

From dc24b2f5f43821aa60dce438ce137f9ffb02fff9 Mon Sep 17 00:00:00 2001
From: Arnav Garg <106701836+arnavgarg1@users.noreply.github.com>
Date: Thu, 13 Oct 2022 11:25:23 -0700
Subject: [PATCH 14/29] Skip Horovod 0.26 installation, add packaging to
 requirements.txt (#2642)

---
 requirements.txt             | 1 +
 requirements_distributed.txt | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 125b06ac62e..966cd11e1c6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,6 +31,7 @@ psutil
 protobuf==3.20.1 # https://github.com/databrickslabs/dbx/issues/257
 experiment_impact_tracker
 gpustat
+packaging
 
 # new data format support
 xlwt            # excel
diff --git a/requirements_distributed.txt b/requirements_distributed.txt
index a59c2d2d53b..c9acb3e0f5a 100644
--- a/requirements_distributed.txt
+++ b/requirements_distributed.txt
@@ -3,7 +3,7 @@ dask[dataframe]
 pyarrow==6.0.1 # https://github.com/ray-project/ray/issues/22310
 
 # requirements for horovod
-horovod[pytorch]>=0.24.0
+horovod[pytorch]>=0.24.0,!=0.26.0
 # requirements for ray
 ray[default,data,serve,tune]>=1.13.0
 pickle5; python_version <= '3.7'

From 03c39d4449b8fae95b3731c35a1f286ce9b2f446 Mon Sep 17 00:00:00 2001
From: Arnav Garg <106701836+arnavgarg1@users.noreply.github.com>
Date: Thu, 13 Oct 2022 14:22:00 -0700
Subject: [PATCH 15/29] [Annotations] Callbacks (#2641)

* Added API annotations for Ludwig callbacks

* fix imports

* update comment to TODO
---
 ludwig/api_annotations.py                 | 2 +-
 ludwig/backend/ray.py                     | 2 ++
 ludwig/benchmarking/profiler_callbacks.py | 3 +++
 ludwig/callbacks.py                       | 3 +++
 ludwig/contribs/aim.py                    | 2 ++
 ludwig/contribs/comet.py                  | 2 ++
 ludwig/contribs/mlflow/__init__.py        | 2 ++
 ludwig/contribs/wandb.py                  | 2 ++
 ludwig/contribs/whylogs/__init__.py       | 2 ++
 ludwig/hyperopt/execution.py              | 2 ++
 10 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/ludwig/api_annotations.py b/ludwig/api_annotations.py
index 63464274a3b..65e82ff1b87 100644
--- a/ludwig/api_annotations.py
+++ b/ludwig/api_annotations.py
@@ -5,7 +5,7 @@ def PublicAPI(*args, **kwargs):
     """Annotation for documenting public APIs. Public APIs are classes and methods exposed to end users of Ludwig.
 
     If stability="stable", the APIs will remain backwards compatible across minor Ludwig releases
-    (e.g., Ludwig 0.6 -> Ludwig 0.5).
+    (e.g., Ludwig 0.6 -> Ludwig 0.7).
 
     If stability="experimental", the APIs can be used by advanced users who are tolerant to and expect
     breaking changes. This will likely be seen in the case of incremental new feature development.
diff --git a/ludwig/backend/ray.py b/ludwig/backend/ray.py
index f21a3423eb3..d1f741d79f1 100644
--- a/ludwig/backend/ray.py
+++ b/ludwig/backend/ray.py
@@ -41,6 +41,7 @@
 if TYPE_CHECKING:
     from ludwig.api import LudwigModel
 
+from ludwig.api_annotations import DeveloperAPI
 from ludwig.backend.base import Backend, RemoteTrainingMixin
 from ludwig.backend.datasource import BinaryIgnoreNoneTypeDatasource
 from ludwig.constants import (
@@ -302,6 +303,7 @@ def tune_learning_rate_fn(
         hvd.shutdown()
 
 
+@DeveloperAPI
 class TqdmCallback(rt.TrainingCallback):
     """Class for a custom ray callback that updates tqdm progress bars in the driver process."""
 
diff --git a/ludwig/benchmarking/profiler_callbacks.py b/ludwig/benchmarking/profiler_callbacks.py
index a5d5a3dd131..2daa83cfb51 100644
--- a/ludwig/benchmarking/profiler_callbacks.py
+++ b/ludwig/benchmarking/profiler_callbacks.py
@@ -1,10 +1,13 @@
 from typing import Any, Dict
 
+from ludwig.api_annotations import DeveloperAPI
 from ludwig.benchmarking.profiler import LudwigProfiler
 from ludwig.callbacks import Callback
 from ludwig.constants import EVALUATION, PREPROCESSING, TRAINING
 
 
+# TODO: Change annotation to PublicAPI once Ludwig 0.7 is released
+@DeveloperAPI
 class LudwigProfilerCallback(Callback):
     """Class that defines the methods necessary to hook into process."""
 
diff --git a/ludwig/callbacks.py b/ludwig/callbacks.py
index af958c49c2c..2594039a2b6 100644
--- a/ludwig/callbacks.py
+++ b/ludwig/callbacks.py
@@ -17,7 +17,10 @@
 from abc import ABC
 from typing import Any, Callable, Dict, List, Union
 
+from ludwig.api_annotations import PublicAPI
 
+
+@PublicAPI
 class Callback(ABC):
     def on_cmdline(self, cmd: str, *args: List[str]):
         """Called when Ludwig is run on the command line with the callback enabled.
diff --git a/ludwig/contribs/aim.py b/ludwig/contribs/aim.py
index 5eb02dfab24..bd44adcabd4 100644
--- a/ludwig/contribs/aim.py
+++ b/ludwig/contribs/aim.py
@@ -1,6 +1,7 @@
 import json
 import logging
 
+from ludwig.api_annotations import PublicAPI
 from ludwig.callbacks import Callback
 from ludwig.utils.data_utils import NumpyEncoder
 from ludwig.utils.package_utils import LazyLoader
@@ -10,6 +11,7 @@
 logger = logging.getLogger(__name__)
 
 
+@PublicAPI
 class AimCallback(Callback):
     """Class that defines the methods necessary to hook into process."""
 
diff --git a/ludwig/contribs/comet.py b/ludwig/contribs/comet.py
index 0328c8272a5..32ad590af68 100644
--- a/ludwig/contribs/comet.py
+++ b/ludwig/contribs/comet.py
@@ -16,6 +16,7 @@
 import os
 from datetime import datetime
 
+from ludwig.api_annotations import PublicAPI
 from ludwig.callbacks import Callback
 from ludwig.utils.package_utils import LazyLoader
 
@@ -24,6 +25,7 @@
 logger = logging.getLogger(__name__)
 
 
+@PublicAPI
 class CometCallback(Callback):
     """Class that defines the methods necessary to hook into process."""
 
diff --git a/ludwig/contribs/mlflow/__init__.py b/ludwig/contribs/mlflow/__init__.py
index 5edc4bea795..c3efffc7c7e 100644
--- a/ludwig/contribs/mlflow/__init__.py
+++ b/ludwig/contribs/mlflow/__init__.py
@@ -4,6 +4,7 @@
 import threading
 from typing import Any, Dict
 
+from ludwig.api_annotations import DeveloperAPI
 from ludwig.callbacks import Callback
 from ludwig.constants import TRAINER
 from ludwig.data.dataset.base import Dataset
@@ -27,6 +28,7 @@ def _get_or_create_experiment_id(experiment_name, artifact_uri: str = None):
     return mlflow.create_experiment(name=experiment_name, artifact_location=artifact_uri)
 
 
+@DeveloperAPI
 class MlflowCallback(Callback):
     def __init__(self, tracking_uri=None, log_artifacts: bool = True):
         self.experiment_id = None
diff --git a/ludwig/contribs/wandb.py b/ludwig/contribs/wandb.py
index a6065c54f58..3f3bb0940ce 100644
--- a/ludwig/contribs/wandb.py
+++ b/ludwig/contribs/wandb.py
@@ -15,6 +15,7 @@
 import logging
 import os
 
+from ludwig.api_annotations import PublicAPI
 from ludwig.callbacks import Callback
 from ludwig.utils.package_utils import LazyLoader
 
@@ -23,6 +24,7 @@
 logger = logging.getLogger(__name__)
 
 
+@PublicAPI
 class WandbCallback(Callback):
     """Class that defines the methods necessary to hook into process."""
 
diff --git a/ludwig/contribs/whylogs/__init__.py b/ludwig/contribs/whylogs/__init__.py
index 26bf10aae77..57bc46aba3d 100644
--- a/ludwig/contribs/whylogs/__init__.py
+++ b/ludwig/contribs/whylogs/__init__.py
@@ -1,9 +1,11 @@
+from ludwig.api_annotations import PublicAPI
 from ludwig.callbacks import Callback
 from ludwig.utils.package_utils import LazyLoader
 
 whylogs = LazyLoader("whylogs", globals(), "whylogs")
 
 
+@PublicAPI
 class WhyLogsCallback(Callback):
     def __init__(self, path_to_config=None):
         self.path_to_config = path_to_config
diff --git a/ludwig/hyperopt/execution.py b/ludwig/hyperopt/execution.py
index a8e50e398f7..98d2db48eab 100644
--- a/ludwig/hyperopt/execution.py
+++ b/ludwig/hyperopt/execution.py
@@ -26,6 +26,7 @@
 from ray.util.queue import Queue as RayQueue
 
 from ludwig.api import LudwigModel
+from ludwig.api_annotations import PublicAPI
 from ludwig.backend import initialize_backend, RAY
 from ludwig.backend.ray import initialize_ray
 from ludwig.callbacks import Callback
@@ -889,6 +890,7 @@ def _register(name, trainable):
         return HyperoptResults(ordered_trials=ordered_trials, experiment_analysis=analysis)
 
 
+@PublicAPI
 class CallbackStopper(Stopper):
     """Ray Tune Stopper that triggers the entire job to stop if one callback returns True."""
 

From de4085b4acbd47517b5fbb8c0bb539d072204de6 Mon Sep 17 00:00:00 2001
From: connor-mccorm <97468934+connor-mccorm@users.noreply.github.com>
Date: Thu, 13 Oct 2022 17:11:28 -0700
Subject: [PATCH 16/29] Fix automl (#2639)

* AutoML fix and test coverage

* Refactored tests

* added test to increase coverage in existing automl file, deleted new unnecessary automl file

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Address feedback, comment assert statements

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 ludwig/automl/automl.py                | 22 +++++++++++++-----
 tests/integration_tests/test_automl.py | 31 ++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/ludwig/automl/automl.py b/ludwig/automl/automl.py
index 57ea670c09a..5f63893139f 100644
--- a/ludwig/automl/automl.py
+++ b/ludwig/automl/automl.py
@@ -27,10 +27,12 @@
     AUTOML_DEFAULT_IMAGE_ENCODER,
     AUTOML_DEFAULT_TABULAR_MODEL,
     AUTOML_DEFAULT_TEXT_ENCODER,
+    ENCODER,
     HYPEROPT,
     IMAGE,
     TABULAR,
     TEXT,
+    TYPE,
 )
 from ludwig.contrib import add_contrib_callback_args
 from ludwig.globals import LUDWIG_VERSION
@@ -278,17 +280,25 @@ def _model_select(
         # text heuristics
         for input_feature in base_config["input_features"]:
             # default text encoder is bert
-            if input_feature["type"] == TEXT:
+            if input_feature[TYPE] == TEXT:
                 model_category = TEXT
-                input_feature["encoder"] = AUTOML_DEFAULT_TEXT_ENCODER
-                base_config = merge_dict(base_config, default_configs[TEXT][AUTOML_DEFAULT_TEXT_ENCODER])
+                if ENCODER in input_feature:
+                    input_feature[ENCODER][TYPE] = AUTOML_DEFAULT_TEXT_ENCODER
+                else:
+                    input_feature[ENCODER] = {TYPE: AUTOML_DEFAULT_TEXT_ENCODER}
                 base_config[HYPEROPT]["executor"]["num_samples"] = 5  # set for small hyperparameter search space
 
             # TODO (ASN): add image heuristics
-            if input_feature["type"] == IMAGE:
+            if input_feature[TYPE] == IMAGE:
                 model_category = IMAGE
-                input_feature["encoder"] = AUTOML_DEFAULT_IMAGE_ENCODER
-                base_config = merge_dict(base_config, default_configs["combiner"]["concat"])
+                if ENCODER in input_feature:
+                    input_feature[ENCODER][TYPE] = AUTOML_DEFAULT_IMAGE_ENCODER
+                else:
+                    input_feature[ENCODER] = {TYPE: AUTOML_DEFAULT_IMAGE_ENCODER}
+
+        # Merge outside the for loop: merge_dict returns a deep copy, so merging inside would drop later encoder edits.
+        base_config = merge_dict(base_config, default_configs[TEXT][AUTOML_DEFAULT_TEXT_ENCODER])
+        base_config = merge_dict(base_config, default_configs["combiner"]["concat"])
 
     # override and constrain automl config based on user specified values
     if user_config is not None:
diff --git a/tests/integration_tests/test_automl.py b/tests/integration_tests/test_automl.py
index f349f25aed4..fa5e280a981 100644
--- a/tests/integration_tests/test_automl.py
+++ b/tests/integration_tests/test_automl.py
@@ -8,8 +8,8 @@
 import pytest
 
 from ludwig.api import LudwigModel
-from ludwig.constants import COLUMN, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PREPROCESSING, SPLIT, TRAINER, TYPE
-from tests.integration_tests.utils import category_feature, generate_data, number_feature
+from ludwig.constants import COLUMN, ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PREPROCESSING, SPLIT, TRAINER, TYPE
+from tests.integration_tests.utils import category_feature, generate_data, image_feature, number_feature, text_feature
 
 try:
     import dask.dataframe as dd
@@ -88,6 +88,33 @@ def test_autoconfig_preprocessing_imbalanced():
     assert config[PREPROCESSING][SPLIT] == {TYPE: "stratify", COLUMN: "category"}
 
 
+@pytest.mark.distributed
+def test_autoconfig_preprocessing_text_image(tmpdir):
+    image_dest_folder = os.path.join(tmpdir, "generated_images")
+
+    input_features = [text_feature(preprocessing={"tokenizer": "space"}), image_feature(folder=image_dest_folder)]
+    output_features = [category_feature(output_feature=True)]
+
+    # Generate Dataset
+    rel_path = generate_data(input_features, output_features, os.path.join(tmpdir, "dataset.csv"))
+    df = pd.read_csv(rel_path)
+    target = df.columns[-1]
+
+    config = create_auto_config(dataset=df, target=target, time_limit_s=1, tune_for_memory=False)
+
+    # Check no features shuffled around
+    assert len(input_features) == 2
+    assert len(output_features) == 1
+
+    # Check encoders are properly nested
+    assert isinstance(config[INPUT_FEATURES][0][ENCODER], dict)
+    assert isinstance(config[INPUT_FEATURES][1][ENCODER], dict)
+
+    # Check automl default encoders are properly set
+    assert config[INPUT_FEATURES][0][ENCODER][TYPE] == "bert"
+    assert config[INPUT_FEATURES][1][ENCODER][TYPE] == "stacked_cnn"
+
+
 @pytest.mark.distributed
 @pytest.mark.parametrize("time_budget", [200, 1], ids=["high", "low"])
 def test_train_with_config(time_budget, test_data, ray_cluster_2cpu, tmpdir):

From 9fff216c771fc49be4298b2a818d9b9d997623dd Mon Sep 17 00:00:00 2001
From: abidwael <103003638+abidwael@users.noreply.github.com>
Date: Fri, 14 Oct 2022 10:08:21 -0700
Subject: [PATCH 17/29] accepting dictionary as input to
 `benchmarking.benchmark` (#2626)

* accepting dictionary as input to `benchmark` and returning `BenchmarkingArtifacts`

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* adopting google style docstrings

* replace `BenchmarkingArtifact` class with dataclass

* address comments

* changing where validation happens

* add unit test

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* split test into two functions

* formatting

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 ludwig/benchmarking/README.md                 |  15 ++-
 ludwig/benchmarking/artifacts.py              |  60 +++++++++
 ludwig/benchmarking/benchmark.py              |  48 ++++---
 ludwig/benchmarking/utils.py                  | 123 ++++++++++++++----
 .../invalid/benchmarking_config_1.yaml        |  19 +++
 .../invalid/benchmarking_config_2.yaml        |  17 +++
 .../invalid/benchmarking_config_3.yaml        |  18 +++
 .../example_files/process_config.py           |  29 +++++
 .../valid/benchmarking_config_1.yaml          |  24 ++++
 .../valid/benchmarking_config_2.yaml          |  13 ++
 .../valid/benchmarking_config_3.yaml          |  19 +++
 .../ludwig/benchmarking/test_benchmarking.py  |  31 +++++
 12 files changed, 369 insertions(+), 47 deletions(-)
 create mode 100644 ludwig/benchmarking/artifacts.py
 create mode 100644 tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_1.yaml
 create mode 100644 tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_2.yaml
 create mode 100644 tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_3.yaml
 create mode 100644 tests/ludwig/benchmarking/example_files/process_config.py
 create mode 100644 tests/ludwig/benchmarking/example_files/valid/benchmarking_config_1.yaml
 create mode 100644 tests/ludwig/benchmarking/example_files/valid/benchmarking_config_2.yaml
 create mode 100644 tests/ludwig/benchmarking/example_files/valid/benchmarking_config_3.yaml
 create mode 100644 tests/ludwig/benchmarking/test_benchmarking.py

diff --git a/ludwig/benchmarking/README.md b/ludwig/benchmarking/README.md
index a2e229fdb5d..8768302f25e 100644
--- a/ludwig/benchmarking/README.md
+++ b/ludwig/benchmarking/README.md
@@ -42,10 +42,6 @@ You can find an example of a benchmarking config in the `examples/` directory.
 
 ## Basic Usage
 
-basic: manually specify datasets, configs and run experiments. talk about the
-hyperopt on vs off distinction, how the config parameters can be specified
-for each experiment. explain the concept of an experiment
-
 Say you implemented a new feature and would like to test it on several datasets.
 In this case, this is what the benchmarking config could look like
 
@@ -210,6 +206,17 @@ full_bench_with_profiler_with_torch
 The only difference is the `system_resource_usage` and `torch_ops_resource_usage`.
 The difference between these two outputs can be found in the `LudwigProfiler` README.
 
+## Parameters and defaults
+
+Each of these parameters can also be specified in the experiments section to override the global value.
+If not specified, the value of the global parameter will be propagated to the experiments.
+
+- `experiment_name` (required): name of the benchmarking run.
+- `export` (required): dictionary specifying whether to export the experiment artifacts and the export path.
+- `hyperopt` (optional, default `false`): whether this is a hyperopt run or `LudwigModel.experiment`.
+- `process_config_file_path` (optional, unset by default): path to python script that will modify configs.
+- `profiler` (optional, disabled by default): dictionary specifying whether to use the profiler and its parameters.
+
 ## Comparing experiments
 
 You can summarize the exported artifacts of two experiments on multiple datasets.
diff --git a/ludwig/benchmarking/artifacts.py b/ludwig/benchmarking/artifacts.py
new file mode 100644
index 00000000000..994424dc579
--- /dev/null
+++ b/ludwig/benchmarking/artifacts.py
@@ -0,0 +1,60 @@
+import os
+from dataclasses import dataclass
+from typing import Any, Dict
+
+from ludwig.utils.data_utils import load_json, load_yaml
+
+
+@dataclass
+class BenchmarkingResult:
+    # The Ludwig benchmarking config.
+    benchmarking_config: Dict[str, Any]
+
+    # The config for one experiment.
+    experiment_config: Dict[str, Any]
+
+    # The Ludwig config used to run the experiment.
+    ludwig_config: Dict[str, Any]
+
+    # The python script that is used to process the config before being used.
+    process_config_file: str
+
+    # Loaded `description.json` file.
+    description: Dict[str, Any]
+
+    # Loaded `test_statistics.json` file.
+    test_statistics: Dict[str, Any]
+
+    # Loaded `training_statistics.json` file.
+    training_statistics: Dict[str, Any]
+
+    # Loaded `model_hyperparameters.json` file.
+    model_hyperparameters: Dict[str, Any]
+
+    # Loaded `training_progress.json` file.
+    training_progress: Dict[str, Any]
+
+    # Loaded `training_set_metadata.json` file.
+    training_set_metadata: Dict[str, Any]
+
+
+def build_benchmarking_result(benchmarking_config: dict, experiment_idx: int):
+    experiment_config = benchmarking_config["experiments"][experiment_idx]
+    process_config_file = ""
+    if experiment_config["process_config_file_path"]:
+        with open(experiment_config["process_config_file_path"]) as f:
+            process_config_file = "".join(f.readlines())
+    experiment_run_path = os.path.join(experiment_config["experiment_name"], "experiment_run")
+
+    return BenchmarkingResult(
+        benchmarking_config=benchmarking_config,
+        experiment_config=experiment_config,
+        ludwig_config=load_yaml(experiment_config["config_path"]),
+        process_config_file=process_config_file,
+        description=load_json(os.path.join(experiment_run_path, "description.json")),
+        test_statistics=load_json(os.path.join(experiment_run_path, "test_statistics.json")),
+        training_statistics=load_json(os.path.join(experiment_run_path, "training_statistics.json")),
+        model_hyperparameters=load_json(os.path.join(experiment_run_path, "model", "model_hyperparameters.json")),
+        training_progress=load_json(os.path.join(experiment_run_path, "model", "training_progress.json")),
+        training_set_metadata=load_json(os.path.join(experiment_run_path, "model", "training_set_metadata.json")),
+    )
diff --git a/ludwig/benchmarking/benchmark.py b/ludwig/benchmarking/benchmark.py
index 8b82c436039..28e9cf865f5 100644
--- a/ludwig/benchmarking/benchmark.py
+++ b/ludwig/benchmarking/benchmark.py
@@ -7,6 +7,7 @@
 
 import ludwig.datasets
 from ludwig.api import LudwigModel
+from ludwig.benchmarking.artifacts import BenchmarkingResult, build_benchmarking_result
 from ludwig.benchmarking.profiler_callbacks import LudwigProfilerCallback
 from ludwig.benchmarking.utils import (
     create_default_config,
@@ -14,7 +15,10 @@
     delete_model_checkpoints,
     export_artifacts,
     load_from_module,
+    populate_benchmarking_config_with_defaults,
+    propagate_global_parameters,
     save_yaml,
+    validate_benchmarking_config,
 )
 from ludwig.contrib import add_contrib_callback_args
 from ludwig.hyperopt.run import hyperopt
@@ -26,15 +30,16 @@
 def setup_experiment(experiment: Dict[str, str]) -> Dict[Any, Any]:
     """Set up the backend and load the Ludwig config.
 
-    experiment: dictionary containing the dataset name, config path, and experiment name.
-    Returns a Ludwig config.
+    Args:
+        experiment: dictionary containing the dataset name, config path, and experiment name.
+    Returns a Ludwig config.
     """
     shutil.rmtree(os.path.join(experiment["experiment_name"]), ignore_errors=True)
     if "config_path" not in experiment:
         experiment["config_path"] = create_default_config(experiment)
     model_config = load_yaml(experiment["config_path"])
 
-    if "process_config_file_path" in experiment:
+    if experiment["process_config_file_path"]:
         process_config_spec = importlib.util.spec_from_file_location(
             "process_config_file_path.py", experiment["process_config_file_path"]
         )
@@ -52,8 +57,8 @@ def setup_experiment(experiment: Dict[str, str]) -> Dict[Any, Any]:
 def benchmark_one(experiment: Dict[str, Union[str, Dict[str, str]]]) -> None:
     """Run a Ludwig exepriment and track metrics given a dataset name.
 
-    experiment: dictionary containing the dataset name, config path, and experiment name.
-    export_artifacts_dict: dictionary containing an export boolean flag and a path to export to.
+    Args:
+        experiment: dictionary containing the dataset name, config path, and experiment name.
     """
     logger.info(f"\nRunning experiment *{experiment['experiment_name']}* on dataset *{experiment['dataset_name']}*")
 
@@ -94,7 +99,7 @@ def benchmark_one(experiment: Dict[str, Union[str, Dict[str, str]]]) -> None:
         model = LudwigModel(
             config=model_config, callbacks=ludwig_profiler_callbacks, logging_level=logging.ERROR, backend=backend
         )
-        _, _, _, output_directory = model.experiment(
+        model.experiment(
             dataset=dataset,
             output_directory=experiment["experiment_name"],
             skip_save_processed_input=True,
@@ -102,36 +107,37 @@ def benchmark_one(experiment: Dict[str, Union[str, Dict[str, str]]]) -> None:
             skip_save_predictions=True,
             skip_collect_predictions=True,
         )
-        delete_model_checkpoints(output_directory)
+        delete_model_checkpoints(experiment["experiment_name"])
 
 
-def benchmark(bench_config_path: str) -> None:
+def benchmark(benchmarking_config: Union[Dict[str, Any], str]) -> Dict[str, BenchmarkingResult]:
     """Launch benchmarking suite from a benchmarking config.
 
-    bench_config_path: config for the benchmarking tool. Specifies datasets and their
-        corresponding Ludwig configs, as well as export options.
+    Args:
+        benchmarking_config: config or config path for the benchmarking tool. Specifies datasets and their
+            corresponding Ludwig configs, as well as export options.
     """
-    benchmarking_config = load_yaml(bench_config_path)
-    for experiment in benchmarking_config["experiments"]:
+    if isinstance(benchmarking_config, str):
+        benchmarking_config = load_yaml(benchmarking_config)
+    validate_benchmarking_config(benchmarking_config)
+    benchmarking_config = populate_benchmarking_config_with_defaults(benchmarking_config)
+    benchmarking_config = propagate_global_parameters(benchmarking_config)
+
+    experiment_artifacts = {}
+    for experiment_idx, experiment in enumerate(benchmarking_config["experiments"]):
         try:
-            if "experiment_name" not in experiment:
-                experiment["experiment_name"] = benchmarking_config["experiment_name"]
-            if "hyperopt" not in experiment:
-                experiment["hyperopt"] = benchmarking_config["hyperopt"]
-            if "process_config_file_path" in benchmarking_config:
-                experiment["process_config_file_path"] = benchmarking_config["process_config_file_path"]
-            if "profiler" in benchmarking_config:
-                experiment["profiler"] = benchmarking_config["profiler"]
             benchmark_one(experiment)
+            dataset_name = experiment["dataset_name"]
+            experiment_artifacts[dataset_name] = build_benchmarking_result(benchmarking_config, experiment_idx)
         except Exception:
             logger.exception(
                 f"Experiment *{experiment['experiment_name']}* on dataset *{experiment['dataset_name']}* failed"
             )
-
         finally:
             if benchmarking_config["export"]["export_artifacts"]:
                 export_base_path = benchmarking_config["export"]["export_base_path"]
                 export_artifacts(experiment, experiment["experiment_name"], export_base_path)
+    return experiment_artifacts
 
 
 def cli(sys_argv):
diff --git a/ludwig/benchmarking/utils.py b/ludwig/benchmarking/utils.py
index 7d1640aeffb..5605e6f5563 100644
--- a/ludwig/benchmarking/utils.py
+++ b/ludwig/benchmarking/utils.py
@@ -38,8 +38,9 @@ def load_from_module(
     """Load the ludwig dataset, optionally subsamples it, and returns a repeatable split. A stratified split is
     used for classification datasets.
 
-    dataset_module: ludwig datasets module (e.g. ludwig.datasets.sst2, ludwig.datasets.ames_housing, etc.)
-    subsample_frac: percentage of the total dataset to load.
+    Args:
+        dataset_module: ludwig datasets module (e.g. ludwig.datasets.sst2, ludwig.datasets.ames_housing, etc.)
+        subsample_frac: percentage of the total dataset to load.
     """
     dataset = dataset_module.load(split=False)
     if subsample_frac < 1:
@@ -58,12 +59,13 @@ def load_from_module(
 def export_artifacts(experiment: Dict[str, str], experiment_output_directory: str, export_base_path: str):
     """Save the experiment artifacts to the `bench_export_directory`.
 
-    :param experiment: experiment dict that contains "dataset_name" (e.g. ames_housing),
-        "experiment_name" (specified by user), and "config_path" (path to experiment config.
-        Relative to ludwig/benchmarks/configs).
-    :param experiment_output_directory: path where the model, data, and logs of the experiment are saved.
-    :param export_base_path: remote or local path (directory) where artifacts are
-        exported. (e.g. s3://benchmarking.us-west-2.ludwig.com/bench/ or your/local/bench/)
+    Args:
+        experiment: experiment dict that contains "dataset_name" (e.g. ames_housing),
+            "experiment_name" (specified by user), and "config_path" (path to experiment config.
+            Relative to ludwig/benchmarks/configs).
+        experiment_output_directory: path where the model, data, and logs of the experiment are saved.
+        export_base_path: remote or local path (directory) where artifacts are
+            exported. (e.g. s3://benchmarking.us-west-2.ludwig.com/bench/ or your/local/bench/)
     """
     protocol, _ = fsspec.core.split_protocol(export_base_path)
     fs, _ = get_fs_and_path(export_base_path)
@@ -95,12 +97,13 @@ def download_artifacts(
 ) -> Tuple[str, List[str]]:
     """Download benchmarking artifacts for two experiments.
 
-    bench_config: bench config file. Can be the same one that was used to run
-        these experiments.
-    base_experiment: name of the experiment we're comparing against.
-    experimental_experiment: name of the experiment we're comparing.
-    download_base_path: base path under which live the stored artifacts of
-        the benchmarking experiments.
+    Args:
+        bench_config_path: bench config file path. Can be the same one that was used to run
+            these experiments.
+        base_experiment: name of the experiment we're comparing against.
+        experimental_experiment: name of the experiment we're comparing.
+        download_base_path: base path under which live the stored artifacts of
+            the benchmarking experiments.
     """
     bench_config = load_yaml(bench_config_path)
     protocol, _ = fsspec.core.split_protocol(download_base_path)
@@ -129,12 +132,13 @@ async def download_one(
 ) -> Tuple[str, str]:
     """Download `config.yaml` and `report.json` for an experiment.
 
-    fs: filesystem to use to download.
-    download_base_path: base path under which live the stored artifacts of
-        the benchmarking experiments.
-    dataset_name: name of the dataset we ran the experiments on.
-    experiment_name: name of the experiment (e.g. `v0.5.3_with_bert`)
-    local_dir: local directory under which the artifacts will be downloaded.
+    Args:
+        fs: filesystem to use to download.
+        download_base_path: base path under which live the stored artifacts of
+            the benchmarking experiments.
+        dataset_name: name of the dataset we ran the experiments on.
+        experiment_name: name of the experiment (e.g. `v0.5.3_with_bert`)
+        local_dir: local directory under which the artifacts will be downloaded.
     """
     loop = asyncio.get_running_loop()
     local_experiment_dir = os.path.join(local_dir, dataset_name, experiment_name)
@@ -155,12 +159,77 @@ async def download_one(
     return dataset_name, local_dir
 
 
+def validate_benchmarking_config(benchmarking_config: Dict[str, Any]) -> None:
+    """Validates the parameters of the benchmarking config.
+
+    Args:
+        benchmarking_config: benchmarking config dictionary.
+
+    Raises:
+        ValueError if any of the expected parameters is not there.
+    """
+    if "experiment_name" not in benchmarking_config and not all(
+        "experiment_name" in experiment for experiment in benchmarking_config["experiments"]
+    ):
+        raise ValueError("You must either specify a global experiment name or an experiment name for each experiment.")
+    if "export" not in benchmarking_config:
+        raise ValueError(
+            """You must specify export parameters. Example:
+            export:
+              export_artifacts: true
+              export_base_path: s3://benchmarking.us-west-2.ludwig.com/bench/    # include the slash at the end.
+        """
+        )
+    if "experiments" not in benchmarking_config:
+        raise ValueError("You must specify a list of experiments.")
+    for experiment in benchmarking_config["experiments"]:
+        if "dataset_name" not in experiment:
+            raise ValueError("A Ludwig dataset must be specified.")
+
+
+def populate_benchmarking_config_with_defaults(benchmarking_config: Dict[str, Any]) -> Dict[str, Any]:
+    """Populates the parameters of the benchmarking config with defaults.
+
+    Args:
+        benchmarking_config: benchmarking config dictionary.
+    """
+    if "hyperopt" not in benchmarking_config:
+        benchmarking_config["hyperopt"] = False
+    if "process_config_file_path" not in benchmarking_config:
+        benchmarking_config["process_config_file_path"] = None
+    if "profiler" not in benchmarking_config:
+        benchmarking_config["profiler"] = {"enable": False, "use_torch_profiler": False, "logging_interval": None}
+    return benchmarking_config
+
+
+def propagate_global_parameters(benchmarking_config: Dict[str, Any]) -> Dict[str, Any]:
+    """Propagate the global parameters of the benchmarking config to local experiments.
+
+    Args:
+        benchmarking_config: benchmarking config dictionary.
+    """
+    for experiment in benchmarking_config["experiments"]:
+        if "experiment_name" not in experiment:
+            experiment["experiment_name"] = benchmarking_config["experiment_name"]
+        if "export" not in experiment:
+            experiment["export"] = benchmarking_config["export"]
+        if "hyperopt" not in experiment:
+            experiment["hyperopt"] = benchmarking_config["hyperopt"]
+        if "process_config_file_path" not in experiment:
+            experiment["process_config_file_path"] = benchmarking_config["process_config_file_path"]
+        if "profiler" not in experiment:
+            experiment["profiler"] = benchmarking_config["profiler"]
+    return benchmarking_config
+
+
 def create_default_config(experiment: Dict[str, Any]) -> str:
     """Create a Ludwig config that only contains input and output features.
 
-    :param dataset_name: name of the dataset to load the config for.
+    Args:
+        experiment: experiment dictionary.
 
-    return: path where the default config is saved.
+    Returns:
+        path where the default config is saved.
     """
     model_config = model_configs_for_dataset(experiment["dataset_name"])["default"]
 
@@ -175,12 +244,22 @@ def create_default_config(experiment: Dict[str, Any]) -> str:
 
 
 def delete_model_checkpoints(output_directory: str):
+    """Deletes outputs of the experiment run that we don't want to save with the artifacts.
+
+    Args:
+        output_directory: output directory of the experiment run.
+    """
     shutil.rmtree(os.path.join(output_directory, "model", "training_checkpoints"), ignore_errors=True)
     if os.path.isfile(os.path.join(output_directory, "model", "model_weights")):
         os.remove(os.path.join(output_directory, "model", "model_weights"))
 
 
 def delete_hyperopt_outputs(output_directory: str):
+    """Deletes outputs of the hyperopt run that we don't want to save with the artifacts.
+
+    Args:
+        output_directory: output directory of the hyperopt run.
+    """
     for path, currentDirectory, files in os.walk(output_directory):
         for file in files:
             filename = os.path.join(path, file)
diff --git a/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_1.yaml b/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_1.yaml
new file mode 100644
index 00000000000..c91dbda5dc6
--- /dev/null
+++ b/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_1.yaml
@@ -0,0 +1,19 @@
+# This benchmarking config is invalid because the global experiment name is missing and not every experiment defines one.
+process_config_file_path: tests/ludwig/benchmarking/example_files/process_config_example.py
+hyperopt: false
+export:
+  export_artifacts: true
+  export_base_path: s3://benchmarking.us-west-2.ludwig.com/bench/    # include the slash at the end.
+profiler:
+  enable: false
+  use_torch_profiler: false
+  logging_interval: 0.1
+experiments:
+  - dataset_name: ames_housing
+    experiment_name: large_learning_rate
+    config_path: tests/regression_tests/benchmark/configs/ames_housing.yaml
+  - dataset_name: protein
+    config_path: tests/regression_tests/benchmark/configs/protein.yaml
+  - dataset_name: mercedes_benz_greener
+    experiment_name: zscore_normalization
+    config_path: tests/regression_tests/benchmark/configs/mercedes_benz_greener.yaml
diff --git a/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_2.yaml b/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_2.yaml
new file mode 100644
index 00000000000..c1bcd8229be
--- /dev/null
+++ b/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_2.yaml
@@ -0,0 +1,17 @@
+# This benchmarking config is invalid because it's missing the export section.
+experiment_name: github_action
+process_config_file_path: tests/ludwig/benchmarking/example_files/process_config_example.py
+hyperopt: false
+profiler:
+  enable: false
+  use_torch_profiler: false
+  logging_interval: 0.1
+experiments:
+  - dataset_name: ames_housing
+    experiment_name: large_learning_rate
+    config_path: tests/regression_tests/benchmark/configs/ames_housing.yaml
+  - dataset_name: protein
+    config_path: tests/regression_tests/benchmark/configs/protein.yaml
+  - dataset_name: mercedes_benz_greener
+    experiment_name: zscore_normalization
+    config_path: tests/regression_tests/benchmark/configs/mercedes_benz_greener.yaml
diff --git a/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_3.yaml b/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_3.yaml
new file mode 100644
index 00000000000..514e7ed0b5c
--- /dev/null
+++ b/tests/ludwig/benchmarking/example_files/invalid/benchmarking_config_3.yaml
@@ -0,0 +1,18 @@
+# This benchmarking config is invalid because some of the dataset names aren't specified.
+experiment_name: github_action
+process_config_file_path: tests/ludwig/benchmarking/example_files/process_config_example.py
+hyperopt: false
+export:
+  export_artifacts: true
+  export_base_path: s3://benchmarking.us-west-2.ludwig.com/bench/    # include the slash at the end.
+profiler:
+  enable: false
+  use_torch_profiler: false
+  logging_interval: 0.1
+experiments:
+  - experiment_name: large_learning_rate
+    config_path: tests/regression_tests/benchmark/configs/ames_housing.yaml
+  - config_path: tests/regression_tests/benchmark/configs/protein.yaml
+  - dataset_name: mercedes_benz_greener
+    experiment_name: zscore_normalization
+    config_path: tests/regression_tests/benchmark/configs/mercedes_benz_greener.yaml
diff --git a/tests/ludwig/benchmarking/example_files/process_config.py b/tests/ludwig/benchmarking/example_files/process_config.py
new file mode 100644
index 00000000000..cd28f8dafe4
--- /dev/null
+++ b/tests/ludwig/benchmarking/example_files/process_config.py
@@ -0,0 +1,29 @@
+def process_config(ludwig_config: dict, experiment_dict: dict) -> dict:
+    """Modify a Ludwig config.
+
+    :param ludwig_config: a Ludwig config.
+    :param experiment_dict: a benchmarking config experiment dictionary.
+
+    returns: a modified Ludwig config.
+    """
+
+    # Only keep input_features and output_features for the ames_housing dataset.
+    if experiment_dict["dataset_name"] == "ames_housing":
+        main_config_keys = list(ludwig_config.keys())
+        for key in main_config_keys:
+            if key not in ["input_features", "output_features"]:
+                del ludwig_config[key]
+
+    # Set the early_stop criteria to stop training after 7 epochs of no score improvement.
+    ludwig_config["trainer"] = {"early_stop": 7}
+
+    # use sparse encoder for categorical features to mimic logreg
+    ludwig_config["combiner"] = {"type": "concat"}
+    for i, feature in enumerate(ludwig_config["input_features"]):
+        if feature["type"] == "category":
+            ludwig_config["input_features"][i]["encoder"] = "sparse"
+    for i, feature in enumerate(ludwig_config["output_features"]):
+        if feature["type"] == "category":
+            ludwig_config["output_features"][i]["encoder"] = "sparse"
+
+    return ludwig_config
diff --git a/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_1.yaml b/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_1.yaml
new file mode 100644
index 00000000000..49dbb193747
--- /dev/null
+++ b/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_1.yaml
@@ -0,0 +1,24 @@
+# You can specify any of the global parameters locally to any experiment. This will override the global behavior.
+experiment_name: github_action
+process_config_file_path: tests/ludwig/benchmarking/example_files/process_config_example.py
+export:
+  export_artifacts: true
+  export_base_path: s3://benchmarking.us-west-2.ludwig.com/bench/    # include the slash at the end.
+profiler:
+  enable: false
+  use_torch_profiler: false
+  logging_interval: 0.1
+experiments:
+  - dataset_name: ames_housing
+    experiment_name: large_learning_rate
+    config_path: tests/regression_tests/benchmark/configs/ames_housing.yaml
+    hyperopt: true
+  - dataset_name: protein
+    config_path: tests/regression_tests/benchmark/configs/protein.yaml
+    profiler:
+      enable: true
+      use_torch_profiler: true
+      logging_interval: 0.1
+  - dataset_name: mercedes_benz_greener
+    experiment_name: zscore_normalization
+    config_path: tests/regression_tests/benchmark/configs/mercedes_benz_greener.yaml
diff --git a/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_2.yaml b/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_2.yaml
new file mode 100644
index 00000000000..b8aaa2fa67d
--- /dev/null
+++ b/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_2.yaml
@@ -0,0 +1,13 @@
+# This is a minimal example of a valid benchmarking config. The hyperopt section of the benchmarking config
+# will default to false. The profiler section will also default to false.
+experiment_name: github_action
+export:
+  export_artifacts: true
+  export_base_path: s3://benchmarking.us-west-2.ludwig.com/bench/    # include the slash at the end.
+experiments:
+  - dataset_name: ames_housing
+    config_path: tests/regression_tests/benchmark/configs/ames_housing.yaml
+  - dataset_name: protein
+    config_path: tests/regression_tests/benchmark/configs/protein.yaml
+  - dataset_name: mercedes_benz_greener
+    config_path: tests/regression_tests/benchmark/configs/mercedes_benz_greener.yaml
diff --git a/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_3.yaml b/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_3.yaml
new file mode 100644
index 00000000000..6bf1a990e3a
--- /dev/null
+++ b/tests/ludwig/benchmarking/example_files/valid/benchmarking_config_3.yaml
@@ -0,0 +1,19 @@
+# We can skip specifying a global experiment name if it's specified for each experiment.
+process_config_file_path: tests/ludwig/benchmarking/example_files/process_config_example.py
+export:
+  export_artifacts: true
+  export_base_path: s3://benchmarking.us-west-2.ludwig.com/bench/    # include the slash at the end.
+profiler:
+  enable: true
+  use_torch_profiler: false
+  logging_interval: 0.1
+experiments:
+  - dataset_name: ames_housing
+    experiment_name: large_learning_rate
+    config_path: tests/regression_tests/benchmark/configs/ames_housing.yaml
+  - dataset_name: protein
+    experiment_name: decay_rate_0.8
+    config_path: tests/regression_tests/benchmark/configs/protein.yaml
+  - dataset_name: mercedes_benz_greener
+    experiment_name: zscore_normalization
+    config_path: tests/regression_tests/benchmark/configs/mercedes_benz_greener.yaml
diff --git a/tests/ludwig/benchmarking/test_benchmarking.py b/tests/ludwig/benchmarking/test_benchmarking.py
new file mode 100644
index 00000000000..41ebb832f43
--- /dev/null
+++ b/tests/ludwig/benchmarking/test_benchmarking.py
@@ -0,0 +1,31 @@
+import os
+from contextlib import nullcontext as does_not_raise
+
+import pytest
+
+from ludwig.benchmarking.utils import validate_benchmarking_config
+from ludwig.utils.data_utils import load_yaml
+
+
+def get_benchamrking_configs(validity):
+    local_dir = "/".join(__file__.split("/")[:-1])
+    return [
+        os.path.join(local_dir, "example_files", validity, config_fp)
+        for config_fp in os.listdir(os.path.join(local_dir, "example_files", validity))
+    ]
+
+
+@pytest.mark.parametrize("benchmarking_config_fp", get_benchamrking_configs("valid"))
+def test_valid_benchmarking_configs_valid(benchmarking_config_fp):
+    benchmarking_config = load_yaml(benchmarking_config_fp)
+
+    with does_not_raise():
+        validate_benchmarking_config(benchmarking_config)
+
+
+@pytest.mark.parametrize("benchmarking_config_fp", get_benchamrking_configs("invalid"))
+def test_invalid_benchmarking_configs_valid(benchmarking_config_fp):
+    benchmarking_config = load_yaml(benchmarking_config_fp)
+
+    with pytest.raises(ValueError):
+        validate_benchmarking_config(benchmarking_config)

From bb8bef02c002eccbb6369292ac54490875bebbc4 Mon Sep 17 00:00:00 2001
From: Travis Addair <tgaddair@gmail.com>
Date: Fri, 14 Oct 2022 11:38:04 -0700
Subject: [PATCH 18/29] Fixed automl APIs to work with remote filesystems
 (#2650)

---
 ludwig/automl/automl.py                | 36 +++++++++++--------
 ludwig/models/ecd.py                   |  4 ++-
 tests/integration_tests/test_automl.py | 48 +++++++++++++++++++++-----
 3 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/ludwig/automl/automl.py b/ludwig/automl/automl.py
index 5f63893139f..2d185258f1b 100644
--- a/ludwig/automl/automl.py
+++ b/ludwig/automl/automl.py
@@ -12,12 +12,11 @@
 import logging
 import os
 import warnings
-from typing import Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 import numpy as np
 import pandas as pd
 import yaml
-from packaging.version import parse as parse_version
 
 from ludwig.api import LudwigModel
 from ludwig.automl.auto_tune_config import memory_tune_config
@@ -44,7 +43,7 @@
     has_imbalanced_output,
     set_output_feature_metric,
 )
-from ludwig.utils.data_utils import load_dataset
+from ludwig.utils.data_utils import load_dataset, use_credentials
 from ludwig.utils.defaults import default_random_seed
 from ludwig.utils.fs_utils import open_file
 from ludwig.utils.misc_utils import merge_dict
@@ -54,10 +53,8 @@
     import dask.dataframe as dd
     import ray
     from ray.tune import ExperimentAnalysis
-
-    _ray_113 = parse_version(ray.__version__) >= parse_version("1.13.0")
-except ImportError:
-    raise ImportError(" ray is not installed. In order to use auto_train please run pip install ludwig[ray]")
+except ImportError as e:
+    raise RuntimeError("ray is not installed. In order to use auto_train please run pip install ludwig[ray]") from e
 
 
 logger = logging.getLogger(__name__)
@@ -66,8 +63,9 @@
 
 
 class AutoTrainResults:
-    def __init__(self, experiment_analysis: ExperimentAnalysis):
+    def __init__(self, experiment_analysis: ExperimentAnalysis, creds: Dict[str, Any] = None):
         self._experiment_analysis = experiment_analysis
+        self._creds = creds
 
     @property
     def experiment_analysis(self):
@@ -84,11 +82,15 @@ def best_model(self) -> Optional[LudwigModel]:
             logger.warning("No best model found")
             return None
 
-        if not _ray_113:
-            return LudwigModel.load(os.path.join(checkpoint, "model"))
-
-        with checkpoint.as_directory() as checkpoint:
-            return LudwigModel.load(os.path.join(checkpoint, "model"))
+        ckpt_type, ckpt_path = checkpoint.get_internal_representation()
+        if ckpt_type == "uri":
+            # Read remote URIs using Ludwig's internal remote file loading APIs, as
+            # Ray's do not handle custom credentials at the moment.
+            with use_credentials(self._creds):
+                return LudwigModel.load(os.path.join(ckpt_path, "model"))
+        else:
+            with checkpoint.as_directory() as ckpt_path:
+                return LudwigModel.load(os.path.join(ckpt_path, "model"))
 
 
 def auto_train(
@@ -246,8 +248,14 @@ def train_with_config(
                 "Consider increasing the time budget for experiment. "
             )
 
+    # Extract credentials needed to pull artifacts, if provided
+    creds = None
+    backend: Backend = initialize_backend(kwargs.get("backend"))
+    if backend is not None:
+        creds = backend.storage.artifacts.credentials
+
     experiment_analysis = hyperopt_results.experiment_analysis
-    return AutoTrainResults(experiment_analysis)
+    return AutoTrainResults(experiment_analysis, creds)
 
 
 def _model_select(
diff --git a/ludwig/models/ecd.py b/ludwig/models/ecd.py
index 7169a3e33e4..e5893422692 100644
--- a/ludwig/models/ecd.py
+++ b/ludwig/models/ecd.py
@@ -14,6 +14,7 @@
 from ludwig.schema.utils import load_config_with_kwargs
 from ludwig.utils import output_feature_utils
 from ludwig.utils.data_utils import clear_data_cache
+from ludwig.utils.fs_utils import open_file
 from ludwig.utils.torch_utils import get_torch_device
 
 logger = logging.getLogger(__name__)
@@ -156,7 +157,8 @@ def load(self, save_path):
         """Loads the model from the given path."""
         weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME)
         device = torch.device(get_torch_device())
-        self.load_state_dict(torch.load(weights_save_path, map_location=device))
+        with open_file(weights_save_path, "rb") as f:
+            self.load_state_dict(torch.load(f, map_location=device))
 
     def get_args(self):
         """Returns init arguments for constructing this model."""
diff --git a/tests/integration_tests/test_automl.py b/tests/integration_tests/test_automl.py
index fa5e280a981..7118779bd18 100644
--- a/tests/integration_tests/test_automl.py
+++ b/tests/integration_tests/test_automl.py
@@ -1,3 +1,4 @@
+import contextlib
 import os
 import tempfile
 from typing import Any, Dict, List, Set
@@ -6,18 +7,31 @@
 import numpy as np
 import pandas as pd
 import pytest
+from packaging import version
 
 from ludwig.api import LudwigModel
 from ludwig.constants import COLUMN, ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PREPROCESSING, SPLIT, TRAINER, TYPE
-from tests.integration_tests.utils import category_feature, generate_data, image_feature, number_feature, text_feature
+from tests.integration_tests.utils import (
+    category_feature,
+    generate_data,
+    image_feature,
+    minio_test_creds,
+    number_feature,
+    private_param,
+    remote_tmpdir,
+    text_feature,
+)
 
-try:
-    import dask.dataframe as dd
+ray = pytest.importorskip("ray")
 
-    from ludwig.automl.automl import create_auto_config, train_with_config
-    from ludwig.hyperopt.execution import RayTuneExecutor
-except ImportError:
-    pass
+import dask.dataframe as dd  # noqa
+
+from ludwig.automl.automl import create_auto_config, train_with_config  # noqa
+from ludwig.hyperopt.execution import RayTuneExecutor  # noqa
+
+_ray200 = version.parse(ray.__version__) >= version.parse("2.0")
+
+pytestmark = pytest.mark.distributed
 
 
 @pytest.fixture(scope="module")
@@ -118,6 +132,24 @@ def test_autoconfig_preprocessing_text_image(tmpdir):
 @pytest.mark.distributed
 @pytest.mark.parametrize("time_budget", [200, 1], ids=["high", "low"])
 def test_train_with_config(time_budget, test_data, ray_cluster_2cpu, tmpdir):
+    _run_train_with_config(time_budget, test_data, tmpdir)
+
+
+@pytest.mark.parametrize("fs_protocol,bucket", [private_param(("s3", "ludwig-tests"))], ids=["s3"])
+def test_train_with_config_remote(fs_protocol, bucket, test_data, ray_cluster_2cpu):
+    backend = {
+        "type": "local",
+        "credentials": {
+            "artifacts": minio_test_creds(),
+        },
+    }
+
+    with remote_tmpdir(fs_protocol, bucket) as tmpdir:
+        with pytest.raises(ValueError) if not _ray200 else contextlib.nullcontext():
+            _run_train_with_config(200, test_data, tmpdir, backend=backend)
+
+
+def _run_train_with_config(time_budget, test_data, tmpdir, **kwargs):
     input_features, output_features, dataset_csv = test_data
     config = {
         "input_features": input_features,
@@ -161,7 +193,7 @@ def test_train_with_config(time_budget, test_data, ray_cluster_2cpu, tmpdir):
         mock_fn.side_effect = fn
 
         outdir = os.path.join(tmpdir, "output")
-        results = train_with_config(dataset_csv, config, output_directory=outdir)
+        results = train_with_config(dataset_csv, config, output_directory=outdir, **kwargs)
         best_model = results.best_model
 
         if time_budget > 1:

From 6f7d7dfd7386f8879df6ccb1ce3c7cd3966a0ed7 Mon Sep 17 00:00:00 2001
From: Daniel Treiman <dan.treiman@gmail.com>
Date: Fri, 14 Oct 2022 12:54:49 -0700
Subject: [PATCH 19/29] Adds minimum split size, ensures random split is never
 smaller than minimum for local backend (#2623)

* Typo fix

* Adds MIN_DATASET_SPLIT_ROWS, _make_fractions_ensure_minimum_rows, _make_divisions_ensure_minimum_rows

* Makes RandomSplitter never generate a nonempty split smaller than minimum.

* Only compute length of dataframe in split if local.

* Adds distributed version of guaranteed splits.

* Restored

* Removes print debugging statements.

* Select test parts first, don't choose more partitions than are available.

* Fixes randomly choosing partitions when there are fewer available.

* Adds more tests, fixes comments.

* Reverts back to random_split for distributed dataframes.

* Removes make_fractions_ensure_minimum_rows, not necessary anymore.

* Revise comment.

* Moves MIN_DATASET_SPLIT_ROWS into _make_divisions_ensure_minimum_rows default arguments.
---
 ludwig/api.py                      |  6 +--
 ludwig/constants.py                |  2 +-
 ludwig/data/dataset_synthesizer.py |  2 +-
 ludwig/data/split.py               | 65 ++++++++++++++++++++++--------
 tests/ludwig/data/test_split.py    | 21 ++++++++++
 5 files changed, 75 insertions(+), 21 deletions(-)

diff --git a/ludwig/api.py b/ludwig/api.py
index c0cae9970ca..bec2e957f3a 100644
--- a/ludwig/api.py
+++ b/ludwig/api.py
@@ -45,7 +45,7 @@
     HYPEROPT,
     HYPEROPT_WARNING,
     LEARNING_RATE,
-    MIN_VALIDATION_SET_ROWS,
+    MIN_DATASET_SPLIT_ROWS,
     MODEL_TYPE,
     PREPROCESSING,
     TEST,
@@ -577,11 +577,11 @@ def on_epoch_end(self, trainer, progress_tracker, save_path):
                                     "Recommend providing a validation set when using calibration."
                                 )
                                 calibrator.train_calibration(training_set, TRAINING)
-                            elif len(validation_set) < MIN_VALIDATION_SET_ROWS:
+                            elif len(validation_set) < MIN_DATASET_SPLIT_ROWS:
                                 logger.warning(
                                     f"Validation set size ({len(validation_set)} rows) is too small for calibration."
                                     "Will use training set for calibration."
-                                    f"Validation set much have at least {MIN_VALIDATION_SET_ROWS} rows."
+                                    f"Validation set much have at least {MIN_DATASET_SPLIT_ROWS} rows."
                                 )
                                 calibrator.train_calibration(training_set, TRAINING)
                             else:
diff --git a/ludwig/constants.py b/ludwig/constants.py
index b2b1ce3e6d7..902f4eff066 100644
--- a/ludwig/constants.py
+++ b/ludwig/constants.py
@@ -128,7 +128,7 @@
 TRAIN_SPLIT = 0
 VALIDATION_SPLIT = 1
 TEST_SPLIT = 2
-MIN_VALIDATION_SET_ROWS = 3  # The minimum validation set size to ensure metric computation doesn't fail.
+MIN_DATASET_SPLIT_ROWS = 3  # The minimum number of rows in a split. Splits smaller than this size are treated as empty.
 
 META = "meta"
 
diff --git a/ludwig/data/dataset_synthesizer.py b/ludwig/data/dataset_synthesizer.py
index 9c3cd267552..f990d93e8c7 100644
--- a/ludwig/data/dataset_synthesizer.py
+++ b/ludwig/data/dataset_synthesizer.py
@@ -172,7 +172,7 @@ def build_synthetic_dataset(dataset_size: int, features: List[dict], outdir: str
 
     :param dataset_size: (int) size of the dataset
     :param features: (List[dict]) list of features to generate in YAML format.
-        Provide a list contaning one dictionary for each feature,
+        Provide a list containing one dictionary for each feature,
         each dictionary must include a name, a type
         and can include some generation parameters depending on the type
     :param outdir: (str) Path to an output directory. Used for saving synthetic image and audio files.
diff --git a/ludwig/data/split.py b/ludwig/data/split.py
index d78e28acf54..a232bdbcd05 100644
--- a/ludwig/data/split.py
+++ b/ludwig/data/split.py
@@ -22,7 +22,7 @@
 from sklearn.model_selection import train_test_split
 
 from ludwig.backend.base import Backend
-from ludwig.constants import BINARY, CATEGORY, COLUMN, DATE, SPLIT, TYPE
+from ludwig.constants import BINARY, CATEGORY, COLUMN, DATE, MIN_DATASET_SPLIT_ROWS, SPLIT, TYPE
 from ludwig.schema.split import (
     DateTimeSplitConfig,
     FixedSplitConfig,
@@ -61,6 +61,41 @@ def required_columns(self) -> List[str]:
         return []
 
 
+def _make_divisions_ensure_minimum_rows(
+    divisions: List[int],
+    n_examples: int,
+    min_val_rows: int = MIN_DATASET_SPLIT_ROWS,
+    min_test_rows: int = MIN_DATASET_SPLIT_ROWS,
+) -> List[int]:
+    """Revises divisions to ensure no dataset split has too few examples."""
+    result = list(divisions)
+    n = [dn - dm for dm, dn in zip((0,) + divisions, divisions + (n_examples,))]  # Number of examples in each split.
+    if 0 < n[2] < min_test_rows and n[0] > 0:
+        # Test set is nonempty but too small, take examples from training set.
+        shift = min(min_test_rows - n[2], n[0])
+        result = [d - shift for d in result]
+    if 0 < n[1] < min_val_rows and n[0] > 0:
+        # Validation set is nonempty but too small, take examples from training set.
+        result[0] -= min(min_val_rows - n[1], result[0])
+    return result
+
+
+def _split_divisions_with_min_rows(n_rows: int, probabilities: List[float]) -> List[int]:
+    """Generates splits for a dataset of n_rows into train, validation, and test sets according to split
+    probabilities, also ensuring that at least min_val_rows or min_test_rows are present in each nonempty split.
+
+    Returns division indices to split on.
+    """
+    d1 = int(np.ceil(probabilities[0] * n_rows))
+    if probabilities[-1] > 0:
+        n2 = int(probabilities[1] * n_rows)
+        d2 = d1 + n2
+    else:
+        # If the last probability is 0, then use the entire remaining dataset for validation.
+        d2 = n_rows
+    return _make_divisions_ensure_minimum_rows((d1, d2), n_rows)
+
+
 @split_registry.register("random", default=True)
 class RandomSplitter(Splitter):
     def __init__(self, probabilities: List[float] = DEFAULT_PROBABILITIES, **kwargs):
@@ -69,21 +104,19 @@ def __init__(self, probabilities: List[float] = DEFAULT_PROBABILITIES, **kwargs)
     def split(
         self, df: DataFrame, backend: Backend, random_seed: float = default_random_seed
     ) -> Tuple[DataFrame, DataFrame, DataFrame]:
-        if backend.df_engine.partitioned:
-            # The below approach is very inefficient for partitioned backends, which
-            # can split by partition. This may not be exact in all cases, but is much more efficient.
-            return df.random_split(self.probabilities, random_state=random_seed)
-
-        n = len(df)
-        d1 = int(self.probabilities[0] * n)
-        if not self.probabilities[-1]:
-            # If the last probability is 0, then use the entire remaining dataset for validation.
-            d2 = n
-        else:
-            d2 = d1 + int(self.probabilities[1] * n)
-
-        # Note that sometimes this results in the test set with 1 example even if the last probability is 0.
-        return np.split(df.sample(frac=1, random_state=random_seed), [d1, d2])
+        probabilities = self.probabilities
+        if not backend.df_engine.partitioned:
+            divisions = _split_divisions_with_min_rows(len(df), probabilities)
+            shuffled_df = df.sample(frac=1, random_state=random_seed)
+            return (
+                shuffled_df.iloc[: divisions[0]],  # Train
+                shuffled_df.iloc[divisions[0] : divisions[1]],  # Validation
+                shuffled_df.iloc[divisions[1] :],  # Test
+            )
+
+        # The above approach is very inefficient for partitioned backends, which can split by partition.
+        # This does not give exact guarantees on split size but is much more efficient for large datasets.
+        return df.random_split(self.probabilities, random_state=random_seed)
 
     def has_split(self, split_index: int) -> bool:
         return self.probabilities[split_index] > 0
diff --git a/tests/ludwig/data/test_split.py b/tests/ludwig/data/test_split.py
index 4ff019d7410..40c6806d644 100644
--- a/tests/ludwig/data/test_split.py
+++ b/tests/ludwig/data/test_split.py
@@ -16,6 +16,27 @@
     DaskEngine = Mock
 
 
+def test_make_divisions_ensure_minimum_rows():
+    from ludwig.data.split import _make_divisions_ensure_minimum_rows
+
+    # Constraints are satisfied, the function should make no change to divisions.
+    divisions = _make_divisions_ensure_minimum_rows((70, 80), 100, min_val_rows=3, min_test_rows=3)
+    assert divisions[0] == 70
+    assert divisions[1] == 80
+    # Constraints are satisfied, the function should make no change to divisions.
+    divisions = _make_divisions_ensure_minimum_rows((20, 22), 25, min_val_rows=0, min_test_rows=0)
+    assert divisions[0] == 20
+    assert divisions[1] == 22
+    # The number of rows in validation set is too small.
+    divisions = _make_divisions_ensure_minimum_rows((17, 19), 25, min_val_rows=3, min_test_rows=3)
+    assert divisions[0] == 16
+    assert divisions[1] == 19
+    # The number of rows in validation and test sets are both too small.
+    divisions = _make_divisions_ensure_minimum_rows((20, 22), 25, min_val_rows=3, min_test_rows=3)
+    assert divisions[0] == 19
+    assert divisions[1] == 22
+
+
 @pytest.mark.parametrize(
     ("df_engine",),
     [

From 07adec872a8348116a06974aff28202160506a35 Mon Sep 17 00:00:00 2001
From: abidwael <103003638+abidwael@users.noreply.github.com>
Date: Fri, 14 Oct 2022 14:16:23 -0700
Subject: [PATCH 20/29] Categorical passthrough encoder training failure fix
 (#2649)

* fix for categorical passthrough encoder

* add test

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 ludwig/encoders/category_encoders.py         | 35 +++++++++++++++++++-
 ludwig/encoders/generic_encoders.py          |  4 +--
 ludwig/schema/encoders/base.py               |  4 +--
 ludwig/schema/encoders/category_encoders.py  | 14 ++++++++
 tests/ludwig/models/test_training_success.py | 33 ++++++++++++++++++
 5 files changed, 85 insertions(+), 5 deletions(-)
 create mode 100644 tests/ludwig/models/test_training_success.py

diff --git a/ludwig/encoders/category_encoders.py b/ludwig/encoders/category_encoders.py
index 94efd18cc1d..9fb7630023a 100644
--- a/ludwig/encoders/category_encoders.py
+++ b/ludwig/encoders/category_encoders.py
@@ -22,11 +22,44 @@
 from ludwig.encoders.base import Encoder
 from ludwig.encoders.registry import register_encoder
 from ludwig.modules.embedding_modules import Embed
-from ludwig.schema.encoders.category_encoders import CategoricalEmbedConfig, CategoricalSparseConfig
+from ludwig.schema.encoders.category_encoders import (
+    CategoricalEmbedConfig,
+    CategoricalPassthroughEncoderConfig,
+    CategoricalSparseConfig,
+)
 
 logger = logging.getLogger(__name__)
 
 
+@register_encoder("passthrough", [CATEGORY])
+class CategoricalPassthroughEncoder(Encoder):
+    def __init__(self, input_size=1, encoder_config=None, **kwargs):
+        super().__init__()
+        self.config = encoder_config
+
+        logger.debug(f" {self.name}")
+        self.input_size = input_size
+
+    def forward(self, inputs, mask=None):
+        """Return the inputs unchanged apart from a cast to float; `mask` is not used.
+
+        :param inputs: The inputs fed into the encoder. Shape: [batch x 1]
+        """
+        return inputs.float()
+
+    @staticmethod
+    def get_schema_cls():
+        return CategoricalPassthroughEncoderConfig
+
+    @property
+    def input_shape(self) -> torch.Size:
+        return torch.Size([self.input_size])
+
+    @property
+    def output_shape(self) -> torch.Size:
+        return self.input_shape
+
+
 @register_encoder("dense", CATEGORY)
 class CategoricalEmbedEncoder(Encoder):
     def __init__(
diff --git a/ludwig/encoders/generic_encoders.py b/ludwig/encoders/generic_encoders.py
index 3a06187cbb7..4d37e29d2ef 100644
--- a/ludwig/encoders/generic_encoders.py
+++ b/ludwig/encoders/generic_encoders.py
@@ -17,7 +17,7 @@
 
 import torch
 
-from ludwig.constants import BINARY, CATEGORY, NUMBER, VECTOR
+from ludwig.constants import BINARY, NUMBER, VECTOR
 from ludwig.encoders.base import Encoder
 from ludwig.encoders.registry import register_encoder
 from ludwig.modules.fully_connected_modules import FCStack
@@ -26,7 +26,7 @@
 logger = logging.getLogger(__name__)
 
 
-@register_encoder("passthrough", [CATEGORY, NUMBER, VECTOR])
+@register_encoder("passthrough", [NUMBER, VECTOR])
 class PassthroughEncoder(Encoder):
     def __init__(self, input_size=1, encoder_config=None, **kwargs):
         super().__init__()
diff --git a/ludwig/schema/encoders/base.py b/ludwig/schema/encoders/base.py
index ca28e91c41c..6c531f5ad71 100644
--- a/ludwig/schema/encoders/base.py
+++ b/ludwig/schema/encoders/base.py
@@ -3,7 +3,7 @@
 
 from marshmallow_dataclass import dataclass
 
-from ludwig.constants import BINARY, CATEGORY, NUMBER, VECTOR
+from ludwig.constants import BINARY, NUMBER, VECTOR
 from ludwig.schema import utils as schema_utils
 from ludwig.schema.encoders.utils import register_encoder_config
 
@@ -19,7 +19,7 @@ class BaseEncoderConfig(schema_utils.BaseMarshmallowConfig, ABC):
     "Name corresponding to an encoder."
 
 
-@register_encoder_config("passthrough", [CATEGORY, NUMBER, VECTOR])
+@register_encoder_config("passthrough", [NUMBER, VECTOR])
 @dataclass
 class PassthroughEncoderConfig(BaseEncoderConfig):
     """PassthroughEncoderConfig is a dataclass that configures the parameters used for a passthrough encoder."""
diff --git a/ludwig/schema/encoders/category_encoders.py b/ludwig/schema/encoders/category_encoders.py
index 9457f7f7546..7be97b7550d 100644
--- a/ludwig/schema/encoders/category_encoders.py
+++ b/ludwig/schema/encoders/category_encoders.py
@@ -9,6 +9,20 @@
 from ludwig.schema.metadata.encoder_metadata import ENCODER_METADATA
 
 
+@register_encoder_config("passthrough", CATEGORY)
+@dataclass
+class CategoricalPassthroughEncoderConfig(BaseEncoderConfig):
+    """Config dataclass for the passthrough encoder registered for CATEGORY features.
+    Its only field is `type`, whose single listed option and default are both "passthrough"."""
+
+    type: str = schema_utils.StringOptions(
+        ["passthrough"],
+        default="passthrough",
+        allow_none=False,
+        description="Type of encoder.",
+    )
+
+
 @register_encoder_config("dense", CATEGORY)
 @dataclass
 class CategoricalEmbedConfig(BaseEncoderConfig):
diff --git a/tests/ludwig/models/test_training_success.py b/tests/ludwig/models/test_training_success.py
new file mode 100644
index 00000000000..f65cebdffe6
--- /dev/null
+++ b/tests/ludwig/models/test_training_success.py
@@ -0,0 +1,33 @@
+from contextlib import nullcontext as no_error_raised
+
+from ludwig.api import LudwigModel
+from ludwig.constants import TRAINER
+from tests.integration_tests.utils import category_feature, generate_data
+
+
+def test_category_passthrough_encoder(csv_filename):
+    input_features = [category_feature(), category_feature()]
+    output_features = [category_feature(output_feature=True)]
+    config = {
+        "input_features": input_features,
+        "output_features": output_features,
+        TRAINER: {"train_steps": 1},
+        "defaults": {"category": {"encoder": {"type": "passthrough"}}},
+    }
+
+    # Generate training data for the features above; the return value is used as the dataset below.
+    training_data_csv_path = generate_data(input_features, output_features, csv_filename)
+
+    # Build the model via the Python API; the test passes if experiment() below completes without raising.
+    ludwig_model = LudwigModel(config)
+
+    with no_error_raised():
+        ludwig_model.experiment(
+            dataset=training_data_csv_path,
+            skip_save_training_description=True,
+            skip_save_training_statistics=True,
+            skip_save_model=True,
+            skip_save_progress=True,
+            skip_save_log=True,
+            skip_save_processed_input=True,
+        )

From 4f40ffec8e81eb3f6385243498babe1409a675be Mon Sep 17 00:00:00 2001
From: Daniel Treiman <dan.treiman@gmail.com>
Date: Fri, 14 Oct 2022 14:21:21 -0700
Subject: [PATCH 21/29] Changes learning_curves to use "step" or "epoch" as
 x-axis label. (#2578)

* Started building dataclasses for model training output.

* Adds EvaluationFrequency to training stats, dataclasses for training results.

* Adds x_label, x_step to learning_curves.

* fix x axis when using checkpoints_per_epoch.

* Fixes CLI test by making dataclass JSON-serializable and implementing __contains__.

* Adds default value for EvaluationFrequency, maybe fixes test_learning_curves with only training metrics.

* Fixes kfold CV.

* Fixes viz tests, restoring original functionality of load_data_for_viz.

* Adds todos to deprecate.
---
 ludwig/api.py                                 | 126 ++++++++++++++++--
 ludwig/utils/data_utils.py                    |   3 +
 ludwig/utils/metric_utils.py                  |   3 +-
 ludwig/utils/visualization_utils.py           |  14 +-
 ludwig/visualize.py                           |  57 +++++---
 .../test_visualization_api.py                 |  17 +--
 6 files changed, 171 insertions(+), 49 deletions(-)

diff --git a/ludwig/api.py b/ludwig/api.py
index bec2e957f3a..959360feb13 100644
--- a/ludwig/api.py
+++ b/ludwig/api.py
@@ -20,6 +20,7 @@
     Python Version: 3+
 """
 import copy
+import dataclasses
 import logging
 import os
 import sys
@@ -27,11 +28,12 @@
 import traceback
 from collections import OrderedDict
 from pprint import pformat
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
 import torch
+from marshmallow_dataclass import dataclass
 from tabulate import tabulate
 
 from ludwig.backend import Backend, initialize_backend, provision_preprocessing_workers
@@ -104,6 +106,88 @@
 logger = logging.getLogger(__name__)
 
 
+@dataclass
+class EvaluationFrequency:
+    """Represents the frequency of periodic evaluation of a metric during training. For example:
+
+    "every epoch"
+    frequency: 1, period: EPOCH
+
+    "every 50 steps"
+    frequency: 50, period: STEP
+    """
+
+    frequency: float = 1.0  # Number of periods between evaluations; may be fractional (e.g. 0.5 epochs).
+    period: str = "epoch"  # One of "epoch" or "step".
+
+    EPOCH: ClassVar[str] = "epoch"  # One epoch is a single pass through the training set.
+    STEP: ClassVar[str] = "step"  # One step is training on one mini-batch.
+
+
+@dataclass
+class TrainingStats:
+    """Training stats were previously represented as a tuple or a dict.
+
+    This class keeps both: unpacking yields (training, validation, test); `in` is True only for non-empty splits.
+    """
+
+    training: Dict[str, Any]
+    validation: Dict[str, Any]
+    test: Dict[str, Any]
+    evaluation_frequency: EvaluationFrequency = dataclasses.field(default_factory=EvaluationFrequency)
+
+    # TODO(daniel): deprecate multiple return value unpacking and dictionary-style element access
+    def __iter__(self):
+        return iter((self.training, self.validation, self.test))
+
+    def __contains__(self, key):
+        return bool(
+            (key == TRAINING and self.training)
+            or (key == VALIDATION and self.validation)
+            or (key == TEST and self.test)
+        )
+
+    def __getitem__(self, key):
+        # Supports dict-style [] element access for compatibility; unknown keys raise KeyError.
+        return {TRAINING: self.training, VALIDATION: self.validation, TEST: self.test}[key]
+
+
+@dataclass
+class PreprocessedDataset:
+    training_set: Dataset
+    validation_set: Dataset
+    test_set: Dataset
+    training_set_metadata: Dict[str, Any]
+
+    # Tuple-style unpacking and indexing follow field order. TODO(daniel): deprecate both.
+    def __iter__(self):
+        return iter((self.training_set, self.validation_set, self.test_set, self.training_set_metadata))
+
+    def __getitem__(self, index):
+        return (self.training_set, self.validation_set, self.test_set, self.training_set_metadata)[index]
+
+
+@dataclass
+class TrainingResults:
+    train_stats: TrainingStats
+    preprocessed_data: PreprocessedDataset
+    output_directory: str
+
+    def __iter__(self):
+        """Supports tuple-style return value unpacking in field order, e.g.:
+
+        train_stats, preprocessed_data, output_directory = model.train(...)
+        """
+        return iter((self.train_stats, self.preprocessed_data, self.output_directory))
+
+    def __getitem__(self, index):
+        """Provides indexed access in the same order as unpacking, e.g.:
+
+        train_stats = model.train(...)[0]
+        """
+        return (self.train_stats, self.preprocessed_data, self.output_directory)[index]
+
+
 class LudwigModel:
     """Class that allows access to high level Ludwig functionalities.
 
@@ -263,7 +347,7 @@ def train(
         output_directory: str = "results",
         random_seed: int = default_random_seed,
         **kwargs,
-    ) -> Tuple[dict, Union[dict, pd.DataFrame], str]:
+    ) -> TrainingResults:
         """This function is used to perform a full training of the model on the specified dataset.
 
         During training if the skip parameters are False
@@ -589,17 +673,30 @@ def on_epoch_end(self, trainer, progress_tracker, save_path):
                         if not skip_save_model:
                             self.model.save(model_dir)
 
+                    # Evaluation Frequency
+                    if self.config[TRAINER].get("steps_per_checkpoint", None):
+                        evaluation_frequency = EvaluationFrequency(
+                            self.config[TRAINER]["steps_per_checkpoint"], EvaluationFrequency.STEP
+                        )
+                    elif self.config[TRAINER].get("checkpoints_per_epoch", None):
+                        evaluation_frequency = EvaluationFrequency(
+                            1.0 / self.config[TRAINER]["checkpoints_per_epoch"], EvaluationFrequency.EPOCH
+                        )
+                    else:
+                        evaluation_frequency = EvaluationFrequency(1, EvaluationFrequency.EPOCH)
+
                     # Unpack train()'s return.
                     # The statistics are all nested dictionaries of TrainerMetrics: feature_name -> metric_name ->
                     # List[TrainerMetric], with one entry per training checkpoint, according to steps_per_checkpoint.
                     # We reduce the dictionary of TrainerMetrics to a simple list of floats for interfacing with Ray
                     # Tune.
                     (self.model, train_trainset_stats, train_valiset_stats, train_testset_stats) = train_stats
-                    train_stats = {
-                        TRAINING: metric_utils.reduce_trainer_metrics_dict(train_trainset_stats),
-                        VALIDATION: metric_utils.reduce_trainer_metrics_dict(train_valiset_stats),
-                        TEST: metric_utils.reduce_trainer_metrics_dict(train_testset_stats),
-                    }
+                    train_stats = TrainingStats(
+                        metric_utils.reduce_trainer_metrics_dict(train_trainset_stats),
+                        metric_utils.reduce_trainer_metrics_dict(train_valiset_stats),
+                        metric_utils.reduce_trainer_metrics_dict(train_testset_stats),
+                        evaluation_frequency,
+                    )
 
                     # save training statistics
                     if self.backend.is_coordinator():
@@ -633,7 +730,7 @@ def on_epoch_end(self, trainer, progress_tracker, save_path):
                 self.backend.sync_model(self.model)
 
                 print_boxed("FINISHED")
-                return train_stats, preprocessed_data, output_url
+                return TrainingResults(train_stats, preprocessed_data, output_url)
 
     def train_online(
         self,
@@ -985,7 +1082,7 @@ def experiment(
         output_directory: str = "results",
         random_seed: int = default_random_seed,
         **kwargs,
-    ) -> Tuple[Optional[dict], dict, Union[dict, pd.DataFrame], str]:
+    ) -> Tuple[Optional[dict], TrainingStats, PreprocessedDataset, str]:
         """Trains a model on a dataset's training and validation splits and uses it to predict on the test split.
         It saves the trained model and the statistics of training and testing.
 
@@ -1239,7 +1336,7 @@ def preprocess(
         skip_save_processed_input: bool = True,
         random_seed: int = default_random_seed,
         **kwargs,
-    ) -> Tuple[Dataset, Dataset, Dataset, dict]:
+    ) -> PreprocessedDataset:
         """This function is used to preprocess data.
 
         # Inputs
@@ -1282,7 +1379,7 @@ def preprocess(
 
         # Return
 
-        :return: (Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, Dict]) tuple containing
+        :return: (PreprocessedDataset) data structure containing
             `(proc_training_set, proc_validation_set, proc_test_set, training_set_metadata)`.
         """
         print_boxed("PREPROCESSING")
@@ -1309,7 +1406,7 @@ def preprocess(
 
         (proc_training_set, proc_validation_set, proc_test_set, training_set_metadata) = preprocessed_data
 
-        return proc_training_set, proc_validation_set, proc_test_set, training_set_metadata
+        return PreprocessedDataset(proc_training_set, proc_validation_set, proc_test_set, training_set_metadata)
 
     @staticmethod
     def load(
@@ -1737,10 +1834,11 @@ def kfold_cross_validate(
 
             # augment the training statistics with scoring metric from
             # the hold out fold
-            train_stats["fold_eval_stats"] = eval_stats
+            train_stats_dict = dataclasses.asdict(train_stats)
+            train_stats_dict["fold_eval_stats"] = eval_stats
 
             # collect training statistics for this fold
-            kfold_cv_stats["fold_" + str(fold_num)] = train_stats
+            kfold_cv_stats["fold_" + str(fold_num)] = train_stats_dict
 
     # consolidate raw fold metrics across all folds
     raw_kfold_stats = {}
diff --git a/ludwig/utils/data_utils.py b/ludwig/utils/data_utils.py
index 6385e6842af..1a2900167ef 100644
--- a/ludwig/utils/data_utils.py
+++ b/ludwig/utils/data_utils.py
@@ -17,6 +17,7 @@
 import collections.abc
 import contextlib
 import csv
+import dataclasses
 import functools
 import hashlib
 import json
@@ -675,6 +676,8 @@ def default(self, obj):
             return float(obj)
         elif isinstance(obj, np.ndarray):
             return obj.tolist()
+        elif dataclasses.is_dataclass(obj):
+            return dataclasses.asdict(obj)
         else:
             return json.JSONEncoder.default(self, obj)
 
diff --git a/ludwig/utils/metric_utils.py b/ludwig/utils/metric_utils.py
index 1cfdf38e5a0..5946448c94c 100644
--- a/ludwig/utils/metric_utils.py
+++ b/ludwig/utils/metric_utils.py
@@ -85,7 +85,8 @@ def reduce_trainer_metrics_dict(
         for metric_name, trainer_metrics in trainer_metric_dict.items():
             for trainer_metric in trainer_metrics:
                 flattened_dict[feature_name][metric_name].append(trainer_metric[-1])
-    return flattened_dict
+    # Convert defaultdict to dict so JSON serialization works with dataclasses.asdict().
+    return {k: dict(v) for k, v in flattened_dict.items()}
 
 
 def get_metric_names(output_features: Dict[str, Dict]) -> Dict[str, List[str]]:
diff --git a/ludwig/utils/visualization_utils.py b/ludwig/utils/visualization_utils.py
index 0e5050cb668..31367d14d7e 100644
--- a/ludwig/utils/visualization_utils.py
+++ b/ludwig/utils/visualization_utils.py
@@ -74,7 +74,15 @@ def visualize_callbacks(callbacks, fig):
 
 
 def learning_curves_plot(
-    train_values, vali_values, metric, algorithm_names=None, title=None, filename=None, callbacks=None
+    train_values,
+    vali_values,
+    metric,
+    x_label="epoch",
+    x_step=1,
+    algorithm_names=None,
+    title=None,
+    filename=None,
+    callbacks=None,
 ):
     num_algorithms = len(train_values)
     max_len = max(len(tv) for tv in train_values)
@@ -94,10 +102,10 @@ def learning_curves_plot(
     ax.grid(which="both")
     ax.grid(which="minor", alpha=0.5)
     ax.grid(which="major", alpha=0.75)
-    ax.set_xlabel("epochs")
+    ax.set_xlabel(x_label)
     ax.set_ylabel(metric.replace("_", " "))
 
-    xs = list(range(1, max_len + 1))
+    xs = np.arange(1, (max_len * x_step) + 1, x_step)
 
     for i in range(num_algorithms):
         name_prefix = algorithm_names[i] + " " if algorithm_names is not None and i < len(algorithm_names) else ""
diff --git a/ludwig/visualize.py b/ludwig/visualize.py
index 5b59f0d89eb..7a4b7f47973 100644
--- a/ludwig/visualize.py
+++ b/ludwig/visualize.py
@@ -14,11 +14,12 @@
 # limitations under the License.
 # ==============================================================================
 import argparse
+import itertools
 import logging
 import os
 import sys
 from functools import partial
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -28,9 +29,10 @@
 from sklearn.metrics import brier_score_loss
 from yaml import warnings
 
+from ludwig.api import TrainingStats
 from ludwig.backend import LOCAL_BACKEND
 from ludwig.callbacks import Callback
-from ludwig.constants import ACCURACY, EDIT_DISTANCE, HITS_AT_K, LOSS, PREDICTIONS, SPACE, SPLIT, TRAINING, VALIDATION
+from ludwig.constants import ACCURACY, EDIT_DISTANCE, HITS_AT_K, LOSS, PREDICTIONS, SPACE, SPLIT
 from ludwig.contrib import add_contrib_callback_args
 from ludwig.utils import visualization_utils
 from ludwig.utils.data_utils import (
@@ -112,8 +114,8 @@ def validate_conf_thresholds_and_probabilities_2d_3d(probabilities, threshold_ou
             raise RuntimeError(exception_message)
 
 
-def load_data_for_viz(load_type, model_file_statistics, **kwargs):
-    """Load model file data in to list of .
+def load_data_for_viz(load_type, model_file_statistics, dtype=int, ground_truth_split=2) -> Dict[str, Any]:
+    """Load JSON files (training stats, evaluation stats...) for a list of models.
 
     :param load_type: type of the data loader to be used.
     :param model_file_statistics: JSON file or list of json files containing any
@@ -122,11 +124,10 @@ def load_data_for_viz(load_type, model_file_statistics, **kwargs):
     """
     supported_load_types = dict(
         load_json=load_json,
-        load_from_file=partial(
-            load_from_file, dtype=kwargs.get("dtype", int), ground_truth_split=kwargs.get("ground_truth_split", 2)
-        ),
+        load_from_file=partial(load_from_file, dtype=dtype, ground_truth_split=ground_truth_split),
     )
     loader = supported_load_types[load_type]
+    # Loads training stats from JSON file(s).
     try:
         stats_per_model = [loader(stats_f) for stats_f in model_file_statistics]
     except (TypeError, AttributeError):
@@ -135,6 +136,25 @@ def load_data_for_viz(load_type, model_file_statistics, **kwargs):
     return stats_per_model
 
 
+def load_training_stats_for_viz(
+    load_type, model_file_statistics, dtype=int, ground_truth_split=2
+) -> List[TrainingStats]:
+    """Load model file data (specifically training stats) for a list of models.
+
+    :param load_type: type of the data loader to be used.
+    :param model_file_statistics: JSON file or list of json files containing any model experiment stats.
+    :return: List of model statistics loaded as TrainingStats objects.
+    """
+    stats_per_model = load_data_for_viz(
+        load_type, model_file_statistics, dtype=dtype, ground_truth_split=ground_truth_split
+    )
+    try:
+        return [TrainingStats.Schema().load(j) for j in stats_per_model]
+    except Exception:
+        logger.exception(f"Failed to load model statistics {model_file_statistics}!")
+        raise
+
+
 def convert_to_list(item):
     """If item is not list class instance or None put inside a list.
 
@@ -152,10 +172,9 @@ def _validate_output_feature_name_from_train_stats(output_feature_name, train_st
     :return output_feature_names: list of output_feature_name(s) containing ground truth
     """
     output_feature_names_set = set()
-    for ls in train_stats_per_model:
-        for _, values in ls.items():
-            for key in values:
-                output_feature_names_set.add(key)
+    for train_stats in train_stats_per_model:
+        for key in itertools.chain(train_stats.training.keys(), train_stats.validation.keys(), train_stats.test.keys()):
+            output_feature_names_set.add(key)
     try:
         if output_feature_name in output_feature_names_set:
             return [output_feature_name]
@@ -338,7 +357,7 @@ def learning_curves_cli(training_statistics: Union[str, List[str]], **kwargs: di
 
     :return None:
     """
-    train_stats_per_model = load_data_for_viz("load_json", training_statistics)
+    train_stats_per_model = load_training_stats_for_viz("load_json", training_statistics)
     learning_curves(train_stats_per_model, **kwargs)
 
 
@@ -1284,28 +1303,32 @@ def learning_curves(
     metrics = [LOSS, ACCURACY, HITS_AT_K, EDIT_DISTANCE]
     for output_feature_name in output_feature_names:
         for metric in metrics:
-            if metric in train_stats_per_model_list[0][TRAINING][output_feature_name]:
+            if metric in train_stats_per_model_list[0].training[output_feature_name]:
                 filename = None
                 if filename_template_path:
                     filename = filename_template_path.format(output_feature_name, metric)
 
                 training_stats = [
-                    learning_stats[TRAINING][output_feature_name][metric]
+                    learning_stats.training[output_feature_name][metric]
                     for learning_stats in train_stats_per_model_list
                 ]
 
                 validation_stats = []
                 for learning_stats in train_stats_per_model_list:
-                    if VALIDATION in learning_stats and output_feature_name in learning_stats[VALIDATION]:
-                        validation_stats.append(learning_stats[VALIDATION][output_feature_name][metric])
+                    if learning_stats.validation and output_feature_name in learning_stats.validation:
+                        validation_stats.append(learning_stats.validation[output_feature_name][metric])
                     else:
                         validation_stats.append(None)
 
+                evaluation_frequency = train_stats_per_model_list[0].evaluation_frequency
+
                 visualization_utils.learning_curves_plot(
                     training_stats,
                     validation_stats,
                     metric,
-                    model_names_list,
+                    x_label=evaluation_frequency.period,
+                    x_step=evaluation_frequency.frequency,
+                    algorithm_names=model_names_list,
                     title=f"Learning Curves {output_feature_name}",
                     filename=filename,
                     callbacks=callbacks,
diff --git a/tests/integration_tests/test_visualization_api.py b/tests/integration_tests/test_visualization_api.py
index ea915671104..aa39131b3e8 100644
--- a/tests/integration_tests/test_visualization_api.py
+++ b/tests/integration_tests/test_visualization_api.py
@@ -21,19 +21,8 @@
 import pytest
 
 from ludwig import visualize
-from ludwig.api import LudwigModel
-from ludwig.constants import (
-    ENCODER,
-    NAME,
-    PREDICTIONS,
-    PROBABILITIES,
-    PROBABILITY,
-    TEST,
-    TRAINER,
-    TRAINING,
-    TYPE,
-    VALIDATION,
-)
+from ludwig.api import LudwigModel, TrainingStats
+from ludwig.constants import ENCODER, NAME, PREDICTIONS, PROBABILITIES, PROBABILITY, TRAINER, TYPE
 from ludwig.data.split import get_splitter
 from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME
 from ludwig.utils.data_utils import read_csv
@@ -153,7 +142,7 @@ def test_learning_curves_vis_api(experiment_to_use, training_only):
     if training_only:
         # ensure plot works with only training metrics
         # Handle situation in Issue #1875
-        train_stats = {TEST: {}, TRAINING: train_stats[TRAINING], VALIDATION: {}}
+        train_stats = TrainingStats(train_stats.training, {}, {})
     with TemporaryDirectory() as tmpvizdir:
         for viz_output in viz_outputs:
             vis_output_pattern_pdf = tmpvizdir + f"/*.{viz_output}"

From b78648ec0c0098e69d9bb158989819fa9da25f55 Mon Sep 17 00:00:00 2001
From: connor-mccorm <97468934+connor-mccorm@users.noreply.github.com>
Date: Fri, 14 Oct 2022 15:43:50 -0700
Subject: [PATCH 22/29] Remove Trainer `type` Param (#2647)

* Added trainer type bandaid fix until config object lands

* Got rid of trainer type, schema validates based on model type now

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Flake 8

* partway through refactoring trainer type to just use model type throughout Ludwig

* Continued refactoring trainer type removal

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Flake 8

* Fix tests

* Flake 8

* Fix test

* Fix bug

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 ludwig/backend/base.py                        |  4 +-
 ludwig/backend/ray.py                         | 11 +--
 ludwig/constants.py                           |  1 +
 ludwig/schema/__init__.py                     | 12 +--
 ludwig/schema/trainer.py                      | 84 ++++---------------
 ludwig/schema/utils.py                        | 11 +--
 ludwig/trainers/registry.py                   | 44 ++++------
 ludwig/trainers/trainer.py                    |  2 +-
 ludwig/trainers/trainer_lightgbm.py           |  4 +-
 tests/integration_tests/test_gbm.py           | 30 -------
 .../schema/test_validate_config_misc.py       | 33 +++++++-
 tests/ludwig/utils/test_defaults.py           | 53 ------------
 12 files changed, 80 insertions(+), 209 deletions(-)

diff --git a/ludwig/backend/base.py b/ludwig/backend/base.py
index 186ec57006b..fb334f3568c 100644
--- a/ludwig/backend/base.py
+++ b/ludwig/backend/base.py
@@ -161,9 +161,7 @@ def create_trainer(
     ) -> "BaseTrainer":  # noqa: F821
         from ludwig.trainers.registry import trainers_registry
 
-        trainers_for_model = get_from_registry(model.type(), trainers_registry)
-
-        trainer_cls = get_from_registry(config.type, trainers_for_model)
+        trainer_cls = get_from_registry(model.type(), trainers_registry)
 
         return trainer_cls(config=config, model=model, **kwargs)
 
diff --git a/ludwig/backend/ray.py b/ludwig/backend/ray.py
index d1f741d79f1..2ed549146dd 100644
--- a/ludwig/backend/ray.py
+++ b/ludwig/backend/ray.py
@@ -59,7 +59,7 @@
 from ludwig.models.base import BaseModel
 from ludwig.models.ecd import ECD
 from ludwig.models.predictor import BasePredictor, get_output_columns, Predictor, RemotePredictor
-from ludwig.schema.trainer import ECDTrainerConfig, GBMTrainerConfig
+from ludwig.schema.trainer import ECDTrainerConfig
 from ludwig.trainers.registry import ray_trainers_registry, register_ray_trainer
 from ludwig.trainers.trainer import BaseTrainer, RemoteTrainer
 from ludwig.utils.data_utils import use_credentials
@@ -375,7 +375,7 @@ def create_runner(**kwargs):
         trainer.shutdown()
 
 
-@register_ray_trainer("trainer", MODEL_ECD, default=True)
+@register_ray_trainer(MODEL_ECD, default=True)
 class RayTrainerV2(BaseTrainer):
     def __init__(
         self,
@@ -564,7 +564,7 @@ def __init__(self, **kwargs):
         super().__init__(horovod=horovod, **kwargs)
 
 
-@register_ray_trainer("ray_legacy_trainer", MODEL_ECD)
+@register_ray_trainer("ecd_ray_legacy")
 class RayLegacyTrainer(BaseTrainer):
     def __init__(self, horovod_kwargs: Dict[str, Any], executable_kwargs: Dict[str, Any], **kwargs):
         # TODO ray: make this more configurable by allowing YAML overrides of timeout_s, etc.
@@ -891,10 +891,7 @@ def initialize_pytorch(self, **kwargs):
     def create_trainer(self, model: BaseModel, **kwargs) -> "BaseTrainer":  # noqa: F821
         executable_kwargs = {**kwargs, **self._pytorch_kwargs}
         if not self._use_legacy:
-            trainers_for_model = get_from_registry(model.type(), ray_trainers_registry)
-
-            config: Union[ECDTrainerConfig, GBMTrainerConfig] = kwargs["config"]
-            trainer_cls = get_from_registry(config.type, trainers_for_model)
+            trainer_cls = get_from_registry(model.type(), ray_trainers_registry)
 
             # Deep copy to workaround https://github.com/ray-project/ray/issues/24139
             all_kwargs = {
diff --git a/ludwig/constants.py b/ludwig/constants.py
index 902f4eff066..a6cfb7ccda6 100644
--- a/ludwig/constants.py
+++ b/ludwig/constants.py
@@ -103,6 +103,7 @@
 RESIZE_METHODS = [CROP_OR_PAD, INTERPOLATE]
 
 TRAINER = "trainer"
+LIGHTGBM_TRAINER = "lightgbm_trainer"
 METRIC = "metric"
 PREDICTION = "prediction"
 LOGITS = "logits"
diff --git a/ludwig/schema/__init__.py b/ludwig/schema/__init__.py
index f7851fbe6e7..c45d50f9459 100644
--- a/ludwig/schema/__init__.py
+++ b/ludwig/schema/__init__.py
@@ -28,6 +28,7 @@
     DEFAULTS,
     HYPEROPT,
     INPUT_FEATURES,
+    MODEL_ECD,
     MODEL_TYPE,
     OUTPUT_FEATURES,
     PREPROCESSING,
@@ -40,8 +41,8 @@
 from ludwig.schema.trainer import get_model_type_jsonschema, get_trainer_jsonschema
 
 
-@lru_cache(maxsize=1)
-def get_schema():
+@lru_cache(maxsize=2)
+def get_schema(model_type: str):
     schema = {
         "type": "object",
         "properties": {
@@ -49,7 +50,7 @@ def get_schema():
             INPUT_FEATURES: get_input_feature_jsonschema(),
             OUTPUT_FEATURES: get_output_feature_jsonschema(),
             COMBINER: get_combiner_jsonschema(),
-            TRAINER: get_trainer_jsonschema(),
+            TRAINER: get_trainer_jsonschema(model_type),
             PREPROCESSING: get_preprocessing_jsonschema(),
             HYPEROPT: {},
             DEFAULTS: get_defaults_jsonschema(),
@@ -60,7 +61,7 @@ def get_schema():
     return schema
 
 
-@lru_cache(maxsize=1)
+@lru_cache(maxsize=2)
 def get_validator():
     # Manually add support for tuples (pending upstream changes: https://github.com/Julian/jsonschema/issues/148):
     def custom_is_array(checker, instance):
@@ -79,5 +80,6 @@ def validate_config(config):
 
     # Update config from previous versions to check that backwards compatibility will enable a valid config
     updated_config = upgrade_to_latest_version(config)
+    model_type = updated_config.get(MODEL_TYPE, MODEL_ECD)
 
-    validate(instance=updated_config, schema=get_schema(), cls=get_validator())
+    validate(instance=updated_config, schema=get_schema(model_type), cls=get_validator())
diff --git a/ludwig/schema/trainer.py b/ludwig/schema/trainer.py
index fb1a8047af8..2f5924575be 100644
--- a/ludwig/schema/trainer.py
+++ b/ludwig/schema/trainer.py
@@ -1,18 +1,9 @@
 from abc import ABC
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 from marshmallow_dataclass import dataclass
 
-from ludwig.constants import (
-    COMBINED,
-    DEFAULT_BATCH_SIZE,
-    LOSS,
-    MAX_POSSIBLE_BATCH_SIZE,
-    MODEL_ECD,
-    MODEL_GBM,
-    TRAINING,
-    TYPE,
-)
+from ludwig.constants import COMBINED, DEFAULT_BATCH_SIZE, LOSS, MAX_POSSIBLE_BATCH_SIZE, MODEL_ECD, MODEL_GBM, TRAINING
 from ludwig.schema import utils as schema_utils
 from ludwig.schema.metadata.trainer_metadata import TRAINER_METADATA
 from ludwig.schema.optimizers import (
@@ -26,9 +17,9 @@
 trainer_schema_registry = Registry()
 
 
-def register_trainer_schema(name: str):
+def register_trainer_schema(model_type: str):
     def wrap(trainer_config: BaseTrainerConfig):
-        trainer_schema_registry[name] = trainer_config
+        trainer_schema_registry[model_type] = trainer_config
         return trainer_config
 
     return wrap
@@ -38,8 +29,6 @@ def wrap(trainer_config: BaseTrainerConfig):
 class BaseTrainerConfig(schema_utils.BaseMarshmallowConfig, ABC):
     """Common trainer parameter values."""
 
-    type: str
-
     learning_rate: Union[float, str] = schema_utils.OneOfOptionsField(
         default=0.001,
         allow_none=False,
@@ -102,21 +91,12 @@ class BaseTrainerConfig(schema_utils.BaseMarshmallowConfig, ABC):
     )
 
 
-@register_trainer_schema("trainer")
+@register_trainer_schema("ecd_ray_legacy")
+@register_trainer_schema(MODEL_ECD)
 @dataclass
 class ECDTrainerConfig(BaseTrainerConfig):
     """Dataclass that configures most of the hyperparameters used for ECD model training."""
 
-    type: str = schema_utils.StringOptions(
-        ["trainer", "ray_legacy_trainer"],
-        default="trainer",
-        description=(
-            "Trainer to use for training the model. Must be one of ['trainer', 'ray_legacy_trainer'] - "
-            "corresponds to name in `ludwig.trainers.registry.(ray_)trainers_registry` (default: 'trainer')"
-        ),
-        allow_none=False,
-    )
-
     optimizer: BaseOptimizerConfig = OptimizerDataclassField(
         default={"type": "adam"}, description="Parameter values for selected torch optimizer."
     )
@@ -313,22 +293,11 @@ class ECDTrainerConfig(BaseTrainerConfig):
     )
 
 
-@register_trainer_schema("lightgbm_trainer")
+@register_trainer_schema(MODEL_GBM)
 @dataclass
 class GBMTrainerConfig(BaseTrainerConfig):
     """Dataclass that configures most of the hyperparameters used for GBM model training."""
 
-    type: str = schema_utils.StringOptions(
-        ["lightgbm_trainer"],
-        default="lightgbm_trainer",
-        description=(
-            "Trainer to use for training the model. Must be one of ['lightgbm_trainer'] - "
-            "corresponds to name in `ludwig.trainers.registry.(ray_)trainers_registry` "
-            "(default: 'lightgbm_trainer')"
-        ),
-        allow_none=False,
-    )
-
     # NOTE: Overwritten here to provide a default value. In many places, we fall back to eval_batch_size if batch_size
     # is not specified. GBM does not have a value for batch_size, so we need to specify eval_batch_size here.
     eval_batch_size: Union[None, int, str] = schema_utils.PositiveInteger(
@@ -550,46 +519,21 @@ class GBMTrainerConfig(BaseTrainerConfig):
 def get_model_type_jsonschema():
     return {
         "type": "string",
-        "enum": [MODEL_ECD, MODEL_GBM],
+        "enum": [MODEL_ECD, MODEL_GBM, "ecd_ray_legacy"],
         "default": MODEL_ECD,
         "title": "type",
         "description": "Select the model type.",
     }
 
 
-def get_trainer_jsonschema():
-    def allowed_types_for_trainer_schema(cls) -> List[str]:
-        """Returns the allowed values for the "type" field on the given trainer schema."""
-        return cls.Schema().fields[TYPE].validate.choices
-
-    conds = []
-    all_trainer_types = []
-    for trainer in trainer_schema_registry:
-        trainer_cls = trainer_schema_registry[trainer]
-
-        allowed_trainer_types = allowed_types_for_trainer_schema(trainer_cls)
-        all_trainer_types.extend(allowed_trainer_types)
-
-        other_props = schema_utils.unload_jsonschema_from_marshmallow_class(trainer_cls)["properties"]
-        other_props.pop("type")
-        for trainer_type in allowed_trainer_types:
-            trainer_cond = schema_utils.create_cond(
-                {"type": trainer_type},
-                other_props,
-            )
-            conds.append(trainer_cond)
+def get_trainer_jsonschema(model_type: str):
+    trainer_cls = trainer_schema_registry[model_type]
+    props = schema_utils.unload_jsonschema_from_marshmallow_class(trainer_cls)["properties"]
 
     return {
         "type": "object",
-        "properties": {
-            "type": {
-                "type": "string",
-                "enum": all_trainer_types,
-                "title": "type",
-                "description": "Select the trainer type.",
-            },
-        },
+        "properties": props,
         "title": "trainer_options",
-        "allOf": conds,
-        "description": "Use type 'trainer' for training ECD models, or 'lightgbm_trainer' for Tree models.",
+        "additionalProperties": False,
+        "description": "Schema for trainer determined by Model Type",
     }
diff --git a/ludwig/schema/utils.py b/ludwig/schema/utils.py
index d931ba511f1..918824a0ea3 100644
--- a/ludwig/schema/utils.py
+++ b/ludwig/schema/utils.py
@@ -33,19 +33,12 @@ def load_trainer_with_kwargs(
     In particular, it chooses the correct default type for an incoming config (if it doesn't have one already), but
     otherwise passes all other parameters through without change.
     """
-    from ludwig.constants import MODEL_ECD, TYPE
+    from ludwig.constants import MODEL_ECD
     from ludwig.schema.trainer import ECDTrainerConfig, GBMTrainerConfig
 
     trainer_schema = ECDTrainerConfig if model_type == MODEL_ECD else GBMTrainerConfig
 
-    def default_type_for_trainer_schema(cls):
-        """Returns the default values for the "type" field on the given trainer schema."""
-        return cls.Schema().fields[TYPE].dump_default
-
-    # Create a copy of kwargs with the correct default type (which will be overridden if kwargs already contains 'type')
-    kwargs_with_type = {**{TYPE: default_type_for_trainer_schema(trainer_schema)}, **kwargs}
-
-    return load_config_with_kwargs(trainer_schema, kwargs_with_type)
+    return load_config_with_kwargs(trainer_schema, kwargs)
 
 
 def load_config_with_kwargs(
diff --git a/ludwig/trainers/registry.py b/ludwig/trainers/registry.py
index 04394a535b9..8515e6cf0d1 100644
--- a/ludwig/trainers/registry.py
+++ b/ludwig/trainers/registry.py
@@ -1,60 +1,48 @@
-from typing import List, Union
-
 from ludwig.utils.registry import DEFAULT_KEYS, Registry
 
 trainers_registry = Registry()
 ray_trainers_registry = Registry()
 
 
-def register_trainer(name: str, model_types: Union[str, List[str]], default=False):
+def register_trainer(model_type: str, default=False):
     """Register a trainer class that supports training the given model types.
 
     Using default=True will make the trainer the default trainer for the model type.
 
     Args:
-        name: The name of the trainer, as it can be used in the config.
-        model_types: The model types that the trainer supports.
+        model_type: The model_type which dictates the trainer type to use.
         default: Whether the trainer should be the default trainer for the model type.
     """
-    if isinstance(model_types, str):
-        model_types = [model_types]
 
     def wrap(cls):
-        for model_type in model_types:
-            _model_type_registry = trainers_registry.get(model_type, {})
-            _model_type_registry[name] = cls
-            if default:
-                if DEFAULT_KEYS[0] in _model_type_registry:
-                    raise ValueError(f"Default trainer already registered for model type {model_type}")
-                for key in DEFAULT_KEYS:
-                    _model_type_registry[key] = cls
-            trainers_registry[model_type] = _model_type_registry
+        trainers_registry[model_type] = cls
+        if default:
+            if DEFAULT_KEYS[0] in trainers_registry:
+                raise ValueError(f"Default trainer already registered for model type {model_type}")
+            for key in DEFAULT_KEYS:
+                trainers_registry[key] = cls
         return cls
 
     return wrap
 
 
-def register_ray_trainer(name: str, model_types: Union[str, List[str]], default=False):
+def register_ray_trainer(model_type: str, default=False):
     """Register a trainer class that supports training the given model types with Ray backend.
 
     Using default=True will make the trainer the default trainer for the model type.
 
     Args:
-        name: The name of the trainer, as it can be used in the config.
-        model_types: The model types that the trainer supports.
+        model_type: The model_type which dictates the trainer type to use.
         default: Whether the trainer should be the default trainer for the model type.
     """
-    if isinstance(model_types, str):
-        model_types = [model_types]
 
     def wrap(cls):
-        for model_type in model_types:
-            _model_type_registry = ray_trainers_registry.get(model_type, {})
-            _model_type_registry[name] = cls
-            if default:
-                for key in DEFAULT_KEYS:
-                    _model_type_registry[key] = cls
-            ray_trainers_registry[model_type] = _model_type_registry
+        ray_trainers_registry[model_type] = cls
+        if default:
+            if DEFAULT_KEYS[0] in ray_trainers_registry:
+                raise ValueError(f"Default trainer already registered for model type {model_type}")
+            for key in DEFAULT_KEYS:
+                ray_trainers_registry[key] = cls
         return cls
 
     return wrap
diff --git a/ludwig/trainers/trainer.py b/ludwig/trainers/trainer.py
index ac3933ec3ed..0ca05e7db59 100644
--- a/ludwig/trainers/trainer.py
+++ b/ludwig/trainers/trainer.py
@@ -67,7 +67,7 @@
 logger = logging.getLogger(__name__)
 
 
-@register_trainer("trainer", MODEL_ECD, default=True)
+@register_trainer(MODEL_ECD, default=True)
 class Trainer(BaseTrainer):
     """Trainer is a class that trains a model."""
 
diff --git a/ludwig/trainers/trainer_lightgbm.py b/ludwig/trainers/trainer_lightgbm.py
index c0256e198ec..a47a984fa42 100644
--- a/ludwig/trainers/trainer_lightgbm.py
+++ b/ludwig/trainers/trainer_lightgbm.py
@@ -39,7 +39,7 @@ def iter_feature_metrics(features: LudwigFeatureDict) -> Iterable[Tuple[str, str
             yield feature_name, metric
 
 
-@register_trainer("lightgbm_trainer", MODEL_GBM, default=True)
+@register_trainer(MODEL_GBM)
 class LightGBMTrainer(BaseTrainer):
     TRAIN_KEY = "train"
     VALID_KEY = "validation"
@@ -854,7 +854,7 @@ def _map_to_lgb_ray_params(params: Dict[str, Any]) -> Dict[str, Any]:
     return ray_params
 
 
-@register_ray_trainer("lightgbm_trainer", MODEL_GBM, default=True)
+@register_ray_trainer(MODEL_GBM)
 class LightGBMRayTrainer(LightGBMTrainer):
     def __init__(
         self,
diff --git a/tests/integration_tests/test_gbm.py b/tests/integration_tests/test_gbm.py
index e1c17f24206..6e6a00e1572 100644
--- a/tests/integration_tests/test_gbm.py
+++ b/tests/integration_tests/test_gbm.py
@@ -3,7 +3,6 @@
 import numpy as np
 import pytest
 import torch
-from marshmallow import ValidationError
 
 from ludwig.api import LudwigModel
 from ludwig.constants import INPUT_FEATURES, MODEL_TYPE, OUTPUT_FEATURES, TRAINER
@@ -201,35 +200,6 @@ def test_ray_gbm_number(tmpdir, ray_backend, ray_cluster_4cpu):
     run_test_gbm_number(tmpdir, ray_backend)
 
 
-def run_test_gbm_schema(backend_config):
-    input_features = [number_feature()]
-    output_features = [binary_feature()]
-
-    # When I pass an invalid trainer configuration,
-    invalid_trainer = "trainer"
-    config = {
-        MODEL_TYPE: "gbm",
-        "input_features": input_features,
-        "output_features": output_features,
-        TRAINER: {
-            "num_boost_round": 2,
-            "type": invalid_trainer,
-        },
-    }
-    with pytest.raises(ValidationError):
-        # Then I should get a schema validation error
-        LudwigModel(config, backend=backend_config)
-
-
-def test_local_gbm_schema(local_backend):
-    run_test_gbm_schema(local_backend)
-
-
-@pytest.mark.distributed
-def test_ray_gbm_schema(ray_backend, ray_cluster_4cpu):
-    run_test_gbm_schema(ray_backend)
-
-
 def test_hummingbird_conversion_binary(tmpdir, local_backend):
     input_features = [number_feature(), category_feature(encoder={"reduce_output": "sum"})]
     output_features = [binary_feature()]
diff --git a/tests/ludwig/schema/test_validate_config_misc.py b/tests/ludwig/schema/test_validate_config_misc.py
index 42f8ac5c797..14e702acc3e 100644
--- a/tests/ludwig/schema/test_validate_config_misc.py
+++ b/tests/ludwig/schema/test_validate_config_misc.py
@@ -1,7 +1,7 @@
 import pytest
 from jsonschema.exceptions import ValidationError
 
-from ludwig.constants import DECODER, ENCODER, LOSS, PREPROCESSING, TRAINER
+from ludwig.constants import DECODER, ENCODER, LOSS, MODEL_ECD, MODEL_GBM, MODEL_TYPE, PREPROCESSING, TRAINER
 from ludwig.features.audio_feature import AudioFeatureMixin
 from ludwig.features.bag_feature import BagFeatureMixin
 from ludwig.features.binary_feature import BinaryFeatureMixin
@@ -300,3 +300,34 @@ def test_validate_defaults_schema():
     }
 
     validate_config(config)
+
+
+def test_validate_no_trainer_type():
+    config = {
+        "model_type": "ecd",
+        "input_features": [
+            category_feature(),
+            number_feature(),
+        ],
+        "output_features": [category_feature()],
+        "trainer": {"learning_rate": "auto", "batch_size": "auto"},
+    }
+
+    # Ensure validation succeeds with ECD trainer params and ECD model type
+    validate_config(config)
+
+    # Ensure validation fails with ECD trainer params and GBM model type
+    config[MODEL_TYPE] = MODEL_GBM
+    with pytest.raises(ValidationError):
+        validate_config(config)
+
+    # Switch to trainer with valid GBM params
+    config[TRAINER] = {"tree_learner": "serial"}
+
+    # Ensure validation succeeds with GBM trainer params and GBM model type
+    validate_config(config)
+
+    # Ensure validation fails with GBM trainer params and ECD model type
+    config[MODEL_TYPE] = MODEL_ECD
+    with pytest.raises(ValidationError):
+        validate_config(config)
diff --git a/tests/ludwig/utils/test_defaults.py b/tests/ludwig/utils/test_defaults.py
index 96856117776..3b464dc6653 100644
--- a/tests/ludwig/utils/test_defaults.py
+++ b/tests/ludwig/utils/test_defaults.py
@@ -1,7 +1,6 @@
 import copy
 
 import pytest
-from marshmallow import ValidationError
 
 from ludwig.constants import (
     CATEGORY,
@@ -17,7 +16,6 @@
     MAX_POSSIBLE_BATCH_SIZE,
     MISSING_VALUE_STRATEGY,
     MODEL_ECD,
-    MODEL_GBM,
     MODEL_TYPE,
     OUTPUT_FEATURES,
     PREPROCESSING,
@@ -149,56 +147,6 @@ def test_default_model_type():
     assert merged_config[MODEL_TYPE] == MODEL_ECD
 
 
-@pytest.mark.parametrize(
-    "model_trainer_type",
-    [
-        (MODEL_ECD, "trainer"),
-        (MODEL_GBM, "lightgbm_trainer"),
-    ],
-)
-def test_default_trainer_type(model_trainer_type):
-    model_type, expected_trainer_type = model_trainer_type
-    config = {
-        INPUT_FEATURES: [category_feature()],
-        OUTPUT_FEATURES: [category_feature()],
-        MODEL_TYPE: model_type,
-    }
-
-    merged_config = merge_with_defaults(config)
-
-    assert merged_config[TRAINER][TYPE] == expected_trainer_type
-
-
-def test_overwrite_trainer_type():
-    expected_trainer_type = "ray_legacy_trainer"
-    config = {
-        INPUT_FEATURES: [category_feature()],
-        OUTPUT_FEATURES: [category_feature()],
-        MODEL_TYPE: MODEL_ECD,
-        "trainer": {"type": expected_trainer_type},
-    }
-
-    merged_config = merge_with_defaults(config)
-
-    assert merged_config[TRAINER][TYPE] == expected_trainer_type
-
-
-@pytest.mark.parametrize(
-    "model_type",
-    [MODEL_ECD, MODEL_GBM],
-)
-def test_invalid_trainer_type(model_type):
-    config = {
-        INPUT_FEATURES: [category_feature()],
-        OUTPUT_FEATURES: [category_feature()],
-        MODEL_TYPE: model_type,
-        "trainer": {"type": "invalid_trainer"},
-    }
-
-    with pytest.raises(ValidationError):
-        merge_with_defaults(config)
-
-
 def test_set_default_values():
     config = {
         INPUT_FEATURES: [number_feature(encoder={"max_sequence_length": 10})],
@@ -416,7 +364,6 @@ def test_merge_with_defaults():
             "search_alg": {"type": "variant_generator"},
         },
         "trainer": {
-            "type": "trainer",
             "learning_rate": 0.001,
             "validation_metric": "loss",
             "validation_field": "combined",

From 8aabd540d3fe457b55fe891fe464bda8500bbe1d Mon Sep 17 00:00:00 2001
From: abidwael <103003638+abidwael@users.noreply.github.com>
Date: Fri, 14 Oct 2022 15:56:36 -0700
Subject: [PATCH 23/29] Model performance in GitHub actions (#2568)

* testing runner

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix yaml

* benchmark datasets and assert their performance

* formatting

* logging resource usage

* logging resource usage

* test non kaggle datasets

* add kaggle to env

* fix for collection of unnecessary tests

* add expected results

* second run on github runner to stabilize expected performance

* add resource usage and final expected performance and runtime values

* fix asserts

* updated workflow to run on merge to master

* add `not benchmark` marker

* excluding benchmark

* separating to new file

* remove benchmark pytest from pytest file

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* clean dependencies

* clean dependencies

* remove unused `process_config.py`

* not running benchmark in main test

* add gbm configs

* add gbm configs

* debug: record gbm performance

* debug: printing ecd and gbm performance

* tracking scores training multiple times

* formatting

* using ray backend instead of local

* cleaning workflow file

* printing ray backend performance

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* using jsons assert

* debug runner performance

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* debugging

* using dataclasses

* workflow yaml cleanup

* adjust tolerated range

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* changing gh action to run on push to master

* address nits

* addressing comments

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* address comments

* change to push to master

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .github/workflows/pytest.yml                  |   5 +-
 .github/workflows/pytest_benchmarks.yml       | 105 +++
 .../configs/adult_census_income.ecd.yaml      |  46 ++
 .../configs/adult_census_income.gbm.yaml      |  55 ++
 .../benchmark/configs/ames_housing.ecd.yaml   | 175 ++++
 .../benchmark/configs/ames_housing.gbm.yaml   | 185 +++++
 .../configs/mercedes_benz_greener.ecd.yaml    | 769 +++++++++++++++++
 .../configs/mercedes_benz_greener.gbm.yaml    | 779 ++++++++++++++++++
 .../benchmark/configs/sarcos.ecd.yaml         | 100 +++
 .../benchmark/configs/sarcos.gbm.yaml         | 109 +++
 .../benchmark/expected_metric.py              |  20 +
 .../adult_census_income.ecd.yaml              |   5 +
 .../adult_census_income.gbm.yaml              |   5 +
 .../expected_metrics/ames_housing.ecd.yaml    |   5 +
 .../expected_metrics/ames_housing.gbm.yaml    |   5 +
 .../mercedes_benz_greener.ecd.yaml            |   5 +
 .../mercedes_benz_greener.gbm.yaml            |   5 +
 .../expected_metrics/sarcos.ecd.yaml          |   5 +
 .../expected_metrics/sarcos.gbm.yaml          |   5 +
 .../benchmark/test_model_performance.py       |  62 ++
 20 files changed, 2449 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/pytest_benchmarks.yml
 create mode 100644 tests/regression_tests/benchmark/configs/adult_census_income.ecd.yaml
 create mode 100644 tests/regression_tests/benchmark/configs/adult_census_income.gbm.yaml
 create mode 100644 tests/regression_tests/benchmark/configs/ames_housing.ecd.yaml
 create mode 100644 tests/regression_tests/benchmark/configs/ames_housing.gbm.yaml
 create mode 100644 tests/regression_tests/benchmark/configs/mercedes_benz_greener.ecd.yaml
 create mode 100644 tests/regression_tests/benchmark/configs/mercedes_benz_greener.gbm.yaml
 create mode 100644 tests/regression_tests/benchmark/configs/sarcos.ecd.yaml
 create mode 100644 tests/regression_tests/benchmark/configs/sarcos.gbm.yaml
 create mode 100644 tests/regression_tests/benchmark/expected_metric.py
 create mode 100644 tests/regression_tests/benchmark/expected_metrics/adult_census_income.ecd.yaml
 create mode 100644 tests/regression_tests/benchmark/expected_metrics/adult_census_income.gbm.yaml
 create mode 100644 tests/regression_tests/benchmark/expected_metrics/ames_housing.ecd.yaml
 create mode 100644 tests/regression_tests/benchmark/expected_metrics/ames_housing.gbm.yaml
 create mode 100644 tests/regression_tests/benchmark/expected_metrics/mercedes_benz_greener.ecd.yaml
 create mode 100644 tests/regression_tests/benchmark/expected_metrics/mercedes_benz_greener.gbm.yaml
 create mode 100644 tests/regression_tests/benchmark/expected_metrics/sarcos.ecd.yaml
 create mode 100644 tests/regression_tests/benchmark/expected_metrics/sarcos.gbm.yaml
 create mode 100644 tests/regression_tests/benchmark/test_model_performance.py

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 327c4a58b8e..678a9bb4ab4 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -44,6 +44,9 @@ jobs:
       NEUROPOD_VERISON: "0.3.0-rc6"
       TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }}
       RAY_VERSION: ${{ matrix.ray-version }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
+      EXCLUDED_MARKERS: "benchmark"
 
     name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}
     services:
@@ -162,7 +165,7 @@ jobs:
 
       - name: Tests
         run: |
-          RUN_PRIVATE=1 LUDWIG_TEST_SUITE_TIMEOUT_S=3600 pytest -v --timeout 300 --durations 100 -m "$MARKERS" --junitxml pytest.xml tests
+          RUN_PRIVATE=1 LUDWIG_TEST_SUITE_TIMEOUT_S=3600 pytest -v --timeout 300 --durations 100 -m "($MARKERS) and (not $EXCLUDED_MARKERS)" --junitxml pytest.xml tests
 
       - name: Upload Unit Test Results
         if: always()
diff --git a/.github/workflows/pytest_benchmarks.yml b/.github/workflows/pytest_benchmarks.yml
new file mode 100644
index 00000000000..10d2cdf364c
--- /dev/null
+++ b/.github/workflows/pytest_benchmarks.yml
@@ -0,0 +1,105 @@
+# This workflow will install Python dependencies, and run benchmarks on a few datasets
+# to test model performance regressions.
+
+name: benchmark
+
+on:
+  push:
+    branches: ["master", "release-*"]
+
+# we want an ongoing run of this workflow to be canceled by a later commit
+# so that there is only one concurrent run of this workflow for each branch
+concurrency:
+  group: benchmark-${{ github.head_ref || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  benchmark-pytest:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version: [3.8]
+        test-markers: ["benchmark"]
+        include:
+          - python-version: 3.8
+            pytorch-version: 1.12.1
+            torchscript-version: 1.10.2
+            ray-version: 2.0.0
+    env:
+      PYTORCH: ${{ matrix.pytorch-version }}
+      MARKERS: ${{ matrix.test-markers }}
+      TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }}
+      RAY_VERSION: ${{ matrix.ray-version }}
+
+    name: py${{ matrix.python-version  }}, torch-${{ matrix.pytorch-version }}, backend-ray-${{ matrix.ray-version }}
+
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Setup Linux
+        if: runner.os == 'linux'
+        run: |
+          sudo apt-get install -y cmake libsndfile1
+
+      - name: Setup macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install libuv
+
+      - name: pip cache
+        uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-
+
+      - name: Install dependencies
+        env:
+          HOROVOD_WITH_PYTORCH: 1
+          HOROVOD_WITHOUT_MPI: 1
+          HOROVOD_WITHOUT_TENSORFLOW: 1
+          HOROVOD_WITHOUT_MXNET: 1
+        run: |
+          python --version
+          pip --version
+          python -m pip install -U pip
+          cmake --version
+
+          # skip installation of some requirements
+          echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
+          echo > requirements-temp && mv requirements-temp requirements_serve.txt
+          echo > requirements-temp && mv requirements-temp requirements_viz.txt
+          echo > requirements-temp && mv requirements-temp requirements_explain.txt
+
+          extra_index_url=https://download.pytorch.org/whl/cpu
+          pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
+          pip install ray==$RAY_VERSION
+          ray_expected=$(python -c "import ray; print(ray.__version__)")
+          torch_expected=$(python -c "import torch; print(torch.__version__)")
+          pip install '.[test]' --extra-index-url $extra_index_url
+          pip list
+          python -c "import torch; assert torch.__version__ == \"$torch_expected\", f\"torch {torch.__version__} != $torch_expected\""
+          python -c "import ray; assert ray.__version__ == \"$ray_expected\", f\"ray {ray.__version__} != $ray_expected\""
+        shell: bash
+
+      - name: Tests
+        env:
+          KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
+          KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
+        run: |
+          pytest -s -v -m "$MARKERS" --junitxml pytest.xml tests/regression_tests/benchmark
+
+      - name: Upload Unit Test Results
+        if: always()
+        uses: actions/upload-artifact@v2
+        with:
+          name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
+          path: pytest.xml
diff --git a/tests/regression_tests/benchmark/configs/adult_census_income.ecd.yaml b/tests/regression_tests/benchmark/configs/adult_census_income.ecd.yaml
new file mode 100644
index 00000000000..3b96d88f25a
--- /dev/null
+++ b/tests/regression_tests/benchmark/configs/adult_census_income.ecd.yaml
@@ -0,0 +1,46 @@
+combiner:
+  type: tabnet
+defaults:
+  number:
+    preprocessing:
+      missing_value_strategy: fill_with_const
+      normalization: null
+input_features:
+- name: age
+  type: number
+- name: workclass
+  type: category
+- name: fnlwgt
+  type: number
+- name: education
+  type: category
+- name: education-num
+  type: number
+- name: marital-status
+  type: category
+- name: occupation
+  type: category
+- name: relationship
+  type: category
+- name: race
+  type: category
+- name: sex
+  type: category
+- name: capital-gain
+  type: number
+- name: capital-loss
+  type: number
+- name: hours-per-week
+  type: number
+- name: native-country
+  type: category
+output_features:
+- name: income
+  type: category
+trainer:
+  batch_size: 1345
+  decay: false
+  decay_rate: 0.629931
+  eval_batch_size: 16384
+  evaluate_training_set: false
+  learning_rate: 0.02714507227517137
diff --git a/tests/regression_tests/benchmark/configs/adult_census_income.gbm.yaml b/tests/regression_tests/benchmark/configs/adult_census_income.gbm.yaml
new file mode 100644
index 00000000000..13301ba34a0
--- /dev/null
+++ b/tests/regression_tests/benchmark/configs/adult_census_income.gbm.yaml
@@ -0,0 +1,55 @@
+input_features:
+- name: age
+  type: number
+- name: workclass
+  type: category
+- name: fnlwgt
+  type: number
+- name: education
+  type: category
+- name: education-num
+  type: number
+- name: marital-status
+  type: category
+- name: occupation
+  type: category
+- name: relationship
+  type: category
+- name: race
+  type: category
+- name: sex
+  type: category
+- name: capital-gain
+  type: number
+- name: capital-loss
+  type: number
+- name: hours-per-week
+  type: number
+- name: native-country
+  type: category
+output_features:
+- name: income
+  type: category
+defaults:
+  number:
+    preprocessing:
+      missing_value_strategy: fill_with_const
+      normalization: zscore
+trainer:
+  bagging_fraction: 0.33531469598825486
+  bagging_freq: 4
+  feature_fraction: 0.010508115166351847
+  lambda_l1: 5.709515289211761e-05
+  lambda_l2: 8.088926391042813e-05
+  learning_rate: 0.09523232434861406
+  max_bin: 22575
+  max_depth: 204
+  min_data_in_leaf: 26
+  min_gain_to_split: 0.2038774631952418
+  min_sum_hessian_in_leaf: 6
+  num_boost_round: 618
+  num_leaves: 92
+  early_stop: 5
+  eval_batch_size: 16384
+  evaluate_training_set: false
+type: gbm
diff --git a/tests/regression_tests/benchmark/configs/ames_housing.ecd.yaml b/tests/regression_tests/benchmark/configs/ames_housing.ecd.yaml
new file mode 100644
index 00000000000..969c58a5ed2
--- /dev/null
+++ b/tests/regression_tests/benchmark/configs/ames_housing.ecd.yaml
@@ -0,0 +1,175 @@
+combiner:
+  type: tabnet
+defaults:
+  number:
+    preprocessing:
+      missing_value_strategy: fill_with_mean
+      normalization: null
+input_features:
+- name: MSSubClass
+  type: category
+- name: MSZoning
+  type: category
+- name: LotFrontage
+  type: number
+- name: LotArea
+  type: number
+- name: Street
+  type: category
+- name: Alley
+  type: category
+- name: LotShape
+  type: category
+- name: LandContour
+  type: category
+- name: Utilities
+  type: category
+- name: LotConfig
+  type: category
+- name: LandSlope
+  type: category
+- name: Neighborhood
+  type: category
+- name: Condition1
+  type: category
+- name: Condition2
+  type: category
+- name: BldgType
+  type: category
+- name: HouseStyle
+  type: category
+- name: OverallQual
+  type: category
+- name: OverallCond
+  type: category
+- name: YearBuilt
+  type: number
+- name: YearRemodAdd
+  type: number
+- name: RoofStyle
+  type: category
+- name: RoofMatl
+  type: category
+- name: Exterior1st
+  type: category
+- name: Exterior2nd
+  type: category
+- name: MasVnrType
+  type: category
+- name: MasVnrArea
+  type: number
+- name: ExterQual
+  type: category
+- name: ExterCond
+  type: category
+- name: Foundation
+  type: category
+- name: BsmtQual
+  type: category
+- name: BsmtCond
+  type: category
+- name: BsmtExposure
+  type: category
+- name: BsmtFinType1
+  type: category
+- name: BsmtFinSF1
+  type: number
+- name: BsmtFinType2
+  type: category
+- name: BsmtFinSF2
+  type: number
+- name: BsmtUnfSF
+  type: number
+- name: TotalBsmtSF
+  type: number
+- name: Heating
+  type: category
+- name: HeatingQC
+  type: category
+- name: CentralAir
+  type: binary
+- name: Electrical
+  type: category
+- name: 1stFlrSF
+  type: number
+- name: 2ndFlrSF
+  type: number
+- name: LowQualFinSF
+  type: number
+- name: GrLivArea
+  type: number
+- name: BsmtFullBath
+  type: number
+- name: BsmtHalfBath
+  type: number
+- name: FullBath
+  type: number
+- name: HalfBath
+  type: number
+- name: BedroomAbvGr
+  type: number
+- name: KitchenAbvGr
+  type: number
+- name: KitchenQual
+  type: category
+- name: TotRmsAbvGrd
+  type: number
+- name: Functional
+  type: category
+- name: Fireplaces
+  type: number
+- name: FireplaceQu
+  type: category
+- name: GarageType
+  type: category
+- name: GarageYrBlt
+  type: number
+- name: GarageFinish
+  type: category
+- name: GarageCars
+  type: number
+- name: GarageArea
+  type: number
+- name: GarageQual
+  type: category
+- name: GarageCond
+  type: category
+- name: PavedDrive
+  type: category
+- name: WoodDeckSF
+  type: number
+- name: OpenPorchSF
+  type: number
+- name: EnclosedPorch
+  type: number
+- name: 3SsnPorch
+  type: number
+- name: ScreenPorch
+  type: number
+- name: PoolArea
+  type: number
+- name: PoolQC
+  type: category
+- name: Fence
+  type: category
+- name: MiscFeature
+  type: category
+- name: MiscVal
+  type: number
+- name: MoSold
+  type: category
+- name: YrSold
+  type: number
+- name: SaleType
+  type: category
+- name: SaleCondition
+  type: category
+output_features:
+- name: SalePrice
+  type: number
+trainer:
+  batch_size: 35
+  decay: false
+  eval_batch_size: 16384
+  evaluate_training_set: false
+  learning_rate: 0.0858479746528337
diff --git a/tests/regression_tests/benchmark/configs/ames_housing.gbm.yaml b/tests/regression_tests/benchmark/configs/ames_housing.gbm.yaml
new file mode 100644
index 00000000000..911b7d38ab3
--- /dev/null
+++ b/tests/regression_tests/benchmark/configs/ames_housing.gbm.yaml
@@ -0,0 +1,185 @@
+input_features:
+- name: MSSubClass
+  type: category
+- name: MSZoning
+  type: category
+- name: LotFrontage
+  type: number
+- name: LotArea
+  type: number
+- name: Street
+  type: category
+- name: Alley
+  type: category
+- name: LotShape
+  type: category
+- name: LandContour
+  type: category
+- name: Utilities
+  type: category
+- name: LotConfig
+  type: category
+- name: LandSlope
+  type: category
+- name: Neighborhood
+  type: category
+- name: Condition1
+  type: category
+- name: Condition2
+  type: category
+- name: BldgType
+  type: category
+- name: HouseStyle
+  type: category
+- name: OverallQual
+  type: category
+- name: OverallCond
+  type: category
+- name: YearBuilt
+  type: number
+- name: YearRemodAdd
+  type: number
+- name: RoofStyle
+  type: category
+- name: RoofMatl
+  type: category
+- name: Exterior1st
+  type: category
+- name: Exterior2nd
+  type: category
+- name: MasVnrType
+  type: category
+- name: MasVnrArea
+  type: number
+- name: ExterQual
+  type: category
+- name: ExterCond
+  type: category
+- name: Foundation
+  type: category
+- name: BsmtQual
+  type: category
+- name: BsmtCond
+  type: category
+- name: BsmtExposure
+  type: category
+- name: BsmtFinType1
+  type: category
+- name: BsmtFinSF1
+  type: number
+- name: BsmtFinType2
+  type: category
+- name: BsmtFinSF2
+  type: number
+- name: BsmtUnfSF
+  type: number
+- name: TotalBsmtSF
+  type: number
+- name: Heating
+  type: category
+- name: HeatingQC
+  type: category
+- name: CentralAir
+  type: binary
+- name: Electrical
+  type: category
+- name: 1stFlrSF
+  type: number
+- name: 2ndFlrSF
+  type: number
+- name: LowQualFinSF
+  type: number
+- name: GrLivArea
+  type: number
+- name: BsmtFullBath
+  type: number
+- name: BsmtHalfBath
+  type: number
+- name: FullBath
+  type: number
+- name: HalfBath
+  type: number
+- name: BedroomAbvGr
+  type: number
+- name: KitchenAbvGr
+  type: number
+- name: KitchenQual
+  type: category
+- name: TotRmsAbvGrd
+  type: number
+- name: Functional
+  type: category
+- name: Fireplaces
+  type: number
+- name: FireplaceQu
+  type: category
+- name: GarageType
+  type: category
+- name: GarageYrBlt
+  type: number
+- name: GarageFinish
+  type: category
+- name: GarageCars
+  type: number
+- name: GarageArea
+  type: number
+- name: GarageQual
+  type: category
+- name: GarageCond
+  type: category
+- name: PavedDrive
+  type: category
+- name: WoodDeckSF
+  type: number
+- name: OpenPorchSF
+  type: number
+- name: EnclosedPorch
+  type: number
+- name: 3SsnPorch
+  type: number
+- name: ScreenPorch
+  type: number
+- name: PoolArea
+  type: number
+- name: PoolQC
+  type: category
+- name: Fence
+  type: category
+- name: MiscFeature
+  type: category
+- name: MiscVal
+  type: number
+- name: MoSold
+  type: category
+- name: YrSold
+  type: number
+- name: SaleType
+  type: category
+- name: SaleCondition
+  type: category
+output_features:
+- name: SalePrice
+  type: number
+defaults:
+  number:
+    preprocessing:
+      missing_value_strategy: fill_with_const
+      normalization: null
+trainer:
+  bagging_fraction: 0.685852778578485
+  bagging_freq: 1
+  feature_fraction: 0.3746931879710017
+  lambda_l1: 6.528288618183996e-05
+  lambda_l2: 1.1682484449663545e-06
+  learning_rate: 0.813415893329508
+  max_bin: 48245
+  max_depth: 187
+  min_data_in_leaf: 100
+  min_gain_to_split: 0.19590801306610622
+  min_sum_hessian_in_leaf: 1
+  num_boost_round: 9431
+  num_leaves: 86
+  early_stop: 5
+  eval_batch_size: 16384
+  evaluate_training_set: false
+type: gbm
diff --git a/tests/regression_tests/benchmark/configs/mercedes_benz_greener.ecd.yaml b/tests/regression_tests/benchmark/configs/mercedes_benz_greener.ecd.yaml
new file mode 100644
index 00000000000..08e4a0c9c90
--- /dev/null
+++ b/tests/regression_tests/benchmark/configs/mercedes_benz_greener.ecd.yaml
@@ -0,0 +1,769 @@
+output_features:
+- name: y
+  type: number
+input_features:
+- name: X0
+  type: category
+- name: X1
+  type: category
+- name: X2
+  type: category
+- name: X3
+  type: category
+- name: X4
+  type: category
+- name: X5
+  type: category
+- name: X6
+  type: category
+- name: X8
+  type: category
+- name: X10
+  type: binary
+- name: X11
+  type: binary
+- name: X12
+  type: binary
+- name: X13
+  type: binary
+- name: X14
+  type: binary
+- name: X15
+  type: binary
+- name: X16
+  type: binary
+- name: X17
+  type: binary
+- name: X18
+  type: binary
+- name: X19
+  type: binary
+- name: X20
+  type: binary
+- name: X21
+  type: binary
+- name: X22
+  type: binary
+- name: X23
+  type: binary
+- name: X24
+  type: binary
+- name: X26
+  type: binary
+- name: X27
+  type: binary
+- name: X28
+  type: binary
+- name: X29
+  type: binary
+- name: X30
+  type: binary
+- name: X31
+  type: binary
+- name: X32
+  type: binary
+- name: X33
+  type: binary
+- name: X34
+  type: binary
+- name: X35
+  type: binary
+- name: X36
+  type: binary
+- name: X37
+  type: binary
+- name: X38
+  type: binary
+- name: X39
+  type: binary
+- name: X40
+  type: binary
+- name: X41
+  type: binary
+- name: X42
+  type: binary
+- name: X43
+  type: binary
+- name: X44
+  type: binary
+- name: X45
+  type: binary
+- name: X46
+  type: binary
+- name: X47
+  type: binary
+- name: X48
+  type: binary
+- name: X49
+  type: binary
+- name: X50
+  type: binary
+- name: X51
+  type: binary
+- name: X52
+  type: binary
+- name: X53
+  type: binary
+- name: X54
+  type: binary
+- name: X55
+  type: binary
+- name: X56
+  type: binary
+- name: X57
+  type: binary
+- name: X58
+  type: binary
+- name: X59
+  type: binary
+- name: X60
+  type: binary
+- name: X61
+  type: binary
+- name: X62
+  type: binary
+- name: X63
+  type: binary
+- name: X64
+  type: binary
+- name: X65
+  type: binary
+- name: X66
+  type: binary
+- name: X67
+  type: binary
+- name: X68
+  type: binary
+- name: X69
+  type: binary
+- name: X70
+  type: binary
+- name: X71
+  type: binary
+- name: X73
+  type: binary
+- name: X74
+  type: binary
+- name: X75
+  type: binary
+- name: X76
+  type: binary
+- name: X77
+  type: binary
+- name: X78
+  type: binary
+- name: X79
+  type: binary
+- name: X80
+  type: binary
+- name: X81
+  type: binary
+- name: X82
+  type: binary
+- name: X83
+  type: binary
+- name: X84
+  type: binary
+- name: X85
+  type: binary
+- name: X86
+  type: binary
+- name: X87
+  type: binary
+- name: X88
+  type: binary
+- name: X89
+  type: binary
+- name: X90
+  type: binary
+- name: X91
+  type: binary
+- name: X92
+  type: binary
+- name: X93
+  type: binary
+- name: X94
+  type: binary
+- name: X95
+  type: binary
+- name: X96
+  type: binary
+- name: X97
+  type: binary
+- name: X98
+  type: binary
+- name: X99
+  type: binary
+- name: X100
+  type: binary
+- name: X101
+  type: binary
+- name: X102
+  type: binary
+- name: X103
+  type: binary
+- name: X104
+  type: binary
+- name: X105
+  type: binary
+- name: X106
+  type: binary
+- name: X107
+  type: binary
+- name: X108
+  type: binary
+- name: X109
+  type: binary
+- name: X110
+  type: binary
+- name: X111
+  type: binary
+- name: X112
+  type: binary
+- name: X113
+  type: binary
+- name: X114
+  type: binary
+- name: X115
+  type: binary
+- name: X116
+  type: binary
+- name: X117
+  type: binary
+- name: X118
+  type: binary
+- name: X119
+  type: binary
+- name: X120
+  type: binary
+- name: X122
+  type: binary
+- name: X123
+  type: binary
+- name: X124
+  type: binary
+- name: X125
+  type: binary
+- name: X126
+  type: binary
+- name: X127
+  type: binary
+- name: X128
+  type: binary
+- name: X129
+  type: binary
+- name: X130
+  type: binary
+- name: X131
+  type: binary
+- name: X132
+  type: binary
+- name: X133
+  type: binary
+- name: X134
+  type: binary
+- name: X135
+  type: binary
+- name: X136
+  type: binary
+- name: X137
+  type: binary
+- name: X138
+  type: binary
+- name: X139
+  type: binary
+- name: X140
+  type: binary
+- name: X141
+  type: binary
+- name: X142
+  type: binary
+- name: X143
+  type: binary
+- name: X144
+  type: binary
+- name: X145
+  type: binary
+- name: X146
+  type: binary
+- name: X147
+  type: binary
+- name: X148
+  type: binary
+- name: X150
+  type: binary
+- name: X151
+  type: binary
+- name: X152
+  type: binary
+- name: X153
+  type: binary
+- name: X154
+  type: binary
+- name: X155
+  type: binary
+- name: X156
+  type: binary
+- name: X157
+  type: binary
+- name: X158
+  type: binary
+- name: X159
+  type: binary
+- name: X160
+  type: binary
+- name: X161
+  type: binary
+- name: X162
+  type: binary
+- name: X163
+  type: binary
+- name: X164
+  type: binary
+- name: X165
+  type: binary
+- name: X166
+  type: binary
+- name: X167
+  type: binary
+- name: X168
+  type: binary
+- name: X169
+  type: binary
+- name: X170
+  type: binary
+- name: X171
+  type: binary
+- name: X172
+  type: binary
+- name: X173
+  type: binary
+- name: X174
+  type: binary
+- name: X175
+  type: binary
+- name: X176
+  type: binary
+- name: X177
+  type: binary
+- name: X178
+  type: binary
+- name: X179
+  type: binary
+- name: X180
+  type: binary
+- name: X181
+  type: binary
+- name: X182
+  type: binary
+- name: X183
+  type: binary
+- name: X184
+  type: binary
+- name: X185
+  type: binary
+- name: X186
+  type: binary
+- name: X187
+  type: binary
+- name: X189
+  type: binary
+- name: X190
+  type: binary
+- name: X191
+  type: binary
+- name: X192
+  type: binary
+- name: X194
+  type: binary
+- name: X195
+  type: binary
+- name: X196
+  type: binary
+- name: X197
+  type: binary
+- name: X198
+  type: binary
+- name: X199
+  type: binary
+- name: X200
+  type: binary
+- name: X201
+  type: binary
+- name: X202
+  type: binary
+- name: X203
+  type: binary
+- name: X204
+  type: binary
+- name: X205
+  type: binary
+- name: X206
+  type: binary
+- name: X207
+  type: binary
+- name: X208
+  type: binary
+- name: X209
+  type: binary
+- name: X210
+  type: binary
+- name: X211
+  type: binary
+- name: X212
+  type: binary
+- name: X213
+  type: binary
+- name: X214
+  type: binary
+- name: X215
+  type: binary
+- name: X216
+  type: binary
+- name: X217
+  type: binary
+- name: X218
+  type: binary
+- name: X219
+  type: binary
+- name: X220
+  type: binary
+- name: X221
+  type: binary
+- name: X222
+  type: binary
+- name: X223
+  type: binary
+- name: X224
+  type: binary
+- name: X225
+  type: binary
+- name: X226
+  type: binary
+- name: X227
+  type: binary
+- name: X228
+  type: binary
+- name: X229
+  type: binary
+- name: X230
+  type: binary
+- name: X231
+  type: binary
+- name: X232
+  type: binary
+- name: X233
+  type: binary
+- name: X234
+  type: binary
+- name: X235
+  type: binary
+- name: X236
+  type: binary
+- name: X237
+  type: binary
+- name: X238
+  type: binary
+- name: X239
+  type: binary
+- name: X240
+  type: binary
+- name: X241
+  type: binary
+- name: X242
+  type: binary
+- name: X243
+  type: binary
+- name: X244
+  type: binary
+- name: X245
+  type: binary
+- name: X246
+  type: binary
+- name: X247
+  type: binary
+- name: X248
+  type: binary
+- name: X249
+  type: binary
+- name: X250
+  type: binary
+- name: X251
+  type: binary
+- name: X252
+  type: binary
+- name: X253
+  type: binary
+- name: X254
+  type: binary
+- name: X255
+  type: binary
+- name: X256
+  type: binary
+- name: X257
+  type: binary
+- name: X258
+  type: binary
+- name: X259
+  type: binary
+- name: X260
+  type: binary
+- name: X261
+  type: binary
+- name: X262
+  type: binary
+- name: X263
+  type: binary
+- name: X264
+  type: binary
+- name: X265
+  type: binary
+- name: X266
+  type: binary
+- name: X267
+  type: binary
+- name: X268
+  type: binary
+- name: X269
+  type: binary
+- name: X270
+  type: binary
+- name: X271
+  type: binary
+- name: X272
+  type: binary
+- name: X273
+  type: binary
+- name: X274
+  type: binary
+- name: X275
+  type: binary
+- name: X276
+  type: binary
+- name: X277
+  type: binary
+- name: X278
+  type: binary
+- name: X279
+  type: binary
+- name: X280
+  type: binary
+- name: X281
+  type: binary
+- name: X282
+  type: binary
+- name: X283
+  type: binary
+- name: X284
+  type: binary
+- name: X285
+  type: binary
+- name: X286
+  type: binary
+- name: X287
+  type: binary
+- name: X288
+  type: binary
+- name: X289
+  type: binary
+- name: X290
+  type: binary
+- name: X291
+  type: binary
+- name: X292
+  type: binary
+- name: X293
+  type: binary
+- name: X294
+  type: binary
+- name: X295
+  type: binary
+- name: X296
+  type: binary
+- name: X297
+  type: binary
+- name: X298
+  type: binary
+- name: X299
+  type: binary
+- name: X300
+  type: binary
+- name: X301
+  type: binary
+- name: X302
+  type: binary
+- name: X304
+  type: binary
+- name: X305
+  type: binary
+- name: X306
+  type: binary
+- name: X307
+  type: binary
+- name: X308
+  type: binary
+- name: X309
+  type: binary
+- name: X310
+  type: binary
+- name: X311
+  type: binary
+- name: X312
+  type: binary
+- name: X313
+  type: binary
+- name: X314
+  type: binary
+- name: X315
+  type: binary
+- name: X316
+  type: binary
+- name: X317
+  type: binary
+- name: X318
+  type: binary
+- name: X319
+  type: binary
+- name: X320
+  type: binary
+- name: X321
+  type: binary
+- name: X322
+  type: binary
+- name: X323
+  type: binary
+- name: X324
+  type: binary
+- name: X325
+  type: binary
+- name: X326
+  type: binary
+- name: X327
+  type: binary
+- name: X328
+  type: binary
+- name: X329
+  type: binary
+- name: X330
+  type: binary
+- name: X331
+  type: binary
+- name: X332
+  type: binary
+- name: X333
+  type: binary
+- name: X334
+  type: binary
+- name: X335
+  type: binary
+- name: X336
+  type: binary
+- name: X337
+  type: binary
+- name: X338
+  type: binary
+- name: X339
+  type: binary
+- name: X340
+  type: binary
+- name: X341
+  type: binary
+- name: X342
+  type: binary
+- name: X343
+  type: binary
+- name: X344
+  type: binary
+- name: X345
+  type: binary
+- name: X346
+  type: binary
+- name: X347
+  type: binary
+- name: X348
+  type: binary
+- name: X349
+  type: binary
+- name: X350
+  type: binary
+- name: X351
+  type: binary
+- name: X352
+  type: binary
+- name: X353
+  type: binary
+- name: X354
+  type: binary
+- name: X355
+  type: binary
+- name: X356
+  type: binary
+- name: X357
+  type: binary
+- name: X358
+  type: binary
+- name: X359
+  type: binary
+- name: X360
+  type: binary
+- name: X361
+  type: binary
+- name: X362
+  type: binary
+- name: X363
+  type: binary
+- name: X364
+  type: binary
+- name: X365
+  type: binary
+- name: X366
+  type: binary
+- name: X367
+  type: binary
+- name: X368
+  type: binary
+- name: X369
+  type: binary
+- name: X370
+  type: binary
+- name: X371
+  type: binary
+- name: X372
+  type: binary
+- name: X373
+  type: binary
+- name: X374
+  type: binary
+- name: X375
+  type: binary
+- name: X376
+  type: binary
+- name: X377
+  type: binary
+- name: X378
+  type: binary
+- name: X379
+  type: binary
+- name: X380
+  type: binary
+- name: X382
+  type: binary
+- name: X383
+  type: binary
+- name: X384
+  type: binary
+- name: X385
+  type: binary
+combiner:
+  type: tabnet
+trainer:
+  eval_batch_size: 16384
+  evaluate_training_set: false
+  learning_rate: 0.02465493752015043
+  batch_size: 33
+  decay: false
+defaults:
+  number:
+    preprocessing:
+      normalization: null
+      missing_value_strategy: fill_with_const
diff --git a/tests/regression_tests/benchmark/configs/mercedes_benz_greener.gbm.yaml b/tests/regression_tests/benchmark/configs/mercedes_benz_greener.gbm.yaml
new file mode 100644
index 00000000000..f6237351043
--- /dev/null
+++ b/tests/regression_tests/benchmark/configs/mercedes_benz_greener.gbm.yaml
@@ -0,0 +1,779 @@
+input_features:
+- name: X0
+  type: category
+- name: X1
+  type: category
+- name: X2
+  type: category
+- name: X3
+  type: category
+- name: X4
+  type: category
+- name: X5
+  type: category
+- name: X6
+  type: category
+- name: X8
+  type: category
+- name: X10
+  type: binary
+- name: X11
+  type: binary
+- name: X12
+  type: binary
+- name: X13
+  type: binary
+- name: X14
+  type: binary
+- name: X15
+  type: binary
+- name: X16
+  type: binary
+- name: X17
+  type: binary
+- name: X18
+  type: binary
+- name: X19
+  type: binary
+- name: X20
+  type: binary
+- name: X21
+  type: binary
+- name: X22
+  type: binary
+- name: X23
+  type: binary
+- name: X24
+  type: binary
+- name: X26
+  type: binary
+- name: X27
+  type: binary
+- name: X28
+  type: binary
+- name: X29
+  type: binary
+- name: X30
+  type: binary
+- name: X31
+  type: binary
+- name: X32
+  type: binary
+- name: X33
+  type: binary
+- name: X34
+  type: binary
+- name: X35
+  type: binary
+- name: X36
+  type: binary
+- name: X37
+  type: binary
+- name: X38
+  type: binary
+- name: X39
+  type: binary
+- name: X40
+  type: binary
+- name: X41
+  type: binary
+- name: X42
+  type: binary
+- name: X43
+  type: binary
+- name: X44
+  type: binary
+- name: X45
+  type: binary
+- name: X46
+  type: binary
+- name: X47
+  type: binary
+- name: X48
+  type: binary
+- name: X49
+  type: binary
+- name: X50
+  type: binary
+- name: X51
+  type: binary
+- name: X52
+  type: binary
+- name: X53
+  type: binary
+- name: X54
+  type: binary
+- name: X55
+  type: binary
+- name: X56
+  type: binary
+- name: X57
+  type: binary
+- name: X58
+  type: binary
+- name: X59
+  type: binary
+- name: X60
+  type: binary
+- name: X61
+  type: binary
+- name: X62
+  type: binary
+- name: X63
+  type: binary
+- name: X64
+  type: binary
+- name: X65
+  type: binary
+- name: X66
+  type: binary
+- name: X67
+  type: binary
+- name: X68
+  type: binary
+- name: X69
+  type: binary
+- name: X70
+  type: binary
+- name: X71
+  type: binary
+- name: X73
+  type: binary
+- name: X74
+  type: binary
+- name: X75
+  type: binary
+- name: X76
+  type: binary
+- name: X77
+  type: binary
+- name: X78
+  type: binary
+- name: X79
+  type: binary
+- name: X80
+  type: binary
+- name: X81
+  type: binary
+- name: X82
+  type: binary
+- name: X83
+  type: binary
+- name: X84
+  type: binary
+- name: X85
+  type: binary
+- name: X86
+  type: binary
+- name: X87
+  type: binary
+- name: X88
+  type: binary
+- name: X89
+  type: binary
+- name: X90
+  type: binary
+- name: X91
+  type: binary
+- name: X92
+  type: binary
+- name: X93
+  type: binary
+- name: X94
+  type: binary
+- name: X95
+  type: binary
+- name: X96
+  type: binary
+- name: X97
+  type: binary
+- name: X98
+  type: binary
+- name: X99
+  type: binary
+- name: X100
+  type: binary
+- name: X101
+  type: binary
+- name: X102
+  type: binary
+- name: X103
+  type: binary
+- name: X104
+  type: binary
+- name: X105
+  type: binary
+- name: X106
+  type: binary
+- name: X107
+  type: binary
+- name: X108
+  type: binary
+- name: X109
+  type: binary
+- name: X110
+  type: binary
+- name: X111
+  type: binary
+- name: X112
+  type: binary
+- name: X113
+  type: binary
+- name: X114
+  type: binary
+- name: X115
+  type: binary
+- name: X116
+  type: binary
+- name: X117
+  type: binary
+- name: X118
+  type: binary
+- name: X119
+  type: binary
+- name: X120
+  type: binary
+- name: X122
+  type: binary
+- name: X123
+  type: binary
+- name: X124
+  type: binary
+- name: X125
+  type: binary
+- name: X126
+  type: binary
+- name: X127
+  type: binary
+- name: X128
+  type: binary
+- name: X129
+  type: binary
+- name: X130
+  type: binary
+- name: X131
+  type: binary
+- name: X132
+  type: binary
+- name: X133
+  type: binary
+- name: X134
+  type: binary
+- name: X135
+  type: binary
+- name: X136
+  type: binary
+- name: X137
+  type: binary
+- name: X138
+  type: binary
+- name: X139
+  type: binary
+- name: X140
+  type: binary
+- name: X141
+  type: binary
+- name: X142
+  type: binary
+- name: X143
+  type: binary
+- name: X144
+  type: binary
+- name: X145
+  type: binary
+- name: X146
+  type: binary
+- name: X147
+  type: binary
+- name: X148
+  type: binary
+- name: X150
+  type: binary
+- name: X151
+  type: binary
+- name: X152
+  type: binary
+- name: X153
+  type: binary
+- name: X154
+  type: binary
+- name: X155
+  type: binary
+- name: X156
+  type: binary
+- name: X157
+  type: binary
+- name: X158
+  type: binary
+- name: X159
+  type: binary
+- name: X160
+  type: binary
+- name: X161
+  type: binary
+- name: X162
+  type: binary
+- name: X163
+  type: binary
+- name: X164
+  type: binary
+- name: X165
+  type: binary
+- name: X166
+  type: binary
+- name: X167
+  type: binary
+- name: X168
+  type: binary
+- name: X169
+  type: binary
+- name: X170
+  type: binary
+- name: X171
+  type: binary
+- name: X172
+  type: binary
+- name: X173
+  type: binary
+- name: X174
+  type: binary
+- name: X175
+  type: binary
+- name: X176
+  type: binary
+- name: X177
+  type: binary
+- name: X178
+  type: binary
+- name: X179
+  type: binary
+- name: X180
+  type: binary
+- name: X181
+  type: binary
+- name: X182
+  type: binary
+- name: X183
+  type: binary
+- name: X184
+  type: binary
+- name: X185
+  type: binary
+- name: X186
+  type: binary
+- name: X187
+  type: binary
+- name: X189
+  type: binary
+- name: X190
+  type: binary
+- name: X191
+  type: binary
+- name: X192
+  type: binary
+- name: X194
+  type: binary
+- name: X195
+  type: binary
+- name: X196
+  type: binary
+- name: X197
+  type: binary
+- name: X198
+  type: binary
+- name: X199
+  type: binary
+- name: X200
+  type: binary
+- name: X201
+  type: binary
+- name: X202
+  type: binary
+- name: X203
+  type: binary
+- name: X204
+  type: binary
+- name: X205
+  type: binary
+- name: X206
+  type: binary
+- name: X207
+  type: binary
+- name: X208
+  type: binary
+- name: X209
+  type: binary
+- name: X210
+  type: binary
+- name: X211
+  type: binary
+- name: X212
+  type: binary
+- name: X213
+  type: binary
+- name: X214
+  type: binary
+- name: X215
+  type: binary
+- name: X216
+  type: binary
+- name: X217
+  type: binary
+- name: X218
+  type: binary
+- name: X219
+  type: binary
+- name: X220
+  type: binary
+- name: X221
+  type: binary
+- name: X222
+  type: binary
+- name: X223
+  type: binary
+- name: X224
+  type: binary
+- name: X225
+  type: binary
+- name: X226
+  type: binary
+- name: X227
+  type: binary
+- name: X228
+  type: binary
+- name: X229
+  type: binary
+- name: X230
+  type: binary
+- name: X231
+  type: binary
+- name: X232
+  type: binary
+- name: X233
+  type: binary
+- name: X234
+  type: binary
+- name: X235
+  type: binary
+- name: X236
+  type: binary
+- name: X237
+  type: binary
+- name: X238
+  type: binary
+- name: X239
+  type: binary
+- name: X240
+  type: binary
+- name: X241
+  type: binary
+- name: X242
+  type: binary
+- name: X243
+  type: binary
+- name: X244
+  type: binary
+- name: X245
+  type: binary
+- name: X246
+  type: binary
+- name: X247
+  type: binary
+- name: X248
+  type: binary
+- name: X249
+  type: binary
+- name: X250
+  type: binary
+- name: X251
+  type: binary
+- name: X252
+  type: binary
+- name: X253
+  type: binary
+- name: X254
+  type: binary
+- name: X255
+  type: binary
+- name: X256
+  type: binary
+- name: X257
+  type: binary
+- name: X258
+  type: binary
+- name: X259
+  type: binary
+- name: X260
+  type: binary
+- name: X261
+  type: binary
+- name: X262
+  type: binary
+- name: X263
+  type: binary
+- name: X264
+  type: binary
+- name: X265
+  type: binary
+- name: X266
+  type: binary
+- name: X267
+  type: binary
+- name: X268
+  type: binary
+- name: X269
+  type: binary
+- name: X270
+  type: binary
+- name: X271
+  type: binary
+- name: X272
+  type: binary
+- name: X273
+  type: binary
+- name: X274
+  type: binary
+- name: X275
+  type: binary
+- name: X276
+  type: binary
+- name: X277
+  type: binary
+- name: X278
+  type: binary
+- name: X279
+  type: binary
+- name: X280
+  type: binary
+- name: X281
+  type: binary
+- name: X282
+  type: binary
+- name: X283
+  type: binary
+- name: X284
+  type: binary
+- name: X285
+  type: binary
+- name: X286
+  type: binary
+- name: X287
+  type: binary
+- name: X288
+  type: binary
+- name: X289
+  type: binary
+- name: X290
+  type: binary
+- name: X291
+  type: binary
+- name: X292
+  type: binary
+- name: X293
+  type: binary
+- name: X294
+  type: binary
+- name: X295
+  type: binary
+- name: X296
+  type: binary
+- name: X297
+  type: binary
+- name: X298
+  type: binary
+- name: X299
+  type: binary
+- name: X300
+  type: binary
+- name: X301
+  type: binary
+- name: X302
+  type: binary
+- name: X304
+  type: binary
+- name: X305
+  type: binary
+- name: X306
+  type: binary
+- name: X307
+  type: binary
+- name: X308
+  type: binary
+- name: X309
+  type: binary
+- name: X310
+  type: binary
+- name: X311
+  type: binary
+- name: X312
+  type: binary
+- name: X313
+  type: binary
+- name: X314
+  type: binary
+- name: X315
+  type: binary
+- name: X316
+  type: binary
+- name: X317
+  type: binary
+- name: X318
+  type: binary
+- name: X319
+  type: binary
+- name: X320
+  type: binary
+- name: X321
+  type: binary
+- name: X322
+  type: binary
+- name: X323
+  type: binary
+- name: X324
+  type: binary
+- name: X325
+  type: binary
+- name: X326
+  type: binary
+- name: X327
+  type: binary
+- name: X328
+  type: binary
+- name: X329
+  type: binary
+- name: X330
+  type: binary
+- name: X331
+  type: binary
+- name: X332
+  type: binary
+- name: X333
+  type: binary
+- name: X334
+  type: binary
+- name: X335
+  type: binary
+- name: X336
+  type: binary
+- name: X337
+  type: binary
+- name: X338
+  type: binary
+- name: X339
+  type: binary
+- name: X340
+  type: binary
+- name: X341
+  type: binary
+- name: X342
+  type: binary
+- name: X343
+  type: binary
+- name: X344
+  type: binary
+- name: X345
+  type: binary
+- name: X346
+  type: binary
+- name: X347
+  type: binary
+- name: X348
+  type: binary
+- name: X349
+  type: binary
+- name: X350
+  type: binary
+- name: X351
+  type: binary
+- name: X352
+  type: binary
+- name: X353
+  type: binary
+- name: X354
+  type: binary
+- name: X355
+  type: binary
+- name: X356
+  type: binary
+- name: X357
+  type: binary
+- name: X358
+  type: binary
+- name: X359
+  type: binary
+- name: X360
+  type: binary
+- name: X361
+  type: binary
+- name: X362
+  type: binary
+- name: X363
+  type: binary
+- name: X364
+  type: binary
+- name: X365
+  type: binary
+- name: X366
+  type: binary
+- name: X367
+  type: binary
+- name: X368
+  type: binary
+- name: X369
+  type: binary
+- name: X370
+  type: binary
+- name: X371
+  type: binary
+- name: X372
+  type: binary
+- name: X373
+  type: binary
+- name: X374
+  type: binary
+- name: X375
+  type: binary
+- name: X376
+  type: binary
+- name: X377
+  type: binary
+- name: X378
+  type: binary
+- name: X379
+  type: binary
+- name: X380
+  type: binary
+- name: X382
+  type: binary
+- name: X383
+  type: binary
+- name: X384
+  type: binary
+- name: X385
+  type: binary
+output_features:
+- name: y
+  type: number
+defaults:
+  number:
+    preprocessing:
+      missing_value_strategy: fill_with_mean
+      normalization: null
+trainer:
+  bagging_fraction: 0.6445712422006155
+  bagging_freq: 5
+  feature_fraction: 0.24524781711582067
+  lambda_l1: 2.1754085692573546e-05
+  lambda_l2: 0.00011678618679043504
+  learning_rate: 0.08587840691682849
+  max_bin: 12088
+  max_depth: 277
+  min_data_in_leaf: 33
+  min_gain_to_split: 0.26698665355271817
+  min_sum_hessian_in_leaf: 2
+  num_boost_round: 7237
+  num_leaves: 405
+  early_stop: 5
+  eval_batch_size: 16384
+  evaluate_training_set: false
+type: gbm
diff --git a/tests/regression_tests/benchmark/configs/sarcos.ecd.yaml b/tests/regression_tests/benchmark/configs/sarcos.ecd.yaml
new file mode 100644
index 00000000000..8c8755572f6
--- /dev/null
+++ b/tests/regression_tests/benchmark/configs/sarcos.ecd.yaml
@@ -0,0 +1,100 @@
+combiner:
+  type: tabnet
+defaults:
+  number:
+    preprocessing:
+      missing_value_strategy: fill_with_const
+      normalization: null
+input_features:
+- column: position_1
+  name: position_1
+  type: number
+- column: position_2
+  name: position_2
+  type: number
+- column: position_3
+  name: position_3
+  type: number
+- column: position_4
+  name: position_4
+  type: number
+- column: position_5
+  name: position_5
+  type: number
+- column: position_6
+  name: position_6
+  type: number
+- column: position_7
+  name: position_7
+  type: number
+- column: velocity_1
+  name: velocity_1
+  type: number
+- column: velocity_2
+  name: velocity_2
+  type: number
+- column: velocity_3
+  name: velocity_3
+  type: number
+- column: velocity_4
+  name: velocity_4
+  type: number
+- column: velocity_5
+  name: velocity_5
+  type: number
+- column: velocity_6
+  name: velocity_6
+  type: number
+- column: velocity_7
+  name: velocity_7
+  type: number
+- column: acceleration_1
+  name: acceleration_1
+  type: number
+- column: acceleration_2
+  name: acceleration_2
+  type: number
+- column: acceleration_3
+  name: acceleration_3
+  type: number
+- column: acceleration_4
+  name: acceleration_4
+  type: number
+- column: acceleration_5
+  name: acceleration_5
+  type: number
+- column: acceleration_6
+  name: acceleration_6
+  type: number
+- column: acceleration_7
+  name: acceleration_7
+  type: number
+- column: torque_2
+  name: torque_2
+  type: number
+- column: torque_3
+  name: torque_3
+  type: number
+- column: torque_4
+  name: torque_4
+  type: number
+- column: torque_5
+  name: torque_5
+  type: number
+- column: torque_6
+  name: torque_6
+  type: number
+- column: torque_7
+  name: torque_7
+  type: number
+output_features:
+- column: torque_1
+  name: torque_1
+  type: number
+trainer:
+  batch_size: 118
+  decay: true
+  decay_rate: 0.5371397744663506
+  eval_batch_size: 16384
+  evaluate_training_set: false
+  learning_rate: 0.001004563044919135
diff --git a/tests/regression_tests/benchmark/configs/sarcos.gbm.yaml b/tests/regression_tests/benchmark/configs/sarcos.gbm.yaml
new file mode 100644
index 00000000000..542ecc267df
--- /dev/null
+++ b/tests/regression_tests/benchmark/configs/sarcos.gbm.yaml
@@ -0,0 +1,109 @@
+input_features:
+- column: position_1
+  name: position_1
+  type: number
+- column: position_2
+  name: position_2
+  type: number
+- column: position_3
+  name: position_3
+  type: number
+- column: position_4
+  name: position_4
+  type: number
+- column: position_5
+  name: position_5
+  type: number
+- column: position_6
+  name: position_6
+  type: number
+- column: position_7
+  name: position_7
+  type: number
+- column: velocity_1
+  name: velocity_1
+  type: number
+- column: velocity_2
+  name: velocity_2
+  type: number
+- column: velocity_3
+  name: velocity_3
+  type: number
+- column: velocity_4
+  name: velocity_4
+  type: number
+- column: velocity_5
+  name: velocity_5
+  type: number
+- column: velocity_6
+  name: velocity_6
+  type: number
+- column: velocity_7
+  name: velocity_7
+  type: number
+- column: acceleration_1
+  name: acceleration_1
+  type: number
+- column: acceleration_2
+  name: acceleration_2
+  type: number
+- column: acceleration_3
+  name: acceleration_3
+  type: number
+- column: acceleration_4
+  name: acceleration_4
+  type: number
+- column: acceleration_5
+  name: acceleration_5
+  type: number
+- column: acceleration_6
+  name: acceleration_6
+  type: number
+- column: acceleration_7
+  name: acceleration_7
+  type: number
+- column: torque_2
+  name: torque_2
+  type: number
+- column: torque_3
+  name: torque_3
+  type: number
+- column: torque_4
+  name: torque_4
+  type: number
+- column: torque_5
+  name: torque_5
+  type: number
+- column: torque_6
+  name: torque_6
+  type: number
+- column: torque_7
+  name: torque_7
+  type: number
+output_features:
+- column: torque_1
+  name: torque_1
+  type: number
+defaults:
+  number:
+    preprocessing:
+      missing_value_strategy: fill_with_mean
+      normalization: zscore
+trainer:
+  early_stop: 5
+  eval_batch_size: 16384
+  evaluate_training_set: false
+  bagging_fraction: 0.4390832685606891
+  bagging_freq: 9
+  feature_fraction: 0.7017870817136058
+  lambda_l1: 6.050617967261626e-08
+  lambda_l2: 0.0011384945136371208
+  learning_rate: 0.004788915803024405
+  max_bin: 36961
+  max_depth: 215
+  min_data_in_leaf: 40
+  min_gain_to_split: 0.07379267914892244
+  min_sum_hessian_in_leaf: 8
+  num_boost_round: 382
+  num_leaves: 434
+type: gbm
diff --git a/tests/regression_tests/benchmark/expected_metric.py b/tests/regression_tests/benchmark/expected_metric.py
new file mode 100644
index 00000000000..1f205ba248e
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metric.py
@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+from typing import Union
+
+from dataclasses_json import dataclass_json
+
+
+@dataclass_json
+@dataclass
+class ExpectedMetric:
+    # Name of the output feature the metric belongs to, e.g. SalePrice.
+    output_feature_name: str
+
+    # Name of the metric recorded for that output feature, e.g. accuracy or r2.
+    metric_name: str
+
+    # Reference value that a freshly measured metric is compared against.
+    expected_value: Union[int, float]
+
+    # Allowed relative deviation from expected_value, stored as a fraction (0.15 means 15%).
+    tolerance_percentage: float
diff --git a/tests/regression_tests/benchmark/expected_metrics/adult_census_income.ecd.yaml b/tests/regression_tests/benchmark/expected_metrics/adult_census_income.ecd.yaml
new file mode 100644
index 00000000000..68b1bd037d8
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metrics/adult_census_income.ecd.yaml
@@ -0,0 +1,5 @@
+metrics:
+  - output_feature_name: income
+    metric_name: accuracy
+    expected_value: 0.8547970652580261
+    tolerance_percentage: 0.15
diff --git a/tests/regression_tests/benchmark/expected_metrics/adult_census_income.gbm.yaml b/tests/regression_tests/benchmark/expected_metrics/adult_census_income.gbm.yaml
new file mode 100644
index 00000000000..7a34604b085
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metrics/adult_census_income.gbm.yaml
@@ -0,0 +1,5 @@
+metrics:
+  - output_feature_name: income
+    metric_name: accuracy
+    expected_value: 0.8283590078353882
+    tolerance_percentage: 0.15
diff --git a/tests/regression_tests/benchmark/expected_metrics/ames_housing.ecd.yaml b/tests/regression_tests/benchmark/expected_metrics/ames_housing.ecd.yaml
new file mode 100644
index 00000000000..4c6f26210a1
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metrics/ames_housing.ecd.yaml
@@ -0,0 +1,5 @@
+metrics:
+  - output_feature_name: SalePrice
+    metric_name: r2
+    expected_value: 0.7343850135803223
+    tolerance_percentage: 0.15
diff --git a/tests/regression_tests/benchmark/expected_metrics/ames_housing.gbm.yaml b/tests/regression_tests/benchmark/expected_metrics/ames_housing.gbm.yaml
new file mode 100644
index 00000000000..e4dc7fe2c48
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metrics/ames_housing.gbm.yaml
@@ -0,0 +1,5 @@
+metrics:
+  - output_feature_name: SalePrice
+    metric_name: r2
+    expected_value: 0.7808593511581421
+    tolerance_percentage: 0.15
diff --git a/tests/regression_tests/benchmark/expected_metrics/mercedes_benz_greener.ecd.yaml b/tests/regression_tests/benchmark/expected_metrics/mercedes_benz_greener.ecd.yaml
new file mode 100644
index 00000000000..89b1765aaae
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metrics/mercedes_benz_greener.ecd.yaml
@@ -0,0 +1,5 @@
+metrics:
+  - output_feature_name: y
+    metric_name: r2
+    expected_value: 0.47405338287353516
+    tolerance_percentage: 0.15
diff --git a/tests/regression_tests/benchmark/expected_metrics/mercedes_benz_greener.gbm.yaml b/tests/regression_tests/benchmark/expected_metrics/mercedes_benz_greener.gbm.yaml
new file mode 100644
index 00000000000..36970096d8e
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metrics/mercedes_benz_greener.gbm.yaml
@@ -0,0 +1,5 @@
+metrics:
+  - output_feature_name: y
+    metric_name: r2
+    expected_value: 0.5123344659805298
+    tolerance_percentage: 0.15
diff --git a/tests/regression_tests/benchmark/expected_metrics/sarcos.ecd.yaml b/tests/regression_tests/benchmark/expected_metrics/sarcos.ecd.yaml
new file mode 100644
index 00000000000..330a0861970
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metrics/sarcos.ecd.yaml
@@ -0,0 +1,5 @@
+metrics:
+  - output_feature_name: torque_1
+    metric_name: r2
+    expected_value: 0.9871084690093994
+    tolerance_percentage: 0.15
diff --git a/tests/regression_tests/benchmark/expected_metrics/sarcos.gbm.yaml b/tests/regression_tests/benchmark/expected_metrics/sarcos.gbm.yaml
new file mode 100644
index 00000000000..35a0c4514ab
--- /dev/null
+++ b/tests/regression_tests/benchmark/expected_metrics/sarcos.gbm.yaml
@@ -0,0 +1,5 @@
+metrics:
+  - output_feature_name: torque_1
+    metric_name: r2
+    expected_value: 0.9405434131622314
+    tolerance_percentage: 0.15
diff --git a/tests/regression_tests/benchmark/test_model_performance.py b/tests/regression_tests/benchmark/test_model_performance.py
new file mode 100644
index 00000000000..1cbbddf4d2c
--- /dev/null
+++ b/tests/regression_tests/benchmark/test_model_performance.py
@@ -0,0 +1,62 @@
+import os
+from typing import List
+
+import pytest
+from expected_metric import ExpectedMetric
+
+from ludwig.benchmarking.benchmark import benchmark
+from ludwig.utils.data_utils import load_yaml
+
+
+def get_test_config_filenames() -> List[str]:
+    """Return the sorted filenames found in the `configs` directory that sits next to this file."""
+    benchmark_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "configs")
+    return sorted(os.listdir(benchmark_directory))
+
+
+def get_dataset_from_config_path(config_path: str) -> str:
+    """Extract the dataset name, i.e. the config file's basename up to its first dot."""
+    return os.path.split(config_path)[1].partition(".")[0]
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize("config_filename", get_test_config_filenames())
+def test_performance(config_filename, tmpdir):
+    """Benchmark one config and check that each expected metric stays within its tolerance."""
+    benchmark_directory = os.path.dirname(os.path.abspath(__file__))
+    config_path = os.path.join(benchmark_directory, "configs", config_filename)
+    expected_test_statistics_fp = os.path.join(benchmark_directory, "expected_metrics", config_filename)
+    dataset_name = get_dataset_from_config_path(config_path)
+
+    if not os.path.exists(expected_test_statistics_fp):
+        raise FileNotFoundError(
+            f"""No corresponding expected metrics found for benchmarking config '{config_path}'.
+            Please add a new metrics YAML file '{expected_test_statistics_fp}'. Suggested content:
+
+            metrics:
+              - output_feature_name: <YOUR_OUTPUT_FEATURE e.g. SalePrice>
+                metric_name: <YOUR METRIC NAME e.g. accuracy>
+                expected_value: <A FLOAT VALUE>
+                tolerance_percentage: 0.15"""
+        )
+    expected_metrics_dict = load_yaml(expected_test_statistics_fp)
+
+    benchmarking_config = {
+        "experiment_name": "regression_test",
+        "export": {"export_artifacts": True, "export_base_path": tmpdir},
+        "experiments": [{"dataset_name": dataset_name, "config_path": config_path}],
+    }
+    experiment_artifact = benchmark(benchmarking_config)[dataset_name]
+    expected_metrics: List[ExpectedMetric] = [
+        ExpectedMetric.from_dict(expected_metric) for expected_metric in expected_metrics_dict["metrics"]
+    ]
+    for expected_metric in expected_metrics:
+        # abs() keeps the allowed deviation non-negative when the expected value itself is negative.
+        tolerance = abs(expected_metric.tolerance_percentage * expected_metric.expected_value)
+        output_feature_name = expected_metric.output_feature_name
+        metric_name = expected_metric.metric_name
+        experiment_metric_value = experiment_artifact.test_statistics[output_feature_name][metric_name]
+        assert abs(expected_metric.expected_value - experiment_metric_value) <= tolerance, (
+            f"The obtained {metric_name} value ({experiment_metric_value}) was not within"
+            f" {100 * expected_metric.tolerance_percentage}% of the expected value ({expected_metric.expected_value})."
+        )

From 7c9781e0a8a201ceddaae77c4ef4a0429c9525f5 Mon Sep 17 00:00:00 2001
From: Travis Addair <tgaddair@gmail.com>
Date: Fri, 14 Oct 2022 20:59:06 -0700
Subject: [PATCH 24/29] Fixed race condition in schema validation (#2653)

---
 ludwig/schema/__init__.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/ludwig/schema/__init__.py b/ludwig/schema/__init__.py
index c45d50f9459..5aa05cdf616 100644
--- a/ludwig/schema/__init__.py
+++ b/ludwig/schema/__init__.py
@@ -19,6 +19,7 @@
 #
 
 from functools import lru_cache
+from threading import Lock
 
 from jsonschema import Draft7Validator, validate
 from jsonschema.validators import extend
@@ -40,6 +41,8 @@
 from ludwig.schema.preprocessing import get_preprocessing_jsonschema
 from ludwig.schema.trainer import get_model_type_jsonschema, get_trainer_jsonschema
 
+VALIDATION_LOCK = Lock()
+
 
 @lru_cache(maxsize=2)
 def get_schema(model_type: str):
@@ -82,4 +85,7 @@ def validate_config(config):
     updated_config = upgrade_to_latest_version(config)
     model_type = updated_config.get(MODEL_TYPE, MODEL_ECD)
 
-    validate(instance=updated_config, schema=get_schema(model_type), cls=get_validator())
+    with VALIDATION_LOCK:
+        # There is a race condition during schema validation that can cause the marshmallow schema class to
+        # be missing during validation if more than one thread is trying to validate at once.
+        validate(instance=updated_config, schema=get_schema(model_type), cls=get_validator())

From 5c4923e499ad340b5e8a4800753c4780aeab6c7f Mon Sep 17 00:00:00 2001
From: Travis Addair <tgaddair@gmail.com>
Date: Mon, 17 Oct 2022 10:51:06 -0700
Subject: [PATCH 25/29] Fixed --gpu_memory_limit in CLI to interpret as
 fraction of GPU memory (#2658)

---
 ludwig/api.py                           | 22 +++++++++++-----------
 ludwig/collect.py                       | 14 +++++++++-----
 ludwig/evaluate.py                      | 14 +++++++++-----
 ludwig/experiment.py                    | 14 +++++++++-----
 ludwig/hyperopt/run.py                  |  6 +++---
 ludwig/hyperopt_cli.py                  | 14 +++++++++-----
 ludwig/predict.py                       | 14 +++++++++-----
 ludwig/preprocess.py                    |  4 ++--
 ludwig/train.py                         | 14 +++++++++-----
 tests/integration_tests/test_cli.py     |  9 +++++++++
 tests/integration_tests/test_horovod.py |  2 +-
 11 files changed, 80 insertions(+), 47 deletions(-)

diff --git a/ludwig/api.py b/ludwig/api.py
index 959360feb13..a1d31d7781e 100644
--- a/ludwig/api.py
+++ b/ludwig/api.py
@@ -200,8 +200,8 @@ class LudwigModel:
         of backend to use to execute preprocessing / training steps.
     :param gpus: (Union[str, int, List[int]], default: `None`) GPUs
         to use (it uses the same syntax of CUDA_VISIBLE_DEVICES)
-    :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to
-        allocate per GPU device.
+    :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+        [0, 1] allowed to allocate per GPU device.
     :param allow_parallel_threads: (bool, default: `True`) allow Torch
         to use multithreading parallelism to improve performance at the
         cost of determinism.
@@ -263,7 +263,7 @@ def __init__(
         logging_level: int = logging.ERROR,
         backend: Union[Backend, str] = None,
         gpus: Union[str, int, List[int]] = None,
-        gpu_memory_limit: int = None,
+        gpu_memory_limit: Optional[float] = None,
         allow_parallel_threads: bool = True,
         callbacks: List[Callback] = None,
     ) -> None:
@@ -278,8 +278,8 @@ def __init__(
             of backend to use to execute preprocessing / training steps.
         :param gpus: (Union[str, int, List[int]], default: `None`) GPUs
             to use (it uses the same syntax of CUDA_VISIBLE_DEVICES)
-        :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to
-            allocate per GPU device.
+        :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+            [0, 1] allowed to allocate per GPU device.
         :param allow_parallel_threads: (bool, default: `True`) allow Torch
             to use multithreading parallelism to improve performance at the
             cost of determinism.
@@ -1414,7 +1414,7 @@ def load(
         logging_level: int = logging.ERROR,
         backend: Union[Backend, str] = None,
         gpus: Union[str, int, List[int]] = None,
-        gpu_memory_limit: int = None,
+        gpu_memory_limit: Optional[float] = None,
         allow_parallel_threads: bool = True,
         callbacks: List[Callback] = None,
     ) -> "LudwigModel":  # return is an instance of ludwig.api.LudwigModel class
@@ -1431,8 +1431,8 @@ def load(
             of backend to use to execute preprocessing / training steps.
         :param gpus: (Union[str, int, List[int]], default: `None`) GPUs
             to use (it uses the same syntax of CUDA_VISIBLE_DEVICES)
-        :param gpu_memory_limit: (int: default: `None`) maximum memory in MB to
-            allocate per GPU device.
+        :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+            [0, 1] allowed to allocate per GPU device.
         :param allow_parallel_threads: (bool, default: `True`) allow Torch
             to use
             multithreading parallelism to improve performance at the cost of
@@ -1677,7 +1677,7 @@ def kfold_cross_validate(
     output_directory: str = "results",
     random_seed: int = default_random_seed,
     gpus: Union[str, int, List[int]] = None,
-    gpu_memory_limit: int = None,
+    gpu_memory_limit: Optional[float] = None,
     allow_parallel_threads: bool = True,
     backend: Union[Backend, str] = None,
     logging_level: int = logging.INFO,
@@ -1747,8 +1747,8 @@ def kfold_cross_validate(
            splits and any other random function.
     :param gpus: (list, default: `None`) list of GPUs that are available
             for training.
-    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-            allocate per GPU device.
+    :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+            [0, 1] allowed to allocate per GPU device.
     :param allow_parallel_threads: (bool, default: `True`) allow Torch to
             use multithreading parallelism
            to improve performance at the cost of determinism.
diff --git a/ludwig/collect.py b/ludwig/collect.py
index 2a69767dedb..28b392bc494 100644
--- a/ludwig/collect.py
+++ b/ludwig/collect.py
@@ -17,7 +17,7 @@
 import logging
 import os
 import sys
-from typing import List, Union
+from typing import List, Optional, Union
 
 import numpy as np
 import torchinfo
@@ -43,7 +43,7 @@ def collect_activations(
     batch_size: int = 128,
     output_directory: str = "results",
     gpus: List[str] = None,
-    gpu_memory_limit: int = None,
+    gpu_memory_limit: Optional[float] = None,
     allow_parallel_threads: bool = True,
     callbacks: List[Callback] = None,
     backend: Union[Backend, str] = None,
@@ -75,8 +75,8 @@ def collect_activations(
         model and the training progress files.
     :param gpus: (list, default: `None`) list of GPUs that are available
         for training.
-    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-        allocate per GPU device.
+    :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+        [0, 1] allowed to allocate per GPU device.
     :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
         to use multithreading parallelism to improve performance at
         the cost of determinism.
@@ -267,7 +267,11 @@ def cli_collect_activations(sys_argv):
     # ------------------
     parser.add_argument("-g", "--gpus", type=int, default=0, help="list of gpu to use")
     parser.add_argument(
-        "-gml", "--gpu_memory_limit", type=int, default=None, help="maximum memory in MB to allocate per GPU device"
+        "-gml",
+        "--gpu_memory_limit",
+        type=float,
+        default=None,
+        help="maximum memory fraction [0, 1] allowed to allocate per GPU device",
     )
     parser.add_argument(
         "-dpt",
diff --git a/ludwig/evaluate.py b/ludwig/evaluate.py
index c4113f744dc..c5444f382b0 100644
--- a/ludwig/evaluate.py
+++ b/ludwig/evaluate.py
@@ -16,7 +16,7 @@
 import argparse
 import logging
 import sys
-from typing import List, Union
+from typing import List, Optional, Union
 
 import pandas as pd
 
@@ -44,7 +44,7 @@ def evaluate_cli(
     skip_collect_overall_stats: bool = False,
     output_directory: str = "results",
     gpus: Union[str, int, List[int]] = None,
-    gpu_memory_limit: int = None,
+    gpu_memory_limit: Optional[float] = None,
     allow_parallel_threads: bool = True,
     callbacks: List[Callback] = None,
     backend: Union[Backend, str] = None,
@@ -87,8 +87,8 @@ def evaluate_cli(
          model and the training progress files.
      :param gpus: (list, default: `None`) list of GPUs that are available
          for training.
-     :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-         allocate per GPU device.
+     :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+            [0, 1] allowed to allocate per GPU device.
      :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
          to use multithreading parallelism to improve performance at
          the cost of determinism.
@@ -212,7 +212,11 @@ def cli(sys_argv):
     # ------------------
     parser.add_argument("-g", "--gpus", type=int, default=0, help="list of gpu to use")
     parser.add_argument(
-        "-gml", "--gpu_memory_limit", type=int, default=None, help="maximum memory in MB to allocate per GPU device"
+        "-gml",
+        "--gpu_memory_limit",
+        type=float,
+        default=None,
+        help="maximum memory fraction [0, 1] allowed to allocate per GPU device",
     )
     parser.add_argument(
         "-dpt",
diff --git a/ludwig/experiment.py b/ludwig/experiment.py
index f11ea247bf0..1c2ec28c634 100644
--- a/ludwig/experiment.py
+++ b/ludwig/experiment.py
@@ -17,7 +17,7 @@
 import logging
 import os
 import sys
-from typing import List, Union
+from typing import List, Optional, Union
 
 import pandas as pd
 
@@ -60,7 +60,7 @@ def experiment_cli(
     skip_collect_overall_stats: bool = False,
     output_directory: str = "results",
     gpus: Union[str, int, List[int]] = None,
-    gpu_memory_limit: int = None,
+    gpu_memory_limit: Optional[float] = None,
     allow_parallel_threads: bool = True,
     callbacks: List[Callback] = None,
     backend: Union[Backend, str] = None,
@@ -160,8 +160,8 @@ def experiment_cli(
          model and the training progress files.
      :param gpus: (list, default: `None`) list of GPUs that are available
          for training.
-     :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-         allocate per GPU device.
+     :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+            [0, 1] allowed to allocate per GPU device.
      :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
          to use multithreading parallelism to improve performance at
          the cost of determinism.
@@ -480,7 +480,11 @@ def cli(sys_argv):
     )
     parser.add_argument("-g", "--gpus", nargs="+", type=int, default=None, help="list of GPUs to use")
     parser.add_argument(
-        "-gml", "--gpu_memory_limit", type=int, default=None, help="maximum memory in MB to allocate per GPU device"
+        "-gml",
+        "--gpu_memory_limit",
+        type=float,
+        default=None,
+        help="maximum memory fraction [0, 1] allowed to allocate per GPU device",
     )
     parser.add_argument(
         "-dpt",
diff --git a/ludwig/hyperopt/run.py b/ludwig/hyperopt/run.py
index b502d1e0e54..a18ab32dc4b 100644
--- a/ludwig/hyperopt/run.py
+++ b/ludwig/hyperopt/run.py
@@ -82,7 +82,7 @@ def hyperopt(
     skip_save_hyperopt_statistics: bool = False,
     output_directory: str = "results",
     gpus: Union[str, int, List[int]] = None,
-    gpu_memory_limit: int = None,
+    gpu_memory_limit: Optional[float] = None,
     allow_parallel_threads: bool = True,
     callbacks: List[Callback] = None,
     backend: Union[Backend, str] = None,
@@ -173,8 +173,8 @@ def hyperopt(
         model and the training progress files.
     :param gpus: (list, default: `None`) list of GPUs that are available
         for training.
-    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-        allocate per GPU device.
+    :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+        [0, 1] allowed to allocate per GPU device.
     :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
         to use multithreading parallelism to improve performance at
         the cost of determinism.
diff --git a/ludwig/hyperopt_cli.py b/ludwig/hyperopt_cli.py
index fb81311770f..ab0bd23baab 100644
--- a/ludwig/hyperopt_cli.py
+++ b/ludwig/hyperopt_cli.py
@@ -16,7 +16,7 @@
 import argparse
 import logging
 import sys
-from typing import List, Union
+from typing import List, Optional, Union
 
 from ludwig.backend import ALL_BACKENDS, Backend, initialize_backend
 from ludwig.callbacks import Callback
@@ -54,7 +54,7 @@ def hyperopt_cli(
     skip_save_hyperopt_statistics: bool = False,
     output_directory: str = "results",
     gpus: Union[str, int, List[int]] = None,
-    gpu_memory_limit: int = None,
+    gpu_memory_limit: Optional[float] = None,
     allow_parallel_threads: bool = True,
     callbacks: List[Callback] = None,
     backend: Union[Backend, str] = None,
@@ -139,8 +139,8 @@ def hyperopt_cli(
         model and the training progress files.
     :param gpus: (list, default: `None`) list of GPUs that are available
         for training.
-    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-        allocate per GPU device.
+    :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+        [0, 1] allowed to allocate per GPU device.
     :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
         to use multithreading parallelism to improve performance at
         the cost of determinism.
@@ -373,7 +373,11 @@ def cli(sys_argv):
     )
     parser.add_argument("-g", "--gpus", nargs="+", type=int, default=None, help="list of gpus to use")
     parser.add_argument(
-        "-gml", "--gpu_memory_limit", type=int, default=None, help="maximum memory in MB to allocate per GPU device"
+        "-gml",
+        "--gpu_memory_limit",
+        type=float,
+        default=None,
+        help="maximum memory fraction [0, 1] allowed to allocate per GPU device",
     )
     parser.add_argument(
         "-b",
diff --git a/ludwig/predict.py b/ludwig/predict.py
index 7fb55476f29..7b2e2855027 100644
--- a/ludwig/predict.py
+++ b/ludwig/predict.py
@@ -16,7 +16,7 @@
 import argparse
 import logging
 import sys
-from typing import List, Union
+from typing import List, Optional, Union
 
 import pandas as pd
 
@@ -41,7 +41,7 @@ def predict_cli(
     skip_save_predictions: bool = False,
     output_directory: str = "results",
     gpus: Union[str, int, List[int]] = None,
-    gpu_memory_limit: int = None,
+    gpu_memory_limit: Optional[float] = None,
     allow_parallel_threads: bool = True,
     callbacks: List[Callback] = None,
     backend: Union[Backend, str] = None,
@@ -78,8 +78,8 @@ def predict_cli(
         model and the training progress files.
     :param gpus: (list, default: `None`) list of GPUs that are available
         for training.
-    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-        allocate per GPU device.
+    :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+        [0, 1] allowed to allocate per GPU device.
     :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
         to use multithreading parallelism to improve performance at
         the cost of determinism.
@@ -189,7 +189,11 @@ def cli(sys_argv):
     # ------------------
     parser.add_argument("-g", "--gpus", type=int, default=0, help="list of gpu to use")
     parser.add_argument(
-        "-gml", "--gpu_memory_limit", type=int, default=None, help="maximum memory in MB to allocate per GPU device"
+        "-gml",
+        "--gpu_memory_limit",
+        type=float,
+        default=None,
+        help="maximum memory fraction [0, 1] allowed to allocate per GPU device",
     )
     parser.add_argument(
         "-dpt",
diff --git a/ludwig/preprocess.py b/ludwig/preprocess.py
index a216ce025ef..1e4cf6be0b3 100644
--- a/ludwig/preprocess.py
+++ b/ludwig/preprocess.py
@@ -122,8 +122,8 @@ def preprocess_cli(
         model and the training progress files.
     :param gpus: (list, default: `None`) list of GPUs that are available
         for training.
-    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-        allocate per GPU device.
+    :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+        [0, 1] allowed to allocate per GPU device.
     :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
         to use multithreading parallelism to improve performance at
         the cost of determinism.
diff --git a/ludwig/train.py b/ludwig/train.py
index 629b7ee2999..17ceb4a4aab 100644
--- a/ludwig/train.py
+++ b/ludwig/train.py
@@ -16,7 +16,7 @@
 import argparse
 import logging
 import sys
-from typing import List, Union
+from typing import List, Optional, Union
 
 import pandas as pd
 
@@ -53,7 +53,7 @@ def train_cli(
     skip_save_processed_input: bool = False,
     output_directory: str = "results",
     gpus: Union[str, int, List[int]] = None,
-    gpu_memory_limit: int = None,
+    gpu_memory_limit: Optional[float] = None,
     allow_parallel_threads: bool = True,
     callbacks: List[Callback] = None,
     backend: Union[Backend, str] = None,
@@ -136,8 +136,8 @@ def train_cli(
         model and the training progress files.
     :param gpus: (list, default: `None`) list of GPUs that are available
         for training.
-    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
-        allocate per GPU device.
+    :param gpu_memory_limit: (float: default: `None`) maximum memory fraction
+        [0, 1] allowed to allocate per GPU device.
     :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
         to use multithreading parallelism to improve performance at
         the cost of determinism.
@@ -348,7 +348,11 @@ def cli(sys_argv):
     )
     parser.add_argument("-g", "--gpus", nargs="+", type=int, default=None, help="list of gpus to use")
     parser.add_argument(
-        "-gml", "--gpu_memory_limit", type=int, default=None, help="maximum memory in MB to allocate per GPU device"
+        "-gml",
+        "--gpu_memory_limit",
+        type=float,
+        default=None,
+        help="maximum memory fraction [0, 1] allowed to allocate per GPU device",
     )
     parser.add_argument(
         "-dpt",
diff --git a/tests/integration_tests/test_cli.py b/tests/integration_tests/test_cli.py
index 346b3afc91b..12c804403a0 100644
--- a/tests/integration_tests/test_cli.py
+++ b/tests/integration_tests/test_cli.py
@@ -120,6 +120,15 @@ def test_train_cli_dataset(tmpdir, csv_filename):
     _run_ludwig("train", dataset=dataset_filename, config=config_filename, output_directory=str(tmpdir))
 
 
+def test_train_cli_gpu_memory_limit(tmpdir, csv_filename):
+    """Test training using `ludwig train --dataset --gpu_memory_limit`."""
+    config_filename = os.path.join(tmpdir, "config.yaml")
+    dataset_filename = _prepare_data(csv_filename, config_filename)
+    _run_ludwig(
+        "train", dataset=dataset_filename, config=config_filename, output_directory=str(tmpdir), gpu_memory_limit="0.5"
+    )
+
+
 def test_train_cli_training_set(tmpdir, csv_filename):
     """Test training using `ludwig train --training_set`."""
     config_filename = os.path.join(tmpdir, "config.yaml")
diff --git a/tests/integration_tests/test_horovod.py b/tests/integration_tests/test_horovod.py
index d4b8d3e6313..78597dc7b5b 100644
--- a/tests/integration_tests/test_horovod.py
+++ b/tests/integration_tests/test_horovod.py
@@ -96,5 +96,5 @@ def test_horovod_implicit(csv_filename):
 @pytest.mark.distributed
 def test_horovod_gpu_memory_limit(csv_filename):
     """Test Horovod with explicit GPU memory limit set."""
-    ludwig_kwargs = dict(gpu_memory_limit=128)
+    ludwig_kwargs = dict(gpu_memory_limit="0.5")
     _run_horovod(csv_filename, **ludwig_kwargs)

From c2fb70223dfb8e16e342f4aa0a139101f2e5e2e8 Mon Sep 17 00:00:00 2001
From: connor-mccorm <97468934+connor-mccorm@users.noreply.github.com>
Date: Mon, 17 Oct 2022 20:20:25 -0700
Subject: [PATCH 26/29] Stopgap solution for test_training_determinism (#2665)

* Added eq to fix training determinism failure

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update num samples so no nans

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 tests/ludwig/models/test_training_determinism.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ludwig/models/test_training_determinism.py b/tests/ludwig/models/test_training_determinism.py
index 1ee57dbe59c..4621dbf9d0f 100644
--- a/tests/ludwig/models/test_training_determinism.py
+++ b/tests/ludwig/models/test_training_determinism.py
@@ -73,7 +73,7 @@ def train_twice(backend, csv_filename, tmpdir):
     config = {"input_features": input_features, "output_features": output_features, TRAINER: {"epochs": 2}}
 
     # Generate training data
-    training_data_csv_path = generate_data(input_features, output_features, csv_filename)
+    training_data_csv_path = generate_data(input_features, output_features, csv_filename, num_examples=100)
 
     ludwig_model_1 = LudwigModel(config, logging_level=logging.ERROR, backend=backend)
     ludwig_model_2 = LudwigModel(config, logging_level=logging.ERROR, backend=backend)

From f131f124ca3c0f785aa57c3525f050195409a9c9 Mon Sep 17 00:00:00 2001
From: connor-mccorm <97468934+connor-mccorm@users.noreply.github.com>
Date: Tue, 18 Oct 2022 10:58:27 -0700
Subject: [PATCH 27/29] Added min and max to sample ratio (#2655)

* Added min and max to sample ratio

* Added split default
---
 ludwig/schema/preprocessing.py | 5 ++++-
 ludwig/schema/split.py         | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/ludwig/schema/preprocessing.py b/ludwig/schema/preprocessing.py
index 266944e556e..df1a9efd922 100644
--- a/ludwig/schema/preprocessing.py
+++ b/ludwig/schema/preprocessing.py
@@ -14,8 +14,11 @@ class PreprocessingConfig(schema_utils.BaseMarshmallowConfig):
         default=RANDOM,
     )
 
-    sample_ratio: float = schema_utils.NonNegativeFloat(
+    sample_ratio: float = schema_utils.FloatRange(
         default=1.0,
+        min=0,
+        max=1,
+        min_inclusive=False,
         description="Ratio of the dataset to use for training. If 1.0, all the data is used for training.",
         parameter_metadata=PREPROCESSING_METADATA["sample_ratio"],
     )
diff --git a/ludwig/schema/split.py b/ludwig/schema/split.py
index 4973a8bf4de..21482d277eb 100644
--- a/ludwig/schema/split.py
+++ b/ludwig/schema/split.py
@@ -3,7 +3,7 @@
 from marshmallow import fields, ValidationError
 from marshmallow_dataclass import dataclass
 
-from ludwig.constants import TYPE
+from ludwig.constants import SPLIT, TYPE
 from ludwig.schema import utils as schema_utils
 from ludwig.utils.registry import Registry
 
@@ -51,6 +51,7 @@ class FixedSplitConfig(BaseSplitConfig):
     )
 
     column: str = schema_utils.String(
+        default=SPLIT,
         description="The column name to use for fixed splitting.",
     )
 

From 0cc14b8c8af0445ee34619305b6dbe6a37f9788c Mon Sep 17 00:00:00 2001
From: connor-mccorm <97468934+connor-mccorm@users.noreply.github.com>
Date: Tue, 18 Oct 2022 11:41:19 -0700
Subject: [PATCH 28/29] Set internal only flags (#2659)

* Set internal only flags

* Update descriptions

* Fixed dropout rate

* Fixed typo

* Add title to preprocessing

* add titles

* Removed additional properties constraint on defaults
---
 ludwig/schema/combiners/utils.py              |  2 +-
 ludwig/schema/defaults/defaults.py            |  9 +++-
 ludwig/schema/encoders/sequence_encoders.py   |  4 +-
 ludwig/schema/encoders/text_encoders.py       |  6 ++-
 ludwig/schema/features/preprocessing/utils.py |  1 +
 ludwig/schema/features/utils.py               |  2 +
 ludwig/schema/metadata/combiner_metadata.py   |  2 +-
 ludwig/schema/metadata/encoder_metadata.py    | 48 +++++++++----------
 ludwig/schema/preprocessing.py                |  2 +-
 ludwig/schema/trainer.py                      |  2 +-
 10 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/ludwig/schema/combiners/utils.py b/ludwig/schema/combiners/utils.py
index d1f2edb338f..84cf11ad0a1 100644
--- a/ludwig/schema/combiners/utils.py
+++ b/ludwig/schema/combiners/utils.py
@@ -27,7 +27,7 @@ def get_combiner_jsonschema():
                 "type": "string",
                 "enum": combiner_types,
                 "default": "concat",
-                "title": "type",
+                "title": "combiner_options",
                 "description": "Select the combiner type.",
                 "parameter_metadata": parameter_metadata,
             },
diff --git a/ludwig/schema/defaults/defaults.py b/ludwig/schema/defaults/defaults.py
index 777baa1ae89..6bac8ba7d58 100644
--- a/ludwig/schema/defaults/defaults.py
+++ b/ludwig/schema/defaults/defaults.py
@@ -37,4 +37,11 @@ class DefaultsConfig(schema_utils.BaseMarshmallowConfig):
 def get_defaults_jsonschema():
     """Returns a JSON schema structured to only require a `type` key and then conditionally apply a corresponding
     combiner's field constraints."""
-    return schema_utils.unload_jsonschema_from_marshmallow_class(DefaultsConfig)
+    preproc_schema = schema_utils.unload_jsonschema_from_marshmallow_class(DefaultsConfig)
+    props = preproc_schema["properties"]
+    return {
+        "type": "object",
+        "properties": props,
+        "title": "global_defaults_options",
+        "description": "Set global defaults for input and output features",
+    }
diff --git a/ludwig/schema/encoders/sequence_encoders.py b/ludwig/schema/encoders/sequence_encoders.py
index 76f37988f19..72c83ce1739 100644
--- a/ludwig/schema/encoders/sequence_encoders.py
+++ b/ludwig/schema/encoders/sequence_encoders.py
@@ -735,7 +735,7 @@ class StackedRNNConfig(BaseEncoderConfig):
 
     num_layers: int = schema_utils.PositiveInteger(
         default=1,
-        description="the number of stacked recurrent layers.",
+        description="The number of stacked recurrent layers.",
         parameter_metadata=ENCODER_METADATA["StackedRNN"]["num_layers"],
     )
 
@@ -1234,7 +1234,7 @@ class StackedTransformerConfig(BaseEncoderConfig):
 
     num_layers: int = schema_utils.PositiveInteger(
         default=1,
-        description="the number of stacked recurrent layers.",
+        description="The number of transformer layers.",
         parameter_metadata=ENCODER_METADATA["StackedTransformer"]["num_layers"],
     )
 
diff --git a/ludwig/schema/encoders/text_encoders.py b/ludwig/schema/encoders/text_encoders.py
index 0d875558303..20e68d01c6e 100644
--- a/ludwig/schema/encoders/text_encoders.py
+++ b/ludwig/schema/encoders/text_encoders.py
@@ -309,8 +309,10 @@ class MT5Config(BaseEncoderConfig):
         parameter_metadata=ENCODER_METADATA["MT5Encoder"]["relative_attention_num_buckets"],
     )
 
-    dropout_rate: float = schema_utils.NonNegativeFloat(
+    dropout_rate: float = schema_utils.FloatRange(
         default=0.1,
+        min=0,
+        max=1,
         description="The ratio for all dropout layers.",
         parameter_metadata=ENCODER_METADATA["MT5Encoder"]["dropout_rate"],
     )
@@ -2276,6 +2278,8 @@ class T5Config(BaseEncoderConfig):
 
     dropout_rate: float = schema_utils.FloatRange(
         default=0.1,
+        min=0,
+        max=1,
         description="The ratio for all dropout layers.",
         parameter_metadata=ENCODER_METADATA["T5Encoder"]["dropout_rate"],
     )
diff --git a/ludwig/schema/features/preprocessing/utils.py b/ludwig/schema/features/preprocessing/utils.py
index cd5331b4d62..0b55835dcaf 100644
--- a/ludwig/schema/features/preprocessing/utils.py
+++ b/ludwig/schema/features/preprocessing/utils.py
@@ -52,6 +52,7 @@ def _jsonschema_type_mapping():
             return {
                 "type": "object",
                 "properties": props,
+                "title": "preprocessing_options",
                 "additionalProperties": False,
             }
 
diff --git a/ludwig/schema/features/utils.py b/ludwig/schema/features/utils.py
index 09bd4a78830..14575bad953 100644
--- a/ludwig/schema/features/utils.py
+++ b/ludwig/schema/features/utils.py
@@ -39,6 +39,7 @@ def get_input_feature_jsonschema():
             "additionalProperties": True,
             "allOf": get_input_feature_conds(),
             "required": ["name", "type"],
+            "title": "input_features",
         },
     }
 
@@ -84,6 +85,7 @@ def get_output_feature_jsonschema():
             "additionalProperties": True,
             "allOf": get_output_feature_conds(),
             "required": ["name", "type"],
+            "title": "output_features",
         },
     }
 
diff --git a/ludwig/schema/metadata/combiner_metadata.py b/ludwig/schema/metadata/combiner_metadata.py
index 7f7ebf5519b..3442f17c7ea 100644
--- a/ludwig/schema/metadata/combiner_metadata.py
+++ b/ludwig/schema/metadata/combiner_metadata.py
@@ -127,7 +127,7 @@
             other_information=None,
             description_implications="The more layers that are specified the deeper and higher capacity the model "
             "will be. This makes it possible to potentially achieve better performance when "
-            "a big anough amount of data is provided, but also makes the model more "
+            "a big enough amount of data is provided, but also makes the model more "
             "computationally expensive and potentially more prone to overfitting.",
             suggested_values=None,
             suggested_values_reasoning="It is easier to define a stack of fully connected layers by just specifying "
diff --git a/ludwig/schema/metadata/encoder_metadata.py b/ludwig/schema/metadata/encoder_metadata.py
index 8cf3539103c..374b343d617 100644
--- a/ludwig/schema/metadata/encoder_metadata.py
+++ b/ludwig/schema/metadata/encoder_metadata.py
@@ -463,7 +463,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "AutoTransformerEncoder": {
@@ -564,7 +564,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "BERTEncoder": {
@@ -945,7 +945,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "BagEmbedWeightedEncoder": {
@@ -1615,7 +1615,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "CamemBERTEncoder": {
@@ -1994,7 +1994,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "CategoricalEmbedEncoder": {
@@ -3378,7 +3378,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "ELECTRAEncoder": {
@@ -3759,7 +3759,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "FlauBERTEncoder": {
@@ -4260,7 +4260,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "GPT2Encoder": {
@@ -4559,7 +4559,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "GPTEncoder": {
@@ -4847,7 +4847,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "H3Embed": {
@@ -5961,7 +5961,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "MLPMixerEncoder": {
@@ -6528,7 +6528,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "ParallelCNN": {
@@ -6947,7 +6947,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
         "use_bias": ParameterMetadata(
             ui_display_name="Use Bias",
@@ -7602,7 +7602,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "SequenceEmbedEncoder": {
@@ -9225,7 +9225,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
         "strides": ParameterMetadata(
             ui_display_name="Stride",
@@ -9959,7 +9959,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
         "state_size": ParameterMetadata(
             ui_display_name=None,
@@ -10488,7 +10488,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
         "stacked_layers": ParameterMetadata(
             ui_display_name=None,
@@ -11031,7 +11031,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
         "state_size": ParameterMetadata(
             ui_display_name=None,
@@ -11531,7 +11531,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
         "transformer_output_size": ParameterMetadata(
             ui_display_name="Transformer Output Size",
@@ -11919,7 +11919,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "TransformerXLEncoder": {
@@ -12409,7 +12409,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "ViTEncoder": {
@@ -13253,7 +13253,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "XLMRoBERTaEncoder": {
@@ -13441,7 +13441,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "XLNetEncoder": {
@@ -13951,7 +13951,7 @@
             commonly_used=False,
             expected_impact=ExpectedImpact.UNKNOWN,
             literature_references=None,
-            internal_only=False,
+            internal_only=True,
         ),
     },
     "encoder": {
diff --git a/ludwig/schema/preprocessing.py b/ludwig/schema/preprocessing.py
index df1a9efd922..a9abaa76d12 100644
--- a/ludwig/schema/preprocessing.py
+++ b/ludwig/schema/preprocessing.py
@@ -49,6 +49,6 @@ def get_preprocessing_jsonschema():
         "type": "object",
         "properties": props,
         "additionalProperties": False,
-        "title": "type",
+        "title": "global_preprocessing_options",
         "description": "Select the preprocessing type.",
     }
diff --git a/ludwig/schema/trainer.py b/ludwig/schema/trainer.py
index 2f5924575be..b06bb18bb14 100644
--- a/ludwig/schema/trainer.py
+++ b/ludwig/schema/trainer.py
@@ -521,7 +521,7 @@ def get_model_type_jsonschema():
         "type": "string",
         "enum": [MODEL_ECD, MODEL_GBM, "ecd_ray_legacy"],
         "default": MODEL_ECD,
-        "title": "type",
+        "title": "model_type",
         "description": "Select the model type.",
     }
 

From c9a5c0a29bb79599ee5ce60c49f9eed409e0d0c0 Mon Sep 17 00:00:00 2001
From: Daniel Treiman <dan.treiman@gmail.com>
Date: Tue, 18 Oct 2022 12:24:30 -0700
Subject: [PATCH 29/29] Add support for running pytest github action locally
 with act (#2661)

* Got pytest mostly working with act for mac.

* Reverts installing node for act.

* Better name for extra act action.

* Adds .actrc
---
 .actrc                       | 1 +
 .github/workflows/pytest.yml | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 .actrc

diff --git a/.actrc b/.actrc
new file mode 100644
index 00000000000..68c3d775469
--- /dev/null
+++ b/.actrc
@@ -0,0 +1 @@
+-P ubuntu-latest=ludwigai/ludwig-ray
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 678a9bb4ab4..2a6c73eba4b 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -60,6 +60,13 @@ jobs:
 
     timeout-minutes: 80
     steps:
+      - name: Setup ludwigai/ludwig-ray container for local testing with act.
+        if: ${{ env.ACT }}
+        run: |
+          curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
+          sudo apt-get install -y nodejs
+          sudo mkdir -p /opt/hostedtoolcache/
+          sudo chmod 777 -R /opt/hostedtoolcache/
       - uses: actions/checkout@v2
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v2