Transformer divisibility error validation #3105

Merged · 6 commits · Feb 21, 2023
25 changes: 25 additions & 0 deletions ludwig/config_validation/checks.py
@@ -23,6 +23,7 @@
    SEQUENCE,
    SET,
    TEXT,
    TIMESERIES,
    TRAINER,
    TYPE,
    VECTOR,
@@ -342,3 +343,27 @@ def check_hf_encoder_requirements(config: "ModelConfig") -> None: # noqa: F821
raise ConfigValidationError(
"Pretrained model name or path must be specified for HuggingFace encoder."
)


@register_config_check
def check_stacked_transformer_requirements(config: "ModelConfig") -> None:  # noqa: F821
    """Checks that transformer encoders configure `hidden_size` to be divisible by `num_heads`."""

    def is_divisible(hidden_size: int, num_heads: int) -> bool:
        """Checks that hidden_size is divisible by num_heads."""
        return hidden_size % num_heads == 0

    sequence_types = [SEQUENCE, TEXT, TIMESERIES]

    for input_feature in config.input_features:
        if_type = input_feature.type
        encoder = input_feature.encoder
        if (
            if_type in sequence_types
            and encoder.type == "transformer"
            and not is_divisible(encoder.hidden_size, encoder.num_heads)
        ):
            raise ConfigValidationError(
                f"Input feature {input_feature.name} transformer encoder requires encoder.hidden_size to be divisible "
                f"by encoder.num_heads. Found hidden_size {encoder.hidden_size} and num_heads {encoder.num_heads}."
            )
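
For context, a minimal sketch (not part of this diff) of how the new check surfaces when a config is built programmatically. The feature names below are illustrative; the failing hyperparameters (hidden_size=9, num_heads=18) are the same values exercised by the tests that follow.

from ludwig.error import ConfigValidationError
from ludwig.schema.model_config import ModelConfig

# hidden_size=9 is not divisible by num_heads=18, so validation is expected to fail.
config = {
    "input_features": [
        {
            "name": "review",  # illustrative feature name
            "type": "text",
            "encoder": {"type": "transformer", "hidden_size": 9, "num_heads": 18},
        }
    ],
    "output_features": [{"name": "label", "type": "binary"}],
}

try:
    ModelConfig.from_dict(config)
except ConfigValidationError as err:
    print(err)  # reports the offending hidden_size / num_heads pair

# A divisible pair (e.g. hidden_size=18, num_heads=9) builds without error.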
59 changes: 59 additions & 0 deletions tests/ludwig/config_validation/test_validate_config_encoder.py
@@ -0,0 +1,59 @@
import pytest

from ludwig.constants import DEFAULTS, ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, SEQUENCE, TEXT, TIMESERIES, TYPE
from ludwig.error import ConfigValidationError
from ludwig.schema.model_config import ModelConfig
from tests.integration_tests.utils import (
    binary_feature,
    number_feature,
    sequence_feature,
    text_feature,
    timeseries_feature,
)


@pytest.mark.parametrize("feature_type", [SEQUENCE, TEXT, TIMESERIES])
def test_default_transformer_encoder(feature_type):
    """Tests that a transformer hyperparameter divisibility error is correctly recognized in feature defaults.

    Transformers require that `hidden_size % num_heads == 0`. 9 and 18 were selected as test values because they were
    the values from the original error.
    """
    config = {
        INPUT_FEATURES: [number_feature(), {TYPE: feature_type, NAME: f"test_{feature_type}"}],
        OUTPUT_FEATURES: [binary_feature()],
        DEFAULTS: {feature_type: {ENCODER: {TYPE: "transformer", "hidden_size": 9, "num_heads": 18}}},
    }

    with pytest.raises(ConfigValidationError):
        m = ModelConfig.from_dict(config)
        print(m)

    config[DEFAULTS][feature_type][ENCODER]["hidden_size"] = 18
    config[DEFAULTS][feature_type][ENCODER]["num_heads"] = 9

    ModelConfig.from_dict(config)


@pytest.mark.parametrize("feature_gen", [sequence_feature, text_feature, timeseries_feature])
def test_input_feature_transformer_encoder(feature_gen):
    """Tests that a transformer hyperparameter divisibility error is correctly recognized for a specific feature.

    Transformers require that `hidden_size % num_heads == 0`. 9 and 18 were selected as test values because they were
    the values from the original error.
    """
    config = {
        INPUT_FEATURES: [
            number_feature(),
            feature_gen(**{ENCODER: {TYPE: "transformer", "hidden_size": 9, "num_heads": 18}}),
        ],
        OUTPUT_FEATURES: [binary_feature()],
    }

    with pytest.raises(ConfigValidationError):
        ModelConfig.from_dict(config)

    config[INPUT_FEATURES][1][ENCODER]["hidden_size"] = 18
    config[INPUT_FEATURES][1][ENCODER]["num_heads"] = 9

    ModelConfig.from_dict(config)