Transformer divisibility error validation #3105

Merged · 6 commits · Feb 21, 2023
25 changes: 25 additions & 0 deletions ludwig/config_validation/checks.py
@@ -23,6 +23,7 @@
    SEQUENCE,
    SET,
    TEXT,
    TIMESERIES,
    TRAINER,
    TYPE,
    VECTOR,
@@ -342,3 +343,27 @@ def check_hf_encoder_requirements(config: "ModelConfig") -> None: # noqa: F821
raise ConfigValidationError(
"Pretrained model name or path must be specified for HuggingFace encoder."
)


@register_config_check
def check_stacked_transformer_requirements(config: "ModelConfig") -> None:  # noqa: F821
    """Checks that transformer encoders configure `hidden_size` to be divisible by `num_heads`."""

    def is_divisible(hidden_size: int, num_heads: int) -> bool:
        """Checks that hidden_size is divisible by num_heads."""
        return hidden_size % num_heads == 0

    sequence_types = [SEQUENCE, TEXT, TIMESERIES]

    for input_feature in config.input_features:
        if_type = input_feature.type
        encoder = input_feature.encoder
        if (
            if_type in sequence_types
            and encoder.type == "transformer"
            and not is_divisible(encoder.hidden_size, encoder.num_heads)
        ):
            raise ConfigValidationError(
                f"Input feature {input_feature.name} transformer encoder requires encoder.hidden_size to be divisible "
                f"by encoder.num_heads. Found hidden_size {encoder.hidden_size} and num_heads {encoder.num_heads}."
            )
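
For context, a minimal sketch (not part of this diff) of how the new check surfaces when a config is built programmatically. The feature names below are illustrative; the failing hyperparameters (hidden_size=9, num_heads=18) are the same values exercised by the tests that follow.

from ludwig.error import ConfigValidationError
from ludwig.schema.model_config import ModelConfig

# hidden_size=9 is not divisible by num_heads=18, so validation is expected to fail.
config = {
    "input_features": [
        {
            "name": "review",  # illustrative feature name
            "type": "text",
            "encoder": {"type": "transformer", "hidden_size": 9, "num_heads": 18},
        }
    ],
    "output_features": [{"name": "label", "type": "binary"}],
}

try:
    ModelConfig.from_dict(config)
except ConfigValidationError as err:
    print(err)  # reports the offending hidden_size / num_heads pair

# A divisible pair (e.g. hidden_size=18, num_heads=9) builds without error.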
59 changes: 59 additions & 0 deletions tests/ludwig/config_validation/test_validate_config_encoder.py
@@ -0,0 +1,59 @@
import pytest

from ludwig.constants import DEFAULTS, ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, SEQUENCE, TEXT, TIMESERIES, TYPE
from ludwig.error import ConfigValidationError
from ludwig.schema.model_config import ModelConfig
from tests.integration_tests.utils import (
    binary_feature,
    number_feature,
    sequence_feature,
    text_feature,
    timeseries_feature,
)


@pytest.mark.parametrize("feature_type", [SEQUENCE, TEXT, TIMESERIES])
def test_default_transformer_encoder(feature_type):
    """Tests that a transformer hyperparameter divisibility error is correctly recognized in feature defaults.

    Transformers require that `hidden_size % num_heads == 0`. 9 and 18 were selected as test values because they were
    the values from the original error.
    """
    config = {
        INPUT_FEATURES: [number_feature(), {TYPE: feature_type, NAME: f"test_{feature_type}"}],
        OUTPUT_FEATURES: [binary_feature()],
        DEFAULTS: {feature_type: {ENCODER: {TYPE: "transformer", "hidden_size": 9, "num_heads": 18}}},
    }

    with pytest.raises(ConfigValidationError):
        m = ModelConfig.from_dict(config)
        print(m)

    config[DEFAULTS][feature_type][ENCODER]["hidden_size"] = 18
    config[DEFAULTS][feature_type][ENCODER]["num_heads"] = 9

    ModelConfig.from_dict(config)


@pytest.mark.parametrize("feature_gen", [sequence_feature, text_feature, timeseries_feature])
def test_input_feature_transformer_encoder(feature_gen):
    """Tests that a transformer hyperparameter divisibility error is correctly recognized for a specific feature.

    Transformers require that `hidden_size % num_heads == 0`. 9 and 18 were selected as test values because they were
    the values from the original error.
    """
    config = {
        INPUT_FEATURES: [
            number_feature(),
            feature_gen(**{ENCODER: {TYPE: "transformer", "hidden_size": 9, "num_heads": 18}}),
        ],
        OUTPUT_FEATURES: [binary_feature()],
    }

    with pytest.raises(ConfigValidationError):
        ModelConfig.from_dict(config)

    config[INPUT_FEATURES][1][ENCODER]["hidden_size"] = 18
    config[INPUT_FEATURES][1][ENCODER]["num_heads"] = 9

    ModelConfig.from_dict(config)