diff --git a/ludwig/constants.py b/ludwig/constants.py index 9dac2ea2f11..26a5f2d2c89 100644 --- a/ludwig/constants.py +++ b/ludwig/constants.py @@ -100,7 +100,6 @@ MISSING_VALUE_STRATEGY_OPTIONS = [ FILL_WITH_CONST, FILL_WITH_MODE, - FILL_WITH_MEAN, BFILL, FFILL, DROP_ROW, diff --git a/ludwig/schema/features/preprocessing/bag.py b/ludwig/schema/features/preprocessing/bag.py index b6b7fcb03d3..6f6402b95c7 100644 --- a/ludwig/schema/features/preprocessing/bag.py +++ b/ludwig/schema/features/preprocessing/bag.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BAG, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import BAG, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -26,7 +26,7 @@ class BagPreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a set column", parameter_metadata=FEATURE_METADATA[BAG][PREPROCESSING]["missing_value_strategy"], diff --git a/ludwig/schema/features/preprocessing/binary.py b/ludwig/schema/features/preprocessing/binary.py index b3ac179a349..4fd0db8bea9 100644 --- a/ludwig/schema/features/preprocessing/binary.py +++ b/ludwig/schema/features/preprocessing/binary.py @@ -1,7 +1,7 @@ from typing import Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BINARY, DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import BINARY, DROP_ROW, FILL_WITH_FALSE, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -17,8 +17,8 @@ class BinaryPreprocessingConfig(BasePreprocessingConfig): """BinaryPreprocessingConfig is a dataclass that configures the parameters used for a binary input feature.""" missing_value_strategy: str = schema_utils.StringOptions( - MISSING_VALUE_STRATEGY_OPTIONS + ["fill_with_false"], - default="fill_with_false", + MISSING_VALUE_STRATEGY_OPTIONS + [FILL_WITH_FALSE], + default=FILL_WITH_FALSE, allow_none=False, description="What strategy to follow when there's a missing value in a binary column", parameter_metadata=FEATURE_METADATA[BINARY][PREPROCESSING]["missing_value_strategy"], @@ -63,7 +63,7 @@ class BinaryPreprocessingConfig(BasePreprocessingConfig): @ludwig_dataclass class BinaryOutputPreprocessingConfig(BinaryPreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( - MISSING_VALUE_STRATEGY_OPTIONS + ["fill_with_false"], + MISSING_VALUE_STRATEGY_OPTIONS + [FILL_WITH_FALSE], default=DROP_ROW, allow_none=False, description="What strategy to follow when there's a missing value in a binary output feature", diff --git a/ludwig/schema/features/preprocessing/category.py b/ludwig/schema/features/preprocessing/category.py index 7c1a9439cbf..ff7c4c999de 100644 --- a/ludwig/schema/features/preprocessing/category.py +++ b/ludwig/schema/features/preprocessing/category.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import CATEGORY, DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import CATEGORY, DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -17,7 +17,7 @@ class CategoryPreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a category column", parameter_metadata=FEATURE_METADATA[CATEGORY][PREPROCESSING]["missing_value_strategy"], diff --git a/ludwig/schema/features/preprocessing/date.py b/ludwig/schema/features/preprocessing/date.py index 132925df12c..597ea8d53be 100644 --- a/ludwig/schema/features/preprocessing/date.py +++ b/ludwig/schema/features/preprocessing/date.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DATE, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import BFILL, DATE, DROP_ROW, FFILL, FILL_WITH_CONST, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -12,8 +12,8 @@ @ludwig_dataclass class DatePreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( - MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + [FILL_WITH_CONST, BFILL, FFILL, DROP_ROW], + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a date column", parameter_metadata=FEATURE_METADATA[DATE][PREPROCESSING]["missing_value_strategy"], diff --git a/ludwig/schema/features/preprocessing/h3.py b/ludwig/schema/features/preprocessing/h3.py index 3aa9068bd9e..51b57ee3984 100644 --- a/ludwig/schema/features/preprocessing/h3.py +++ b/ludwig/schema/features/preprocessing/h3.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import H3, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import FILL_WITH_CONST, H3, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -13,7 +13,7 @@ class H3PreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in an h3 column", parameter_metadata=FEATURE_METADATA[H3][PREPROCESSING]["missing_value_strategy"], diff --git a/ludwig/schema/features/preprocessing/number.py b/ludwig/schema/features/preprocessing/number.py index 16b0a6b8ab1..01e5f5b3a10 100644 --- a/ludwig/schema/features/preprocessing/number.py +++ b/ludwig/schema/features/preprocessing/number.py @@ -1,5 +1,12 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, NUMBER, PREPROCESSING +from ludwig.constants import ( + DROP_ROW, + FILL_WITH_CONST, + FILL_WITH_MEAN, + MISSING_VALUE_STRATEGY_OPTIONS, + NUMBER, + PREPROCESSING, +) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -14,8 +21,8 @@ class NumberPreprocessingConfig(BasePreprocessingConfig): """NumberPreprocessingConfig is a dataclass that configures the parameters used for a number input feature.""" missing_value_strategy: str = schema_utils.StringOptions( - MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + MISSING_VALUE_STRATEGY_OPTIONS + [FILL_WITH_MEAN], + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a number column", parameter_metadata=FEATURE_METADATA[NUMBER][PREPROCESSING]["missing_value_strategy"], @@ -50,7 +57,7 @@ class NumberPreprocessingConfig(BasePreprocessingConfig): @ludwig_dataclass class NumberOutputPreprocessingConfig(NumberPreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( - MISSING_VALUE_STRATEGY_OPTIONS, + MISSING_VALUE_STRATEGY_OPTIONS + [FILL_WITH_MEAN], default=DROP_ROW, allow_none=False, description="What strategy to follow when there's a missing value in a number output feature", diff --git a/ludwig/schema/features/preprocessing/sequence.py b/ludwig/schema/features/preprocessing/sequence.py index 59272ff46b4..210ffff1ce1 100644 --- a/ludwig/schema/features/preprocessing/sequence.py +++ b/ludwig/schema/features/preprocessing/sequence.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SEQUENCE +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SEQUENCE from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -75,7 +75,7 @@ class SequencePreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a text column", parameter_metadata=FEATURE_METADATA[SEQUENCE][PREPROCESSING]["missing_value_strategy"], diff --git a/ludwig/schema/features/preprocessing/set.py b/ludwig/schema/features/preprocessing/set.py index f5ca286111e..9d9ef513dcd 100644 --- a/ludwig/schema/features/preprocessing/set.py +++ b/ludwig/schema/features/preprocessing/set.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SET +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SET from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -24,7 +24,7 @@ class SetPreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a set column", parameter_metadata=FEATURE_METADATA[SET][PREPROCESSING]["missing_value_strategy"], diff --git a/ludwig/schema/features/preprocessing/text.py b/ludwig/schema/features/preprocessing/text.py index a14779b7e74..9f2505bf62f 100644 --- a/ludwig/schema/features/preprocessing/text.py +++ b/ludwig/schema/features/preprocessing/text.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TEXT +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TEXT from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -86,7 +86,7 @@ class TextPreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a text column", parameter_metadata=FEATURE_METADATA[TEXT][PREPROCESSING]["missing_value_strategy"], diff --git a/ludwig/schema/features/preprocessing/timeseries.py b/ludwig/schema/features/preprocessing/timeseries.py index 2f2d8a51cda..58a0ae0f8b6 100644 --- a/ludwig/schema/features/preprocessing/timeseries.py +++ b/ludwig/schema/features/preprocessing/timeseries.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TIMESERIES +from ludwig.constants import FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TIMESERIES from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -44,7 +44,7 @@ class TimeseriesPreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a text column", parameter_metadata=FEATURE_METADATA[TIMESERIES][PREPROCESSING]["missing_value_strategy"], diff --git a/ludwig/schema/features/preprocessing/vector.py b/ludwig/schema/features/preprocessing/vector.py index 1ab7b6e1aad..ca83daa14c3 100644 --- a/ludwig/schema/features/preprocessing/vector.py +++ b/ludwig/schema/features/preprocessing/vector.py @@ -1,5 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, VECTOR +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, VECTOR from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor @@ -20,7 +20,7 @@ class VectorPreprocessingConfig(BasePreprocessingConfig): missing_value_strategy: str = schema_utils.StringOptions( MISSING_VALUE_STRATEGY_OPTIONS, - default="fill_with_const", + default=FILL_WITH_CONST, allow_none=False, description="What strategy to follow when there's a missing value in a vector column", parameter_metadata=FEATURE_METADATA[VECTOR][PREPROCESSING]["missing_value_strategy"],