Skip to content

Commit

Permalink
Update schema to correctly reflect supported missing value strategies for different feature types (#3053)
Browse files (browse the repository at this point in the history)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
arnavgarg1 and pre-commit-ci[bot] authored Feb 8, 2023
1 parent 5846573 commit 0d646ca
Show file tree
Hide file tree
Showing 12 changed files with 34 additions and 28 deletions.
1 change: 0 additions & 1 deletion ludwig/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@
MISSING_VALUE_STRATEGY_OPTIONS = [
FILL_WITH_CONST,
FILL_WITH_MODE,
FILL_WITH_MEAN,
BFILL,
FFILL,
DROP_ROW,
Expand Down
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/bag.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import BAG, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.constants import BAG, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand All @@ -26,7 +26,7 @@ class BagPreprocessingConfig(BasePreprocessingConfig):

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a set column",
parameter_metadata=FEATURE_METADATA[BAG][PREPROCESSING]["missing_value_strategy"],
Expand Down
8 changes: 4 additions & 4 deletions ludwig/schema/features/preprocessing/binary.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Union

from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import BINARY, DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.constants import BINARY, DROP_ROW, FILL_WITH_FALSE, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand All @@ -17,8 +17,8 @@ class BinaryPreprocessingConfig(BasePreprocessingConfig):
"""BinaryPreprocessingConfig is a dataclass that configures the parameters used for a binary input feature."""

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS + ["fill_with_false"],
default="fill_with_false",
MISSING_VALUE_STRATEGY_OPTIONS + [FILL_WITH_FALSE],
default=FILL_WITH_FALSE,
allow_none=False,
description="What strategy to follow when there's a missing value in a binary column",
parameter_metadata=FEATURE_METADATA[BINARY][PREPROCESSING]["missing_value_strategy"],
Expand Down Expand Up @@ -63,7 +63,7 @@ class BinaryPreprocessingConfig(BasePreprocessingConfig):
@ludwig_dataclass
class BinaryOutputPreprocessingConfig(BinaryPreprocessingConfig):
missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS + ["fill_with_false"],
MISSING_VALUE_STRATEGY_OPTIONS + [FILL_WITH_FALSE],
default=DROP_ROW,
allow_none=False,
description="What strategy to follow when there's a missing value in a binary output feature",
Expand Down
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/category.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import CATEGORY, DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.constants import CATEGORY, DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand All @@ -17,7 +17,7 @@ class CategoryPreprocessingConfig(BasePreprocessingConfig):

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a category column",
parameter_metadata=FEATURE_METADATA[CATEGORY][PREPROCESSING]["missing_value_strategy"],
Expand Down
6 changes: 3 additions & 3 deletions ludwig/schema/features/preprocessing/date.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import DATE, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.constants import BFILL, DATE, DROP_ROW, FFILL, FILL_WITH_CONST, PREPROCESSING
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand All @@ -12,8 +12,8 @@
@ludwig_dataclass
class DatePreprocessingConfig(BasePreprocessingConfig):
missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
[FILL_WITH_CONST, BFILL, FFILL, DROP_ROW],
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a date column",
parameter_metadata=FEATURE_METADATA[DATE][PREPROCESSING]["missing_value_strategy"],
Expand Down
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/h3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import H3, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.constants import FILL_WITH_CONST, H3, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand All @@ -13,7 +13,7 @@
class H3PreprocessingConfig(BasePreprocessingConfig):
missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in an h3 column",
parameter_metadata=FEATURE_METADATA[H3][PREPROCESSING]["missing_value_strategy"],
Expand Down
15 changes: 11 additions & 4 deletions ludwig/schema/features/preprocessing/number.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, NUMBER, PREPROCESSING
from ludwig.constants import (
DROP_ROW,
FILL_WITH_CONST,
FILL_WITH_MEAN,
MISSING_VALUE_STRATEGY_OPTIONS,
NUMBER,
PREPROCESSING,
)
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand All @@ -14,8 +21,8 @@ class NumberPreprocessingConfig(BasePreprocessingConfig):
"""NumberPreprocessingConfig is a dataclass that configures the parameters used for a number input feature."""

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
MISSING_VALUE_STRATEGY_OPTIONS + [FILL_WITH_MEAN],
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a number column",
parameter_metadata=FEATURE_METADATA[NUMBER][PREPROCESSING]["missing_value_strategy"],
Expand Down Expand Up @@ -50,7 +57,7 @@ class NumberPreprocessingConfig(BasePreprocessingConfig):
@ludwig_dataclass
class NumberOutputPreprocessingConfig(NumberPreprocessingConfig):
missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
MISSING_VALUE_STRATEGY_OPTIONS + [FILL_WITH_MEAN],
default=DROP_ROW,
allow_none=False,
description="What strategy to follow when there's a missing value in a number output feature",
Expand Down
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/sequence.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SEQUENCE
from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SEQUENCE
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand Down Expand Up @@ -75,7 +75,7 @@ class SequencePreprocessingConfig(BasePreprocessingConfig):

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a text column",
parameter_metadata=FEATURE_METADATA[SEQUENCE][PREPROCESSING]["missing_value_strategy"],
Expand Down
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/set.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SET
from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SET
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand All @@ -24,7 +24,7 @@ class SetPreprocessingConfig(BasePreprocessingConfig):

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a set column",
parameter_metadata=FEATURE_METADATA[SET][PREPROCESSING]["missing_value_strategy"],
Expand Down
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/text.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TEXT
from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TEXT
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand Down Expand Up @@ -86,7 +86,7 @@ class TextPreprocessingConfig(BasePreprocessingConfig):

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a text column",
parameter_metadata=FEATURE_METADATA[TEXT][PREPROCESSING]["missing_value_strategy"],
Expand Down
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/timeseries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TIMESERIES
from ludwig.constants import FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TIMESERIES
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand Down Expand Up @@ -44,7 +44,7 @@ class TimeseriesPreprocessingConfig(BasePreprocessingConfig):

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a text column",
parameter_metadata=FEATURE_METADATA[TIMESERIES][PREPROCESSING]["missing_value_strategy"],
Expand Down
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/vector.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ludwig.api_annotations import DeveloperAPI
from ludwig.constants import DROP_ROW, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, VECTOR
from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, VECTOR
from ludwig.schema import utils as schema_utils
from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig
from ludwig.schema.features.preprocessing.utils import register_preprocessor
Expand All @@ -20,7 +20,7 @@ class VectorPreprocessingConfig(BasePreprocessingConfig):

missing_value_strategy: str = schema_utils.StringOptions(
MISSING_VALUE_STRATEGY_OPTIONS,
default="fill_with_const",
default=FILL_WITH_CONST,
allow_none=False,
description="What strategy to follow when there's a missing value in a vector column",
parameter_metadata=FEATURE_METADATA[VECTOR][PREPROCESSING]["missing_value_strategy"],
Expand Down

0 comments on commit 0d646ca

Please sign in to comment.