Skip to content

Commit

Permalink
feat: Hyperopt schema v0, part 1: Move output feature metrics from feature classes to feature configs. (#2759)
Browse files Browse the repository at this point in the history

* add get_output_feature_metrics

* fix
  • Loading branch information
ksbrar authored Nov 18, 2022
1 parent faeba6f commit 007a6a4
Show file tree
Hide file tree
Showing 15 changed files with 91 additions and 86 deletions.
17 changes: 2 additions & 15 deletions ludwig/features/binary_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,7 @@
import numpy as np
import torch

from ludwig.constants import (
ACCURACY,
BINARY,
COLUMN,
HIDDEN,
LOGITS,
LOSS,
NAME,
PREDICTIONS,
PROBABILITIES,
PROBABILITY,
PROC_COLUMN,
ROC_AUC,
)
from ludwig.constants import BINARY, COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROBABILITIES, PROBABILITY, PROC_COLUMN
from ludwig.error import InputDataError
from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
from ludwig.schema.features.binary_feature import BinaryInputFeatureConfig, BinaryOutputFeatureConfig
Expand Down Expand Up @@ -252,7 +239,7 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:


class BinaryOutputFeature(BinaryFeatureMixin, OutputFeature):
metric_functions = {LOSS: None, ACCURACY: None, ROC_AUC: None}
metric_functions = BinaryOutputFeatureConfig.get_output_metric_functions()

def __init__(
self,
Expand Down
5 changes: 1 addition & 4 deletions ludwig/features/category_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,10 @@
import torch

from ludwig.constants import (
ACCURACY,
CATEGORY,
COLUMN,
HIDDEN,
HITS_AT_K,
LOGITS,
LOSS,
NAME,
PREDICTIONS,
PROBABILITIES,
Expand Down Expand Up @@ -240,7 +237,7 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:


class CategoryOutputFeature(CategoryFeatureMixin, OutputFeature):
metric_functions = {LOSS: None, ACCURACY: None, HITS_AT_K: None}
metric_functions = CategoryOutputFeatureConfig.get_output_metric_functions()

def __init__(
self,
Expand Down
25 changes: 2 additions & 23 deletions ludwig/features/number_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,7 @@
import torch
from torch import nn

from ludwig.constants import (
COLUMN,
HIDDEN,
LOGITS,
LOSS,
MEAN_ABSOLUTE_ERROR,
MEAN_SQUARED_ERROR,
NAME,
NUMBER,
PREDICTIONS,
PROC_COLUMN,
R2,
ROOT_MEAN_SQUARED_ERROR,
ROOT_MEAN_SQUARED_PERCENTAGE_ERROR,
)
from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, NUMBER, PREDICTIONS, PROC_COLUMN
from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
from ludwig.schema.features.number_feature import NumberInputFeatureConfig, NumberOutputFeatureConfig
from ludwig.utils import output_feature_utils
Expand Down Expand Up @@ -329,14 +315,7 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:


class NumberOutputFeature(NumberFeatureMixin, OutputFeature):
metric_functions = {
LOSS: None,
MEAN_SQUARED_ERROR: None,
MEAN_ABSOLUTE_ERROR: None,
ROOT_MEAN_SQUARED_ERROR: None,
ROOT_MEAN_SQUARED_PERCENTAGE_ERROR: None,
R2: None,
}
metric_functions = NumberOutputFeatureConfig.get_output_metric_functions()

def __init__(
self,
Expand Down
15 changes: 1 addition & 14 deletions ludwig/features/sequence_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,14 @@

from ludwig.constants import (
COLUMN,
EDIT_DISTANCE,
LAST_ACCURACY,
LAST_PREDICTIONS,
LENGTHS,
LOSS,
NAME,
PERPLEXITY,
PREDICTIONS,
PROBABILITIES,
PROBABILITY,
PROC_COLUMN,
SEQUENCE,
SEQUENCE_ACCURACY,
TOKEN_ACCURACY,
)
from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
from ludwig.features.feature_utils import compute_sequence_probability, compute_token_probabilities
Expand Down Expand Up @@ -295,14 +289,7 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:


class SequenceOutputFeature(SequenceFeatureMixin, OutputFeature):
metric_functions = {
LOSS: None,
TOKEN_ACCURACY: None,
SEQUENCE_ACCURACY: None,
LAST_ACCURACY: None,
PERPLEXITY: None,
EDIT_DISTANCE: None,
}
metric_functions = SequenceOutputFeatureConfig.get_output_metric_functions()

def __init__(
self,
Expand Down
4 changes: 2 additions & 2 deletions ludwig/features/set_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
import torch

from ludwig.constants import COLUMN, HIDDEN, JACCARD, LOGITS, LOSS, NAME, PREDICTIONS, PROBABILITIES, PROC_COLUMN, SET
from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROBABILITIES, PROC_COLUMN, SET
from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule
from ludwig.features.feature_utils import set_str_to_idx
from ludwig.schema.features.set_feature import SetInputFeatureConfig, SetOutputFeatureConfig
Expand Down Expand Up @@ -228,7 +228,7 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:


class SetOutputFeature(SetFeatureMixin, OutputFeature):
metric_functions = {LOSS: None, JACCARD: None}
metric_functions = SetOutputFeatureConfig.get_output_metric_functions()

def __init__(
self,
Expand Down
7 changes: 1 addition & 6 deletions ludwig/features/text_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,14 @@

from ludwig.constants import (
COLUMN,
EDIT_DISTANCE,
LAST_ACCURACY,
LAST_PREDICTIONS,
LENGTHS,
LOSS,
NAME,
PERPLEXITY,
PREDICTIONS,
PROBABILITIES,
PROBABILITY,
PROC_COLUMN,
TEXT,
TOKEN_ACCURACY,
)
from ludwig.features.base_feature import BaseFeatureMixin, OutputFeature
from ludwig.features.feature_utils import compute_sequence_probability, compute_token_probabilities
Expand Down Expand Up @@ -224,7 +219,7 @@ def create_preproc_module(metadata: Dict[str, Any]) -> torch.nn.Module:


class TextOutputFeature(TextFeatureMixin, SequenceOutputFeature):
metric_functions = {LOSS: None, TOKEN_ACCURACY: None, LAST_ACCURACY: None, PERPLEXITY: None, EDIT_DISTANCE: None}
metric_functions = TextOutputFeatureConfig.get_output_metric_functions()

def __init__(
self,
Expand Down
17 changes: 2 additions & 15 deletions ludwig/features/vector_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,7 @@
import numpy as np
import torch

from ludwig.constants import (
COLUMN,
ERROR,
HIDDEN,
LOGITS,
LOSS,
MEAN_ABSOLUTE_ERROR,
MEAN_SQUARED_ERROR,
NAME,
PREDICTIONS,
PROC_COLUMN,
R2,
VECTOR,
)
from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROC_COLUMN, VECTOR
from ludwig.features.base_feature import InputFeature, OutputFeature, PredictModule
from ludwig.schema.features.vector_feature import VectorInputFeatureConfig, VectorOutputFeatureConfig
from ludwig.utils import output_feature_utils
Expand Down Expand Up @@ -173,7 +160,7 @@ def get_schema_cls():


class VectorOutputFeature(VectorFeatureMixin, OutputFeature):
metric_functions = {LOSS: None, ERROR: None, MEAN_SQUARED_ERROR: None, MEAN_ABSOLUTE_ERROR: None, R2: None}
metric_functions = VectorOutputFeatureConfig.get_output_metric_functions()

def __init__(
self,
Expand Down
6 changes: 6 additions & 0 deletions ludwig/schema/features/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from abc import abstractmethod
from typing import List

from marshmallow_dataclass import dataclass
Expand Down Expand Up @@ -135,3 +136,8 @@ class BaseOutputFeatureConfig(BaseFeatureConfig):
description="Size of the input to the decoder.",
parameter_metadata=ParameterMetadata(internal_only=True),
)

@staticmethod
@abstractmethod
def get_output_metric_functions():
    """Return the metrics supported by an output feature type.

    This base version returns None. The concrete output feature configs
    (binary, category, number, sequence, set, text, vector) override it
    to return a dict keyed by metric name.
    """
    # NOTE(review): @abstractmethod only blocks instantiation when the
    # class's metaclass derives from ABCMeta -- confirm that this holds
    # for BaseOutputFeatureConfig, otherwise the decorator is advisory.
    return None
6 changes: 5 additions & 1 deletion ludwig/schema/features/binary_feature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from marshmallow_dataclass import dataclass

from ludwig.constants import BINARY, BINARY_WEIGHTED_CROSS_ENTROPY, ROC_AUC
from ludwig.constants import ACCURACY, BINARY, BINARY_WEIGHTED_CROSS_ENTROPY, LOSS, ROC_AUC
from ludwig.schema import utils as schema_utils
from ludwig.schema.decoders.base import BaseDecoderConfig
from ludwig.schema.decoders.utils import DecoderDataclassField
Expand Down Expand Up @@ -102,3 +102,7 @@ class BinaryOutputFeatureConfig(BaseOutputFeatureConfig, BinaryOutputFeatureConf
description="The threshold used to convert output probabilities to predictions. Predicted probabilities greater"
"than or equal to threshold are mapped to True.",
)

@staticmethod
def get_output_metric_functions():
    """Return the binary output metric names, each mapped to None.

    The result is used as the initial value of
    ``BinaryOutputFeature.metric_functions``.
    """
    metric_names = (LOSS, ACCURACY, ROC_AUC)
    # dict.fromkeys keeps the tuple order and defaults every value to None.
    return dict.fromkeys(metric_names)
6 changes: 5 additions & 1 deletion ludwig/schema/features/category_feature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from marshmallow_dataclass import dataclass

from ludwig.constants import ACCURACY, CATEGORY, SOFTMAX_CROSS_ENTROPY
from ludwig.constants import ACCURACY, CATEGORY, HITS_AT_K, LOSS, SOFTMAX_CROSS_ENTROPY
from ludwig.schema import utils as schema_utils
from ludwig.schema.decoders.base import BaseDecoderConfig
from ludwig.schema.decoders.utils import DecoderDataclassField
Expand Down Expand Up @@ -103,3 +103,7 @@ class CategoryOutputFeatureConfig(BaseOutputFeatureConfig, CategoryOutputFeature
"measure. It computes accuracy but considering as a match if the true category appears in the "
"first k predicted categories ranked by decoder's confidence.",
)

@staticmethod
def get_output_metric_functions():
    """Return the category output metric names, each mapped to None.

    The result is used as the initial value of
    ``CategoryOutputFeature.metric_functions``.
    """
    metric_names = (LOSS, ACCURACY, HITS_AT_K)
    # dict.fromkeys keeps the tuple order and defaults every value to None.
    return dict.fromkeys(metric_names)
21 changes: 20 additions & 1 deletion ludwig/schema/features/number_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@

from marshmallow_dataclass import dataclass

from ludwig.constants import MEAN_SQUARED_ERROR, NUMBER
from ludwig.constants import (
LOSS,
MEAN_ABSOLUTE_ERROR,
MEAN_SQUARED_ERROR,
NUMBER,
R2,
ROOT_MEAN_SQUARED_ERROR,
ROOT_MEAN_SQUARED_PERCENTAGE_ERROR,
)
from ludwig.schema import utils as schema_utils
from ludwig.schema.decoders.base import BaseDecoderConfig
from ludwig.schema.decoders.utils import DecoderDataclassField
Expand Down Expand Up @@ -101,3 +109,14 @@ class NumberOutputFeatureConfig(BaseOutputFeatureConfig, NumberOutputFeatureConf
)

preprocessing: BasePreprocessingConfig = PreprocessingDataclassField(feature_type="number_output")

@staticmethod
def get_output_metric_functions():
    """Return the number output metric names, each mapped to None.

    The result is used as the initial value of
    ``NumberOutputFeature.metric_functions``.
    """
    metric_names = (
        LOSS,
        MEAN_SQUARED_ERROR,
        MEAN_ABSOLUTE_ERROR,
        ROOT_MEAN_SQUARED_ERROR,
        ROOT_MEAN_SQUARED_PERCENTAGE_ERROR,
        R2,
    )
    # dict.fromkeys keeps the tuple order and defaults every value to None.
    return dict.fromkeys(metric_names)
22 changes: 21 additions & 1 deletion ludwig/schema/features/sequence_feature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from marshmallow_dataclass import dataclass

from ludwig.constants import LOSS, SEQUENCE, SEQUENCE_SOFTMAX_CROSS_ENTROPY
from ludwig.constants import (
EDIT_DISTANCE,
LAST_ACCURACY,
LOSS,
PERPLEXITY,
SEQUENCE,
SEQUENCE_ACCURACY,
SEQUENCE_SOFTMAX_CROSS_ENTROPY,
TOKEN_ACCURACY,
)
from ludwig.schema import utils as schema_utils
from ludwig.schema.decoders.base import BaseDecoderConfig
from ludwig.schema.decoders.utils import DecoderDataclassField
Expand Down Expand Up @@ -91,3 +100,14 @@ class SequenceOutputFeatureConfig(BaseOutputFeatureConfig, SequenceOutputFeature
description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
"dimension (second if you count the batch dimension)",
)

@staticmethod
def get_output_metric_functions():
    """Return the sequence output metric names, each mapped to None.

    The result is used as the initial value of
    ``SequenceOutputFeature.metric_functions``.
    """
    metric_names = (
        LOSS,
        TOKEN_ACCURACY,
        SEQUENCE_ACCURACY,
        LAST_ACCURACY,
        PERPLEXITY,
        EDIT_DISTANCE,
    )
    # dict.fromkeys keeps the tuple order and defaults every value to None.
    return dict.fromkeys(metric_names)
6 changes: 5 additions & 1 deletion ludwig/schema/features/set_feature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from marshmallow_dataclass import dataclass

from ludwig.constants import JACCARD, SET, SIGMOID_CROSS_ENTROPY
from ludwig.constants import JACCARD, LOSS, SET, SIGMOID_CROSS_ENTROPY
from ludwig.schema import utils as schema_utils
from ludwig.schema.decoders.base import BaseDecoderConfig
from ludwig.schema.decoders.utils import DecoderDataclassField
Expand Down Expand Up @@ -97,3 +97,7 @@ class SetOutputFeatureConfig(BaseOutputFeatureConfig, SetOutputFeatureConfigMixi
description="The threshold used to convert output probabilities to predictions. Tokens with predicted"
"probabilities greater than or equal to threshold are predicted to be in the output set (True).",
)

@staticmethod
def get_output_metric_functions():
    """Return the set output metric names, each mapped to None.

    The result is used as the initial value of
    ``SetOutputFeature.metric_functions``.
    """
    metric_names = (LOSS, JACCARD)
    # dict.fromkeys keeps the tuple order and defaults every value to None.
    return dict.fromkeys(metric_names)
14 changes: 13 additions & 1 deletion ludwig/schema/features/text_feature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from marshmallow_dataclass import dataclass

from ludwig.constants import LOSS, SEQUENCE_SOFTMAX_CROSS_ENTROPY, TEXT
from ludwig.constants import (
EDIT_DISTANCE,
LAST_ACCURACY,
LOSS,
PERPLEXITY,
SEQUENCE_SOFTMAX_CROSS_ENTROPY,
TEXT,
TOKEN_ACCURACY,
)
from ludwig.schema import utils as schema_utils
from ludwig.schema.decoders.base import BaseDecoderConfig
from ludwig.schema.decoders.utils import DecoderDataclassField
Expand Down Expand Up @@ -96,3 +104,7 @@ class TextOutputFeatureConfig(BaseOutputFeatureConfig, TextOutputFeatureConfigMi
description="How to reduce an input that is not a vector, but a matrix or a higher order tensor, on the first "
"dimension (second if you count the batch dimension)",
)

@staticmethod
def get_output_metric_functions():
    """Return the text output metric names, each mapped to None.

    The result is used as the initial value of
    ``TextOutputFeature.metric_functions``.
    """
    metric_names = (
        LOSS,
        TOKEN_ACCURACY,
        LAST_ACCURACY,
        PERPLEXITY,
        EDIT_DISTANCE,
    )
    # dict.fromkeys keeps the tuple order and defaults every value to None.
    return dict.fromkeys(metric_names)
6 changes: 5 additions & 1 deletion ludwig/schema/features/vector_feature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from marshmallow_dataclass import dataclass

from ludwig.constants import MEAN_SQUARED_ERROR, VECTOR
from ludwig.constants import ERROR, LOSS, MEAN_ABSOLUTE_ERROR, MEAN_SQUARED_ERROR, R2, VECTOR
from ludwig.schema import utils as schema_utils
from ludwig.schema.decoders.base import BaseDecoderConfig
from ludwig.schema.decoders.utils import DecoderDataclassField
Expand Down Expand Up @@ -101,3 +101,7 @@ class VectorOutputFeatureConfig(BaseOutputFeatureConfig, VectorOutputFeatureConf
allow_none=True,
description="The size of the vector. If None, the vector size will be inferred from the data.",
)

@staticmethod
def get_output_metric_functions():
    """Return the vector output metric names, each mapped to None.

    The result is used as the initial value of
    ``VectorOutputFeature.metric_functions``.
    """
    metric_names = (
        LOSS,
        ERROR,
        MEAN_SQUARED_ERROR,
        MEAN_ABSOLUTE_ERROR,
        R2,
    )
    # dict.fromkeys keeps the tuple order and defaults every value to None.
    return dict.fromkeys(metric_names)

0 comments on commit 007a6a4

Please sign in to comment.