serializable presets #1254

Merged · 10 commits · Aug 20, 2024
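
The diff makes presets serializable models: a new `BasePreset` (an `EvidentlyBaseModel` with `is_base_type = True` in its `Config`) becomes the parent of `MetricPreset`, `FeatureDescriptor` gets the same config, `generate_metrics`/`generate_tests` gain typed return values (`List[AnyMetric]` / `List[AnyTest]`), and every preset now assigns its fields before calling `super().__init__()`. A minimal sketch of the intended payoff, assuming the snapshot helpers `Report.save` and `Report.load` available in recent evidently releases (they are not part of this diff):

```python
# A sketch, not part of this diff: with presets as serializable models,
# a report built from a preset should round-trip through a snapshot
# instead of failing on an unserializable preset object.
import pandas as pd

from evidently.metric_preset import DataQualityPreset
from evidently.report import Report

reference = pd.DataFrame({"feature": [1, 2, 3, 4]})
current = pd.DataFrame({"feature": [2, 3, 4, 5]})

report = Report(metrics=[DataQualityPreset(columns=["feature"])])
report.run(reference_data=reference, current_data=current)

report.save("snapshot.json")            # assumed snapshot API: preset config serialized too
restored = Report.load("snapshot.json")
```
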
5 changes: 5 additions & 0 deletions src/evidently/base_metric.py
@@ -215,6 +215,11 @@ def result_type(cls) -> Type[MetricResult]:
return typing_inspect.get_args(next(b for b in cls.__orig_bases__ if typing_inspect.is_generic_type(b)))[0]


class BasePreset(EvidentlyBaseModel):
class Config:
is_base_type = True


class Metric(WithTestAndMetricDependencies, Generic[TResult], metaclass=WithResultFieldPathMetaclass):
_context: Optional["Context"] = None

3 changes: 3 additions & 0 deletions src/evidently/features/generated_features.py
@@ -208,6 +208,9 @@ def on(self, columns: List[str]) -> "ColumnName":


class FeatureDescriptor(EvidentlyBaseModel):
class Config:
is_base_type = True

display_name: Optional[str] = None

def for_column(self, column_name: str) -> "ColumnName":
9 changes: 6 additions & 3 deletions src/evidently/metric_preset/classification_performance.py
@@ -3,6 +3,7 @@
from typing import List
from typing import Optional

from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import ClassificationClassBalance
from evidently.metrics import ClassificationClassSeparationPlot
@@ -35,13 +36,15 @@ class ClassificationPreset(MetricPreset):
def __init__(
self, columns: Optional[List[str]] = None, probas_threshold: Optional[float] = None, k: Optional[int] = None
):
super().__init__()
self.columns = columns
self.probas_threshold = probas_threshold
self.k = k
super().__init__()

def generate_metrics(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
result = [
def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyMetric]:
result: List[AnyMetric] = [
ClassificationQualityMetric(probas_threshold=self.probas_threshold, k=self.k),
ClassificationClassBalance(),
ClassificationConfusionMatrix(probas_threshold=self.probas_threshold, k=self.k),
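
A recurring change in this file and in the presets below is moving `super().__init__()` from the first statement of `__init__` to the last. A standalone illustration (plain Python, not evidently's actual base-model machinery) of why the ordering matters when the base initializer reads state that is already set on the instance:

```python
# Hypothetical analogy: a base class whose __init__ snapshots whatever
# attributes are already assigned, similar in spirit to a model base
# that builds its fields from the instance state.
class RecordingBase:
    def __init__(self):
        self.captured = dict(vars(self))  # copy of attributes set so far


class BeforeFix(RecordingBase):
    def __init__(self, columns):
        super().__init__()     # runs first: no attributes assigned yet
        self.columns = columns


class AfterFix(RecordingBase):
    def __init__(self, columns):
        self.columns = columns
        super().__init__()     # runs last: sees the assigned attributes


print(BeforeFix(["a"]).captured)  # {}
print(AfterFix(["a"]).captured)   # {'columns': ['a']}
```
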
13 changes: 8 additions & 5 deletions src/evidently/metric_preset/data_drift.py
@@ -4,6 +4,7 @@
from typing import Optional

from evidently.calculations.stattests import PossibleStatTestType
from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import DataDriftTable
from evidently.metrics import DatasetDriftMetric
@@ -54,7 +55,6 @@ def __init__(
text_stattest_threshold: Optional[float] = None,
per_column_stattest_threshold: Optional[Dict[str, float]] = None,
):
super().__init__()
self.columns = columns
self.embeddings = embeddings
self.embeddings_drift_method = embeddings_drift_method
@@ -69,9 +69,12 @@ def __init__(
self.num_stattest_threshold = num_stattest_threshold
self.text_stattest_threshold = text_stattest_threshold
self.per_column_stattest_threshold = per_column_stattest_threshold
super().__init__()

def generate_metrics(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
result = [
def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyMetric]:
result: List[AnyMetric] = [
DatasetDriftMetric(
columns=self.columns,
drift_share=self.drift_share,
@@ -103,7 +106,7 @@ def generate_metrics(self, data_definition: DataDefinition, additional_data: Opt
embeddings_data = data_definition.embeddings
if embeddings_data is None:
return result
result = add_emb_drift_to_reports(
result, embeddings_data, self.embeddings, self.embeddings_drift_method, EmbeddingsDriftMetric
result += add_emb_drift_to_reports(
embeddings_data, self.embeddings, self.embeddings_drift_method, EmbeddingsDriftMetric
)
return result
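
Besides the reordered `super().__init__()` and the typed return value, the embeddings branch now extends the result list with the metrics returned by `add_emb_drift_to_reports` instead of passing the list in to be mutated. Usage of the preset is unchanged; a rough sketch, assuming the `ColumnMapping(embeddings=...)` mapping available in recent evidently releases:

```python
import numpy as np
import pandas as pd

from evidently import ColumnMapping
from evidently.metric_preset import DataDriftPreset
from evidently.report import Report

rng = np.random.default_rng(0)
cols = ["emb_0", "emb_1", "emb_2"]
reference = pd.DataFrame(rng.normal(size=(200, 3)), columns=cols)
current = pd.DataFrame(rng.normal(loc=0.5, size=(200, 3)), columns=cols)

# Group the embedding columns under a name, then point the preset at that group.
mapping = ColumnMapping(embeddings={"doc_vectors": cols})

report = Report(metrics=[DataDriftPreset(embeddings=["doc_vectors"])])
report.run(reference_data=reference, current_data=current, column_mapping=mapping)
```
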
7 changes: 5 additions & 2 deletions src/evidently/metric_preset/data_quality.py
@@ -3,6 +3,7 @@
from typing import List
from typing import Optional

from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import ColumnSummaryMetric
from evidently.metrics import DatasetSummaryMetric
@@ -27,10 +28,12 @@ class DataQualityPreset(MetricPreset):
columns: Optional[List[str]]

def __init__(self, columns: Optional[List[str]] = None):
super().__init__()
self.columns = columns
super().__init__()

def generate_metrics(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyMetric]:
return [
DatasetSummaryMetric(),
generate_column_metrics(ColumnSummaryMetric, columns=self.columns, skip_id_column=True),
16 changes: 11 additions & 5 deletions src/evidently/metric_preset/metric_preset.py
@@ -1,17 +1,23 @@
import abc
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

from evidently.base_metric import BasePreset
from evidently.base_metric import Metric
from evidently.utils.data_preprocessing import DataDefinition
from evidently.utils.generators import BaseGenerator

AnyMetric = Union[Metric, BaseGenerator[Metric]]

class MetricPreset:
"""Base class for metric presets"""

def __init__(self):
pass
class MetricPreset(BasePreset):
"""Base class for metric presets"""

@abc.abstractmethod
def generate_metrics(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyMetric]:
raise NotImplementedError()
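
This file carries the core of the change: `MetricPreset` now derives from the serializable `BasePreset`, drops its no-op `__init__`, and declares `generate_metrics` as returning `List[AnyMetric]`, where `AnyMetric = Union[Metric, BaseGenerator[Metric]]`. A hypothetical preset written against this interface, mirroring the pattern the other files in this PR follow (class-level field annotations, fields assigned before `super().__init__()`); the `generate_column_metrics` import path is assumed from the file shown further down:

```python
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import ColumnSummaryMetric
from evidently.metrics import DatasetSummaryMetric
from evidently.metrics.base_metric import generate_column_metrics  # assumed path
from evidently.utils.data_preprocessing import DataDefinition


class NarrowSummaryPreset(MetricPreset):
    """Hypothetical preset, not part of this PR, showing the typed interface."""

    columns: Optional[List[str]]

    def __init__(self, columns: Optional[List[str]] = None):
        self.columns = columns
        super().__init__()  # called last, matching the ordering used across this PR

    def generate_metrics(
        self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
    ) -> List[AnyMetric]:
        # AnyMetric admits both concrete metrics and per-column generators.
        return [
            DatasetSummaryMetric(),
            generate_column_metrics(ColumnSummaryMetric, columns=self.columns),
        ]
```
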
9 changes: 6 additions & 3 deletions src/evidently/metric_preset/recsys.py
@@ -4,6 +4,7 @@
from typing import Optional
from typing import Union

from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import DiversityMetric
from evidently.metrics import FBetaTopKMetric
@@ -69,7 +70,6 @@ def __init__(
user_bias_columns: Optional[List[str]] = None,
item_bias_columns: Optional[List[str]] = None,
):
super().__init__()
self.k = k
self.min_rel_score = min_rel_score
self.no_feedback_users = no_feedback_users
@@ -79,12 +79,15 @@ def __init__(
self.item_features = item_features
self.user_bias_columns = user_bias_columns
self.item_bias_columns = item_bias_columns
super().__init__()

def generate_metrics(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyMetric]:
is_train_data = False
if additional_data is not None:
is_train_data = "current_train_data" in additional_data.keys()
metrics = [
metrics: List[AnyMetric] = [
PrecisionTopKMetric(k=self.k, min_rel_score=self.min_rel_score, no_feedback_users=self.no_feedback_users),
RecallTopKMetric(k=self.k, min_rel_score=self.min_rel_score, no_feedback_users=self.no_feedback_users),
FBetaTopKMetric(k=self.k, min_rel_score=self.min_rel_score, no_feedback_users=self.no_feedback_users),
7 changes: 5 additions & 2 deletions src/evidently/metric_preset/regression_performance.py
@@ -3,6 +3,7 @@
from typing import List
from typing import Optional

from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import RegressionAbsPercentageErrorPlot
from evidently.metrics import RegressionErrorBiasTable
@@ -34,10 +35,12 @@ class RegressionPreset(MetricPreset):
columns: Optional[List[str]]

def __init__(self, columns: Optional[List[str]] = None):
super().__init__()
self.columns = columns
super().__init__()

def generate_metrics(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyMetric]:
return [
RegressionQualityMetric(),
RegressionPredictedVsActualScatter(),
9 changes: 4 additions & 5 deletions src/evidently/metric_preset/target_drift.py
@@ -2,10 +2,9 @@
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence

from evidently.base_metric import Metric
from evidently.calculations.stattests import PossibleStatTestType
from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import ColumnCorrelationsMetric
from evidently.metrics import ColumnDriftMetric
@@ -52,7 +51,6 @@ def __init__(
text_stattest_threshold: Optional[float] = None,
per_column_stattest_threshold: Optional[Dict[str, float]] = None,
):
super().__init__()
self.columns = columns
self.stattest = stattest
self.cat_stattest = cat_stattest
@@ -64,13 +62,14 @@ def __init__(
self.num_stattest_threshold = num_stattest_threshold
self.text_stattest_threshold = text_stattest_threshold
self.per_column_stattest_threshold = per_column_stattest_threshold
super().__init__()

def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> Sequence[Metric]:
) -> List[AnyMetric]:
target = data_definition.get_target_column()
prediction = data_definition.get_prediction_columns()
result: List[Metric] = []
result: List[AnyMetric] = []
columns_by_target = []

if target is not None:
10 changes: 8 additions & 2 deletions src/evidently/metric_preset/text_evals.py
@@ -9,18 +9,24 @@
from evidently.descriptors import Sentiment
from evidently.descriptors import TextLength
from evidently.features.generated_features import FeatureDescriptor
from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import ColumnSummaryMetric
from evidently.utils.data_preprocessing import DataDefinition


class TextEvals(MetricPreset):
column_name: str
descriptors: Optional[List[FeatureDescriptor]] = None

def __init__(self, column_name: str, descriptors: Optional[List[FeatureDescriptor]] = None):
super().__init__()
self.column_name: str = column_name
self.descriptors: Optional[List[FeatureDescriptor]] = descriptors
super().__init__()

def generate_metrics(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyMetric]:
descriptors = self.descriptors or [
TextLength(),
SentenceCount(),
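
`TextEvals` now declares `column_name` and `descriptors` as model fields, so a configured instance serializes like the other presets. A short usage sketch; the `Sentiment` descriptor may require NLTK data to be available locally:

```python
import pandas as pd

from evidently.descriptors import Sentiment
from evidently.descriptors import TextLength
from evidently.metric_preset import TextEvals
from evidently.report import Report

data = pd.DataFrame({"response": ["Great, thanks!", "This did not help at all."]})

# Each descriptor produces a summary of the chosen text column.
report = Report(metrics=[TextEvals(column_name="response", descriptors=[TextLength(), Sentiment()])])
report.run(reference_data=None, current_data=data)
```
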
10 changes: 7 additions & 3 deletions src/evidently/metric_preset/text_overview.py
@@ -10,6 +10,7 @@
from evidently.descriptors import Sentiment
from evidently.descriptors import TextLength
from evidently.features.generated_features import FeatureDescriptor
from evidently.metric_preset.metric_preset import AnyMetric
from evidently.metric_preset.metric_preset import MetricPreset
from evidently.metrics import ColumnSummaryMetric
from evidently.utils.data_preprocessing import DataDefinition
@@ -30,14 +31,14 @@ class TextOverviewPreset(MetricPreset):
"""

columns: List[str]
descriptors: Optional[List[FeatureDescriptor]] = None

def __init__(
self,
column_name: Optional[str] = None,
columns: Optional[List[str]] = None,
descriptors: Optional[List[FeatureDescriptor]] = None,
):
super().__init__()
if column_name is not None and columns is not None:
raise ValueError("Cannot specify both `columns` and `columns`.")
if columns is not None:
@@ -47,9 +48,12 @@ def __init__(
else:
raise ValueError("Must specify either `columns` or `columns`.")
self.descriptors = descriptors
super().__init__()

def generate_metrics(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
result = []
def generate_metrics(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyMetric]:
result: List[AnyMetric] = []
if self.descriptors is None:
descriptors = [
TextLength(),
2 changes: 1 addition & 1 deletion src/evidently/metrics/base_metric.py
@@ -13,7 +13,7 @@ def generate_column_metrics(
columns: Optional[Union[str, list]] = None,
parameters: Optional[Dict] = None,
skip_id_column: bool = False,
) -> BaseGenerator:
) -> BaseGenerator[Metric]:
"""Function for generating metrics for columns"""
return make_generator_by_columns(
base_class=metric_class,
2 changes: 1 addition & 1 deletion src/evidently/report/report.py
@@ -135,7 +135,7 @@ def run(
self.metadata[METRIC_GENERATORS] = []
self.metadata[METRIC_GENERATORS].append(item.__class__.__name__) # type: ignore[union-attr]
elif isinstance(item, MetricPreset):
metrics = []
metrics: List[Metric] = []

for metric_item in item.generate_metrics(data_definition, additional_data=additional_data):
if isinstance(metric_item, BaseGenerator):
12 changes: 10 additions & 2 deletions src/evidently/test_preset/classification_binary.py
@@ -1,8 +1,10 @@
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

from evidently.calculations.stattests import PossibleStatTestType
from evidently.test_preset.test_preset import AnyTest
from evidently.test_preset.test_preset import TestPreset
from evidently.tests import TestAccuracyScore
from evidently.tests import TestColumnDrift
@@ -29,18 +31,24 @@ class BinaryClassificationTestPreset(TestPreset):
- `TestAccuracyScore`
"""

stattest: Optional[PossibleStatTestType] = None
stattest_threshold: Optional[float] = None
probas_threshold: Optional[float] = None

def __init__(
self,
stattest: Optional[PossibleStatTestType] = None,
stattest_threshold: Optional[float] = None,
probas_threshold: Optional[float] = None,
):
super().__init__()
self.stattest = stattest
self.stattest_threshold = stattest_threshold
self.probas_threshold = probas_threshold
super().__init__()

def generate_tests(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
def generate_tests(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyTest]:
target = data_definition.get_target_column()

if target is None:
9 changes: 7 additions & 2 deletions src/evidently/test_preset/classification_binary_topk.py
@@ -1,8 +1,10 @@
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

from evidently.calculations.stattests import PossibleStatTestType
from evidently.test_preset.test_preset import AnyTest
from evidently.test_preset.test_preset import TestPreset
from evidently.tests import TestAccuracyScore
from evidently.tests import TestColumnDrift
@@ -29,6 +31,7 @@ class BinaryClassificationTopKTestPreset(TestPreset):
- `TestAccuracyScore`
"""

k: int
stattest: Optional[PossibleStatTestType]
stattest_threshold: Optional[float]

@@ -38,12 +41,14 @@ def __init__(
stattest: Optional[PossibleStatTestType] = None,
stattest_threshold: Optional[float] = None,
):
super().__init__()
self.k = k
self.stattest = stattest
self.stattest_threshold = stattest_threshold
super().__init__()

def generate_tests(self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]):
def generate_tests(
self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
) -> List[AnyTest]:
target = data_definition.get_target_column()
if target is None:
raise ValueError("Target column should be set in mapping and be present in data")
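
The test presets get the symmetric treatment: class-level field annotations, `super().__init__()` moved to the end, and `generate_tests` typed as `List[AnyTest]`. A hypothetical custom test preset written against that interface; the `gte` condition is one of the standard evidently test conditions and is used here only for illustration:

```python
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

from evidently.test_preset.test_preset import AnyTest
from evidently.test_preset.test_preset import TestPreset
from evidently.tests import TestAccuracyScore
from evidently.utils.data_preprocessing import DataDefinition


class MinimalAccuracyTestPreset(TestPreset):
    """Hypothetical preset, not part of this PR, following the typed interface."""

    threshold: Optional[float]

    def __init__(self, threshold: Optional[float] = None):
        self.threshold = threshold
        super().__init__()  # last, so the base model sees the assigned field

    def generate_tests(
        self, data_definition: DataDefinition, additional_data: Optional[Dict[str, Any]]
    ) -> List[AnyTest]:
        # With threshold=None the test falls back to its default condition.
        return [TestAccuracyScore(gte=self.threshold)]
```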