Skip to content

Commit

Permalink
Feature: add List and Frequent Items Comparison Operators (#31)
Browse files Browse the repository at this point in the history
* Adding FrequentItems and ListComparison
* Adding ComparisonConfig
  • Loading branch information
murilommen committed Jul 20, 2023
1 parent d64c30a commit 490de9e
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 11 deletions.
43 changes: 43 additions & 0 deletions examples/presets.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,49 @@ manager = MonitorManager(setup=monitor_setup)
manager.save()
```

### List Comparison

```python
from whylabs_toolkit.monitor import MonitorSetup, MonitorManager
from whylabs_toolkit.monitor.models import *

monitor_setup = MonitorSetup(monitor_id="monitor_list_comparison")

# Values handed to the config's `expected` field; each entry wraps one typed literal.
allowed_values = [ExpectedValue(str="expected"), ExpectedValue(int=123229)]

monitor_setup.config = ListComparisonConfig(
    operator=ListComparisonOperator.in_list,
    expected=allowed_values,
    baseline=TrailingWindowBaseline(size=7),
    metric=SimpleColumnMetric.count_bool,
)
monitor_setup.apply()

# Hand the applied setup to a MonitorManager and save it.
manager = MonitorManager(setup=monitor_setup)
manager.save()
```

### Frequent Items
```python
from whylabs_toolkit.monitor import MonitorSetup, MonitorManager
from whylabs_toolkit.monitor.models import *


monitor_setup = MonitorSetup(monitor_id="frequent_items")

# Build the analyzer config first, then attach it to the setup.
frequent_items_config = FrequentStringComparisonConfig(
    operator=FrequentStringComparisonOperator.eq,
    baseline=TrailingWindowBaseline(size=7),
)
monitor_setup.config = frequent_items_config
monitor_setup.apply()

# Hand the applied setup to a MonitorManager and save it.
manager = MonitorManager(setup=monitor_setup)
manager.save()
```

## Model Performance

### F1 Score
Expand Down
18 changes: 17 additions & 1 deletion whylabs_toolkit/monitor/manager/monitor_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def __init__(self, monitor_id: str, dataset_id: Optional[str] = None, config: Co
FixedThresholdsConfig,
StddevConfig,
DriftConfig,
ComparisonConfig,
SeasonalConfig,
FrequentStringComparisonConfig,
ListComparisonConfig,
]
] = None
self._target_columns: Optional[List[str]] = []
Expand Down Expand Up @@ -100,7 +103,18 @@ def target_matrix(self, target: Union[ColumnMatrix, DatasetMatrix]) -> None:
@property
def config(
self,
) -> Optional[Union[DiffConfig, FixedThresholdsConfig, StddevConfig, DriftConfig, SeasonalConfig,]]:
) -> Optional[
Union[
DiffConfig,
FixedThresholdsConfig,
StddevConfig,
DriftConfig,
ComparisonConfig,
SeasonalConfig,
FrequentStringComparisonConfig,
ListComparisonConfig,
]
]:
return self._analyzer_config

@config.setter
Expand All @@ -112,6 +126,8 @@ def config(
StddevConfig,
DriftConfig,
SeasonalConfig,
FrequentStringComparisonConfig,
ListComparisonConfig,
],
) -> None:
self._analyzer_config = config
Expand Down
5 changes: 5 additions & 0 deletions whylabs_toolkit/monitor/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
"DriftConfig",
"ComparisonConfig",
"ComparisonOperator",
"FrequentStringComparisonConfig",
"FrequentStringComparisonOperator",
"ListComparisonOperator",
"ListComparisonConfig",
"ExperimentalConfig",
"FixedThresholdsConfig",
"ColumnListChangeConfig",
Expand Down Expand Up @@ -73,4 +77,5 @@
"DatasetMetric",
"SimpleColumnMetric",
"ComplexMetrics",
"ExpectedValue",
]
5 changes: 5 additions & 0 deletions whylabs_toolkit/monitor/models/analyzer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
"FixedThresholdsConfig",
"ColumnListChangeConfig",
"SeasonalConfig",
"ListComparisonConfig",
"FrequentStringComparisonConfig",
"StddevConfig",
# enums
"DiffMode",
Expand All @@ -34,8 +36,11 @@
"DatasetMetric",
"SimpleColumnMetric",
"ComplexMetrics",
"ListComparisonOperator",
"FrequentStringComparisonOperator",
# targets
"DatasetMatrix",
"ColumnMatrix",
"TargetLevel",
"ExpectedValue",
]
56 changes: 47 additions & 9 deletions whylabs_toolkit/monitor/models/analyzer/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class AlgorithmType(str, Enum):
expected = "expected"
column_list = "column_list"
comparison = "comparison"
list_comparison = "list_comparison"
frequent_string_comparison = "frequent_string_comparison"
diff = "diff"
drift = "drift"
stddev = "stddev"
Expand Down Expand Up @@ -148,13 +150,26 @@ class ComparisonOperator(str, Enum):
"""Operators for performing a comparison."""

eq = "eq"
# Not Yet Implemented:
# gt = "gt"
# lt = "lt"
# ge = "ge"
# le = "le"


# Not Yet Implemented:
# gt = "gt"
# lt = "lt"
# ge = "ge"
# le = "le"
class ListComparisonOperator(str, Enum):
"""Operators accepted by ``ListComparisonConfig.operator``.

Members mix in ``str``, so each one compares equal to its string value.
"""

# String value is "in".
# NOTE(review): presumably passes when the target value is one of the expected values - confirm.
in_list = "in"
# String value is "not_in".
# NOTE(review): presumably passes when the target value is absent from the expected values - confirm.
not_in_list = "not_in"


class FrequentStringComparisonOperator(str, Enum):
"""Operators accepted by ``FrequentStringComparisonConfig.operator``.

Members mix in ``str``, so each one compares equal to its string value.
"""

# NOTE(review): presumably requires target and baseline frequent items to match - confirm.
eq = "eq"
# NOTE(review): going by the name, every baseline item must appear in the target - confirm.
target_includes_all_baseline = "target_includes_all_baseline"
# NOTE(review): going by the name, every target item must appear in the baseline - confirm.
baseline_includes_all_target = "baseline_includes_all_target"


class ComparisonConfig(AlgorithmConfig):
Expand Down Expand Up @@ -248,9 +263,7 @@ class SeasonalConfig(_ThresholdBaseConfig):
"""

type: Literal[AlgorithmType.seasonal] = AlgorithmType.seasonal
algorithm: Literal["arima", "rego", "stastforecast"] = Field(
"arima", description="The algorithm implementation for seasonal analysis"
)
algorithm: Literal["arima"] = Field("arima", description="The algorithm implementation for seasonal analysis")
minBatchSize: Optional[int] = Field(
30,
title="MinBatchSize",
Expand Down Expand Up @@ -287,7 +300,7 @@ class DriftConfig(AlgorithmConfig):
"""

type: Literal[AlgorithmType.drift] = AlgorithmType.drift
algorithm: Literal["hellinger", "ks_test", "kl_divergence", "variation_distance"] = Field(
algorithm: Literal["hellinger", "jensenshannon", "kl_divergence", "psi"] = Field(
"hellinger", description="The algorithm to use when calculating drift."
)
metric: Literal[ComplexMetrics.histogram, ComplexMetrics.frequent_items]
Expand All @@ -314,6 +327,31 @@ class ExperimentalConfig(AlgorithmConfig):
stub: Optional[AlgorithmType] = Field(description="Stub field to flow algoirthm type into the schema. Do not use.")


class ListComparisonConfig(AlgorithmConfig):
    """Analyzer config that compares a target metric against a list of values.

    ``operator`` selects the list comparison to apply. The values to compare
    against come from ``expected``; per that field's description, when it is
    left unset the corresponding metric is extracted from ``baseline`` instead.
    """

    # Tag value identifying this config type.
    type: Literal[AlgorithmType.list_comparison] = AlgorithmType.list_comparison
    operator: ListComparisonOperator = Field(
        # Adjacent string literals are concatenated verbatim, so the first
        # fragment must end with a space to keep "hand side" as two words.
        description="The operator for the comparison. The right hand side is the target batch's metric. The left hand "
        "side is the expected value or a baseline's metric."
    )
    expected: Optional[List[ExpectedValue]] = Field(
        None,
        description="The expected values of the equality. If the value is not set we will extract the corresponding "
        "metric from the baseline and perform the comparison",
    )
    # Declared Optional with no explicit default value.
    baseline: Optional[Union[TrailingWindowBaseline, ReferenceProfileId, TimeRangeBaseline, SingleBatchBaseline]]


class FrequentStringComparisonConfig(AlgorithmConfig):
"""Analyzer config that compares the frequent-items metric against a baseline.

The metric is pinned to ``ComplexMetrics.frequent_items``; ``operator``
selects which comparison is applied.
"""

# Tag value identifying this config type.
type: Literal[AlgorithmType.frequent_string_comparison] = AlgorithmType.frequent_string_comparison
# Only the frequent-items metric is accepted, and it is also the default.
metric: Literal[ComplexMetrics.frequent_items] = ComplexMetrics.frequent_items
operator: FrequentStringComparisonOperator = Field(description="The operator for the comparison.")
# Not Optional and no default is declared here.
# NOTE(review): presumably callers must always supply a baseline - confirm against AlgorithmConfig.
baseline: Union[TrailingWindowBaseline, ReferenceProfileId, TimeRangeBaseline, SingleBatchBaseline]


class DiffMode(str, Enum):
"""Whether to use the absolute difference or the percentage to calculate the difference."""

Expand Down
6 changes: 5 additions & 1 deletion whylabs_toolkit/monitor/models/analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from .algorithms import (
ColumnListChangeConfig,
ComparisonConfig,
ListComparisonConfig,
FrequentStringComparisonConfig,
DiffConfig,
DriftConfig,
ExperimentalConfig,
Expand Down Expand Up @@ -90,15 +92,17 @@ class Analyzer(NoExtrasBaseModel):
)

# NOT YET IMPLEMENTED:
# ComparisonConfig,
# ExperimentalConfig,
# ColumnListChangeConfig,

config: Union[
DiffConfig,
FixedThresholdsConfig,
ListComparisonConfig,
FrequentStringComparisonConfig,
StddevConfig,
DriftConfig,
ComparisonConfig,
SeasonalConfig,
] = Field(description="The configuration map of the analyzer", discriminator="type")

Expand Down

0 comments on commit 490de9e

Please sign in to comment.