Skip to content

Commit

Permalink
add min max count (#327)
Browse files Browse the repository at this point in the history
  • Loading branch information
RogerHYang authored Mar 3, 2023
1 parent a802866 commit b01b182
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 6 deletions.
3 changes: 3 additions & 0 deletions app/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ enum DataQualityMetric {
percentEmpty
mean
sum
min
max
count
}

type DataQualityTimeSeries implements TimeSeries {
Expand Down
10 changes: 10 additions & 0 deletions src/phoenix/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,16 @@ def calc(self, df: pd.DataFrame) -> Union[float, npt.NDArray[np.float64]]:
)


class Min(UnaryOperator, BaseMetric):
    """Metric that reports the minimum of a single dataframe column."""

    def calc(self, df: pd.DataFrame) -> float:
        """Return ``.min()`` of the column labelled ``self.operand`` in *df*.

        The result is passed through ``typing.cast`` for the type checker
        only; no runtime conversion to ``float`` takes place.
        """
        column = df.loc[:, self.operand]
        return cast(float, column.min())


class Max(UnaryOperator, BaseMetric):
    """Metric that reports the maximum of a single dataframe column."""

    def calc(self, df: pd.DataFrame) -> float:
        """Return ``.max()`` of the column labelled ``self.operand`` in *df*.

        The result is passed through ``typing.cast`` for the type checker
        only; no runtime conversion to ``float`` takes place.
        """
        column = df.loc[:, self.operand]
        return cast(float, column.max())


class Cardinality(UnaryOperator, BaseMetric):
def calc(self, df: pd.DataFrame) -> int:
return df.loc[:, self.operand].nunique()
Expand Down
3 changes: 3 additions & 0 deletions src/phoenix/server/api/types/DataQualityMetric.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ class DataQualityMetric(Enum):
percentEmpty = "PercentEmpty"
mean = "Mean"
sum = "Sum"
min = "Min"
max = "Max"
count = "Count"
13 changes: 7 additions & 6 deletions src/phoenix/server/api/types/Dimension.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
from datetime import timedelta
from typing import List, Optional

Expand Down Expand Up @@ -34,12 +35,12 @@ class Dimension(Node):
# Resolver that computes one data-quality metric for this dimension and
# returns it as an optional float.
# NOTE(review): this text is a diff rendering without +/- markers. Going by the
# file header (7 additions, 6 deletions), the six lines from
# `dimension_name = self.name` through the first `raise NotImplementedError`
# appear to be the REMOVED implementation and the six lines after it the ADDED
# one -- confirm against the commit before reading this as a single body.
async def dataQualityMetric(
self, metric: DataQualityMetric, info: Info[Context, None]
) -> Optional[float]:
# Presumably the removed version: each supported metric is fetched through a
# dedicated loader on the request context, keyed by the dimension name.
dimension_name = self.name
if metric is DataQualityMetric.cardinality:
return await info.context.loaders.cardinality.load(dimension_name)
elif metric is DataQualityMetric.percentEmpty:
return await info.context.loaders.percent_empty.load(dimension_name)
raise NotImplementedError(f"Metric {metric} is not implemented.")
# Presumably the added version: look the metric class up in METRICS by the
# enum member's value.
metric_cls = METRICS.get(metric.value, None)
# Reject unknown metrics and any metric class that is not a UnaryOperator.
if not metric_cls or not issubclass(metric_cls, UnaryOperator):
raise NotImplementedError(f"Metric {metric} is not implemented.")
# The metric is evaluated against the primary dataset's dataframe.
df = info.context.model.primary_dataset.dataframe
# Calling the metric instance yields a pair; only the second element is used.
_, ans = metric_cls(self.name)(df)
# A NaN result is reported as None.
return None if math.isnan(ans) else ans

@strawberry.field(
description=(
Expand Down

0 comments on commit b01b182

Please sign in to comment.