Skip to content

Commit

Permalink
feat: time-windowed euclidean distance (#247)
Browse files Browse the repository at this point in the history
* feat: gql interface for timeseries

* format fix

* correct docstrings

* tmp

* tmp

* tmp

* added tests and got them passing

* update graphql schema

* hard-code time range in driftMetric query

* run prettier

* readability improvements

* readability improvements

* update graphql schema

* simplify code to not round to nearest hour

---------

Co-authored-by: Mikyo King <mikyo@arize.com>
  • Loading branch information
axiomofjoy and mikeldking authored Feb 15, 2023
1 parent 2d1834b commit cea246f
Show file tree
Hide file tree
Showing 14 changed files with 750 additions and 58 deletions.
6 changes: 4 additions & 2 deletions app/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,13 @@ type DriftTimeSeries implements TimeSeries {
type EmbeddingDimension implements Node {
id: GlobalID!
name: String!
driftMetric(metric: DriftMetric!, timeRange: TimeRange!): Float
driftTimeSeries(
metric: DriftMetric!

"""The time range of the primary dataset"""
timeRange: TimeRange!
): DriftTimeSeries!
): DriftTimeSeries
UMAPPoints(
"""The time range of the primary dataset to generate the UMAP points for"""
timeRange: TimeRange!
Expand All @@ -78,7 +81,6 @@ type EmbeddingDimension implements Node {
"""UMAP N samples"""
nSamples: Int = 500
): UMAPPoints!
driftMetric(metric: DriftMetric!): Float
}

type EmbeddingDimensionConnection {
Expand Down
8 changes: 7 additions & 1 deletion app/src/components/model/ModelEmbeddingsTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ export function ModelEmbeddingsTable(props: ModelEmbeddingsTable) {
embedding: node {
id
name
euclideanDistance: driftMetric(metric: euclideanDistance)
euclideanDistance: driftMetric(
metric: euclideanDistance
timeRange: {
start: "1970-01-20 02:00:00"
end: "1970-01-20 04:00:00"
}
)
}
}
}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 12 additions & 4 deletions app/src/pages/__generated__/HomeQuery.graphql.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions src/phoenix/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ def _get_embedding_feature_column_names(
raise err.SchemaError(err.MissingEmbeddingFeatureColumnNames(embedding_feature_name))
return embedding_feature_column_names[embedding_feature_name]

def get_timestamp_column(self) -> "Series[Any]":
    """
    Returns the dataframe column named by the schema's timestamp_column_name.

    Raises a SchemaError wrapping MissingTimestampColumnName when the schema
    does not specify a timestamp column name.
    """
    column_name = self.schema.timestamp_column_name
    if column_name is not None:
        return self.dataframe[column_name]
    raise err.SchemaError(err.MissingTimestampColumnName())

# TODO(mikeldking): add strong vector type
def get_embedding_vector_column(self, embedding_feature_name: str) -> "Series[Any]":
column_names = self._get_embedding_feature_column_names(embedding_feature_name)
Expand Down
10 changes: 10 additions & 0 deletions src/phoenix/datasets/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,16 @@ def error_message(self) -> str:
)


class MissingTimestampColumnName(ValidationError):
    """
    Raised on an attempt to access a timestamp column when the schema does
    not declare one.
    """

    def error_message(self) -> str:
        message = "Schema is missing timestamp_column_name."
        return message


class SchemaError(Exception):
"""An error raised when the Schema is invalid or incomplete"""

Expand Down
8 changes: 4 additions & 4 deletions src/phoenix/metrics/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@


def euclidean_distance(
array0: npt.NDArray[np.float64],
array1: npt.NDArray[np.float64],
pt0: npt.NDArray[np.float64],
pt1: npt.NDArray[np.float64],
) -> Optional[float]:
"""
Computes Euclidean distance between the centroids of two arrays.
Computes Euclidean distance between two points.
"""
return cast(float, euclidean(np.mean(array0, axis=0), np.mean(array1, axis=0)))
return cast(float, euclidean(pt0, pt1))
3 changes: 3 additions & 0 deletions src/phoenix/server/api/input_types/TimeRange.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
class TimeRange:
    start: datetime.datetime
    end: datetime.datetime

    def is_valid(self) -> bool:
        """Returns True only when start is strictly earlier than end."""
        start, end = self.start, self.end
        return start < end
2 changes: 1 addition & 1 deletion src/phoenix/server/api/types/DriftTimeSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
class DriftTimeSeries(TimeSeries):
"""A time series of drift metrics"""

pass
...
Loading

0 comments on commit cea246f

Please sign in to comment.