-
Notifications
You must be signed in to change notification settings - Fork 285
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: plumb Dataset start/end times through graphQL #194
Changes from 12 commits
e50e432
b9014b5
962bdd9
13f3b95
3f6a198
77d9df4
bfe7ed2
cee6796
bfe257c
3859376
1c46785
6b133c5
3db71b6
a47bb20
da3e96b
158135a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,5 +7,6 @@ module.exports = { | |
noFutureProofEnums: true, | ||
customScalars: { | ||
GlobalID: "String", | ||
"DateTime": "string", | ||
}, | ||
}; |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,8 @@ | |
import uuid | ||
from copy import deepcopy | ||
from dataclasses import fields, replace | ||
from typing import Any, Dict, List, Optional, Set, Tuple, Union | ||
from datetime import datetime | ||
from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast | ||
|
||
from pandas import DataFrame, Series, Timestamp, read_parquet, to_datetime | ||
from pandas.api.types import is_numeric_dtype | ||
|
@@ -72,6 +73,20 @@ def __init__( | |
self.to_disc() | ||
logger.info(f"""Dataset: {self.__name} initialized""") | ||
|
||
@property | ||
def start_time(self) -> datetime: | ||
"""Returns the datetime of the earliest inference in the dataset""" | ||
ts_col_name: str = cast(str, self.schema.timestamp_column_name) | ||
dt: datetime = self.__dataframe[ts_col_name].min() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reviewer: Since this value never changes, you can use dynamic programming to compute it once. Author: I'm assuming you're referring to the caching aspect of dynamic programming? How about just using the @cached_property annotation — or do we have a preexisting convention for how we'd like to do this? Reviewer: Oh, that's great. Very cool. |
||
return dt | ||
|
||
@property | ||
def end_time(self) -> datetime: | ||
"""Returns the datetime of the latest inference in the dataset""" | ||
ts_col_name: str = cast(str, self.schema.timestamp_column_name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Prefer dynamic programming to compute once |
||
dt: datetime = self.__dataframe[ts_col_name].max() | ||
return dt | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reviewer: I've been advocating for non-abbreviated variable names — see https://youtu.be/-J3wNP6u5YU. Author: 👍 — done. |
||
|
||
@property | ||
def dataframe(self) -> DataFrame: | ||
return self.__dataframe | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,23 @@ | ||
from datetime import datetime | ||
|
||
import strawberry | ||
|
||
from phoenix.datasets import Dataset as InternalDataset | ||
|
||
|
||
@strawberry.type | ||
class Dataset: | ||
name: str | ||
start_time: datetime | ||
end_time: datetime | ||
|
||
|
||
def to_gql_dataset(dataset: InternalDataset) -> Dataset: | ||
""" | ||
Converts a phoenix.datasets.Dataset to a phoenix.server.api.types.Dataset | ||
""" | ||
return Dataset( | ||
name=dataset.name, | ||
start_time=dataset.start_time, | ||
end_time=dataset.end_time, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import pytest | ||
from pandas import DataFrame, Timestamp | ||
|
||
from phoenix.datasets import Dataset as InputDataset | ||
from phoenix.datasets import Schema | ||
from phoenix.server.api.types import Dataset | ||
|
||
|
||
@pytest.fixture | ||
def input_dataset(): | ||
input_df = DataFrame( | ||
{ | ||
"prediction_label": ["apple", "orange", "grape"], | ||
"timestamp": [ | ||
Timestamp(year=2023, month=1, day=1, hour=2, second=30), | ||
Timestamp(year=2023, month=1, day=5, hour=4, second=25), | ||
Timestamp(year=2023, month=1, day=10, hour=6, second=20), | ||
], | ||
} | ||
) | ||
|
||
input_schema = Schema( | ||
prediction_label_column_name="prediction_label", | ||
timestamp_column_name="timestamp", | ||
) | ||
return InputDataset(dataframe=input_df, schema=input_schema) | ||
|
||
|
||
def test_dataset_serialization(input_dataset): | ||
converted_gql_dataset = Dataset.to_gql_dataset(input_dataset) | ||
|
||
expected_dataset = input_dataset | ||
assert converted_gql_dataset.start_time == expected_dataset.start_time | ||
assert converted_gql_dataset.end_time == expected_dataset.end_time |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since DateTime maps to another base primitive, the custom scalar value should be the uppercase "String" (matching the existing GlobalID: "String" entry), not the lowercase "string".