feat: Deprecate datasets module, rename to inferences #2785

Merged (13 commits, Apr 8, 2024)
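The diffs below keep `Dataset` importable alongside the new `Inference` name. A common way to finish a deprecation like this is a warning alias; here is a minimal sketch of that pattern, purely illustrative and not the PR's actual implementation:

```python
import warnings


class Inference:
    """New-style container for a dataframe plus its schema (illustrative stub)."""

    def __init__(self, dataframe, schema, name=None):
        self.dataframe = dataframe
        self.schema = schema
        self.name = name


class Dataset(Inference):
    """Deprecated alias kept so existing imports and calls keep working."""

    def __init__(self, *args, **kwargs):
        warnings.warn(
            "phoenix.Dataset is deprecated; use phoenix.Inference instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        super().__init__(*args, **kwargs)
```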
2 changes: 1 addition & 1 deletion examples/anthropic_evals_persona.ipynb

@@ -95,7 +95,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"px.launch_app(px.Dataset(df, schema))"
+"px.launch_app(px.Inference(df, schema))"
 ]
 }
 ],
(file header for this notebook not captured)

@@ -596,7 +596,7 @@
 "outputs": [],
 "source": [
 "model_name = model_type.split(\"/\")[1]\n",
-"ds = px.Dataset(dataframe=post_eval_df, schema=schema, name=model_name)"
+"ds = px.Inference(dataframe=post_eval_df, schema=schema, name=model_name)"
 ]
 },
 {
4 changes: 2 additions & 2 deletions examples/dolly-pythia-fine-tuned/dolly_vs_pythia.ipynb

@@ -227,8 +227,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"pythia_ds = px.Dataset(dataframe=pythia_df, schema=schema, name=\"pythia\")\n",
-"dolly_ds = px.Dataset(dataframe=dolly_df, schema=schema, name=\"dolly\")"
+"pythia_ds = px.Inference(dataframe=pythia_df, schema=schema, name=\"pythia\")\n",
+"dolly_ds = px.Inference(dataframe=dolly_df, schema=schema, name=\"dolly\")"
 ]
 },
 {
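These two inference sets would typically be compared by launching Phoenix with one as primary and the other as reference. A hedged sketch of such a follow-up cell, reusing the notebook's `px`, `pythia_ds`, and `dolly_ds`; the keyword names reflect `launch_app`'s primary/reference parameters as I understand them:

```python
# Hypothetical next cell: compare the two models side by side.
# Assumes `px`, `pythia_ds`, and `dolly_ds` from the cells above.
session = px.launch_app(primary=pythia_ds, reference=dolly_ds)
print(session.url)  # address of the running Phoenix UI
```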
2 changes: 1 addition & 1 deletion examples/taylor_swift_lyrics.ipynb

@@ -110,7 +110,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"px.launch_app(px.Dataset(df, schema))"
+"px.launch_app(px.Inference(df, schema))"
 ]
 },
 {
10 changes: 7 additions & 3 deletions src/phoenix/__init__.py

@@ -5,8 +5,10 @@
 from typing import Any, Optional

 from .datasets.dataset import Dataset
-from .datasets.fixtures import ExampleDatasets, load_example
-from .datasets.schema import EmbeddingColumnNames, RetrievalEmbeddingColumnNames, Schema
+from .datasets.fixtures import ExampleDatasets
+from .inferences.fixtures import ExampleInferences, load_example
+from .inferences.inference import Inference
+from .inferences.schema import EmbeddingColumnNames, RetrievalEmbeddingColumnNames, Schema
 from .session.client import Client
 from .session.evaluation import log_evaluations
 from .session.session import NotebookEnvironment, Session, active_session, close_app, launch_app

@@ -33,11 +35,13 @@
 __all__ = [
     "__version__",
     "Dataset",
-    "ExampleDatasets",
+    "Inference",
     "EmbeddingColumnNames",
     "RetrievalEmbeddingColumnNames",
     "Schema",
     "load_example",
+    "ExampleDatasets",
+    "ExampleInferences",
     "active_session",
     "close_app",
     "launch_app",
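With these exports, both the old and new names resolve, so existing notebooks keep running during the deprecation window. A minimal usage sketch; the column name and data are illustrative, and `Dataset` is assumed to remain importable per the diff above:

```python
import pandas as pd
import phoenix as px

df = pd.DataFrame({"prediction": ["cat", "dog"]})  # illustrative data
schema = px.Schema(prediction_label_column_name="prediction")

# New name introduced by this PR:
primary = px.Inference(dataframe=df, schema=schema, name="primary")

# Old name, still exported for backward compatibility:
legacy = px.Dataset(dataframe=df, schema=schema, name="legacy")
```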
14 changes: 8 additions & 6 deletions src/phoenix/core/model.py

@@ -1,13 +1,13 @@
 from typing import List, Optional, Union

-from phoenix.datasets.dataset import Dataset
-from phoenix.datasets.schema import EmbeddingColumnNames, EmbeddingFeatures
+from phoenix.inferences.inference import Inference
+from phoenix.inferences.schema import EmbeddingColumnNames, EmbeddingFeatures

 from .embedding_dimension import EmbeddingDimension


 def _get_embedding_dimensions(
-    primary_dataset: Dataset, reference_dataset: Optional[Dataset]
+    primary_dataset: Inference, reference_dataset: Optional[Inference]
 ) -> List[EmbeddingDimension]:
     embedding_dimensions: List[EmbeddingDimension] = []
     embedding_features: EmbeddingFeatures = {}

Contributor:
I think we can rename these arguments for clarity.

Contributor (Author):
I'm going to leave renames alone for now; this is a big refactor and I'm already having a tough time keeping it all straight.

@@ -58,8 +58,8 @@ def _get_embedding_dimensions(
 def _check_embedding_vector_lengths_match_across_datasets(
     embedding_feature_name: str,
     embedding_column_names: EmbeddingColumnNames,
-    primary_dataset: Dataset,
-    reference_dataset: Dataset,
+    primary_dataset: Inference,
+    reference_dataset: Inference,
 ) -> None:
     """
     Ensure that for each embedding feature, the vector lengths match across the primary

@@ -86,7 +86,9 @@ def _check_embedding_vector_lengths_match_across_datasets(
     )


-def _get_column_vector_length(dataset: Dataset, embedding_vector_column_name: str) -> Optional[int]:
+def _get_column_vector_length(
+    dataset: Inference, embedding_vector_column_name: str
+) -> Optional[int]:
     """
     Because a dataset has already been constructed, we can assume that the lengths
     of the vectors for any given embedding feature in the dataset are the same.
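For context, the docstring above states the invariant these helpers rely on: within a constructed inference set, every vector of a given embedding feature has the same length, so inspecting one row suffices. A minimal standalone sketch of that idea, operating on raw DataFrames rather than `Inference` objects so it runs without Phoenix (function and column names here are illustrative):

```python
from typing import Optional

import pandas as pd


def column_vector_length(
    dataframe: pd.DataFrame, embedding_vector_column_name: str
) -> Optional[int]:
    # All vectors in the column are assumed to share one length,
    # so the first row is representative.
    column = dataframe[embedding_vector_column_name]
    if column.empty:
        return None
    return len(column.iloc[0])


primary = pd.DataFrame({"embedding": [[0.1, 0.2], [0.3, 0.4]]})
reference = pd.DataFrame({"embedding": [[0.5, 0.6]]})
assert column_vector_length(primary, "embedding") == column_vector_length(
    reference, "embedding"
), "embedding vector lengths must match across primary and reference"
```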
12 changes: 6 additions & 6 deletions src/phoenix/core/model_schema_adapter.py

@@ -6,18 +6,18 @@
 from pandas.api.types import is_object_dtype
 from typing_extensions import TypeAlias, TypeGuard

-from phoenix import Dataset, EmbeddingColumnNames
+from phoenix import EmbeddingColumnNames, Inference
 from phoenix.core.model import _get_embedding_dimensions
 from phoenix.core.model_schema import Embedding, Model, RetrievalEmbedding, Schema
-from phoenix.datasets.schema import RetrievalEmbeddingColumnNames
-from phoenix.datasets.schema import Schema as DatasetSchema
+from phoenix.inferences.schema import RetrievalEmbeddingColumnNames
+from phoenix.inferences.schema import Schema as DatasetSchema

 DatasetName: TypeAlias = str
 ColumnName: TypeAlias = str
 DisplayName: TypeAlias = str


-def create_model_from_datasets(*datasets: Optional[Dataset]) -> Model:
+def create_model_from_datasets(*datasets: Optional[Inference]) -> Model:
     # TODO: move this validation into model_schema.Model.
     if len(datasets) > 1 and datasets[0] is not None:
         # Check that for each embedding dimension all vectors

@@ -132,8 +132,8 @@ def create_model_from_datasets(*datasets: Optional[Dataset]) -> Model:
     )


-def _is_dataset(obj: Optional[Dataset]) -> TypeGuard[Dataset]:
-    return type(obj) is Dataset
+def _is_dataset(obj: Optional[Inference]) -> TypeGuard[Inference]:
+    return type(obj) is Inference


 def _take_first_str(iterator: Iterable[str]) -> str:

Contributor:
These probably deserve to be renamed.
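On the `TypeGuard` point above: the pattern lets a type checker narrow `Optional[Inference]` to `Inference` after a positive check, and `type(obj) is Inference` (unlike `isinstance`) rejects subclasses. A self-contained sketch using the rename the reviewer suggests; `_is_inference` is hypothetical, and the stub class stands in for Phoenix's real one:

```python
from typing import List, Optional

from typing_extensions import TypeGuard


class Inference:  # stand-in stub for phoenix's Inference class
    pass


def _is_inference(obj: Optional[Inference]) -> TypeGuard[Inference]:
    # `type(...) is` intentionally excludes subclasses, unlike isinstance,
    # so a deprecated subclass alias would not pass this check.
    return type(obj) is Inference


def keep_present(*objs: Optional[Inference]) -> List[Inference]:
    # After the guard, the type checker treats each kept element as Inference.
    return [obj for obj in objs if _is_inference(obj)]
```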