feat: Deprecate datasets module, rename to inferences #2785

Merged (13 commits, Apr 8, 2024)
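The diffs below keep `Dataset` importable alongside the new `Inference` name. A common way to finish a deprecation like this is a warning alias; here is a minimal sketch of that pattern, purely illustrative and not the PR's actual implementation:

```python
import warnings


class Inference:
    """New-style container for a dataframe plus its schema (illustrative stub)."""

    def __init__(self, dataframe, schema, name=None):
        self.dataframe = dataframe
        self.schema = schema
        self.name = name


class Dataset(Inference):
    """Deprecated alias kept so existing imports and calls keep working."""

    def __init__(self, *args, **kwargs):
        warnings.warn(
            "phoenix.Dataset is deprecated; use phoenix.Inference instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        super().__init__(*args, **kwargs)
```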
2 changes: 1 addition & 1 deletion examples/anthropic_evals_persona.ipynb

@@ -95,7 +95,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"px.launch_app(px.Dataset(df, schema))"
+"px.launch_app(px.Inference(df, schema))"
 ]
 }
 ],
(file header for this notebook not captured)

@@ -596,7 +596,7 @@
 "outputs": [],
 "source": [
 "model_name = model_type.split(\"/\")[1]\n",
-"ds = px.Dataset(dataframe=post_eval_df, schema=schema, name=model_name)"
+"ds = px.Inference(dataframe=post_eval_df, schema=schema, name=model_name)"
 ]
 },
 {
4 changes: 2 additions & 2 deletions examples/dolly-pythia-fine-tuned/dolly_vs_pythia.ipynb

@@ -227,8 +227,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"pythia_ds = px.Dataset(dataframe=pythia_df, schema=schema, name=\"pythia\")\n",
-"dolly_ds = px.Dataset(dataframe=dolly_df, schema=schema, name=\"dolly\")"
+"pythia_ds = px.Inference(dataframe=pythia_df, schema=schema, name=\"pythia\")\n",
+"dolly_ds = px.Inference(dataframe=dolly_df, schema=schema, name=\"dolly\")"
 ]
 },
 {
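These two inference sets would typically be compared by launching Phoenix with one as primary and the other as reference. A hedged sketch of such a follow-up cell, reusing the notebook's `px`, `pythia_ds`, and `dolly_ds`; the keyword names reflect `launch_app`'s primary/reference parameters as I understand them:

```python
# Hypothetical next cell: compare the two models side by side.
# Assumes `px`, `pythia_ds`, and `dolly_ds` from the cells above.
session = px.launch_app(primary=pythia_ds, reference=dolly_ds)
print(session.url)  # address of the running Phoenix UI
```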
2 changes: 1 addition & 1 deletion examples/taylor_swift_lyrics.ipynb

@@ -110,7 +110,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"px.launch_app(px.Dataset(df, schema))"
+"px.launch_app(px.Inference(df, schema))"
 ]
 },
 {
10 changes: 7 additions & 3 deletions src/phoenix/__init__.py

@@ -5,8 +5,10 @@
 from typing import Any, Optional

 from .datasets.dataset import Dataset
-from .datasets.fixtures import ExampleDatasets, load_example
-from .datasets.schema import EmbeddingColumnNames, RetrievalEmbeddingColumnNames, Schema
+from .datasets.fixtures import ExampleDatasets
+from .inferences.fixtures import ExampleInferences, load_example
+from .inferences.inference import Inference
+from .inferences.schema import EmbeddingColumnNames, RetrievalEmbeddingColumnNames, Schema
 from .session.client import Client
 from .session.evaluation import log_evaluations
 from .session.session import NotebookEnvironment, Session, active_session, close_app, launch_app

@@ -33,11 +35,13 @@
 __all__ = [
     "__version__",
     "Dataset",
-    "ExampleDatasets",
+    "Inference",
     "EmbeddingColumnNames",
     "RetrievalEmbeddingColumnNames",
     "Schema",
     "load_example",
+    "ExampleDatasets",
+    "ExampleInferences",
     "active_session",
     "close_app",
     "launch_app",
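With these exports, both the old and new names resolve, so existing notebooks keep running during the deprecation window. A minimal usage sketch; the column name and data are illustrative, and `Dataset` is assumed to remain importable per the diff above:

```python
import pandas as pd
import phoenix as px

df = pd.DataFrame({"prediction": ["cat", "dog"]})  # illustrative data
schema = px.Schema(prediction_label_column_name="prediction")

# New name introduced by this PR:
primary = px.Inference(dataframe=df, schema=schema, name="primary")

# Old name, still exported for backward compatibility:
legacy = px.Dataset(dataframe=df, schema=schema, name="legacy")
```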
14 changes: 8 additions & 6 deletions src/phoenix/core/model.py

@@ -1,13 +1,13 @@
 from typing import List, Optional, Union

-from phoenix.datasets.dataset import Dataset
-from phoenix.datasets.schema import EmbeddingColumnNames, EmbeddingFeatures
+from phoenix.inferences.inference import Inference
+from phoenix.inferences.schema import EmbeddingColumnNames, EmbeddingFeatures

 from .embedding_dimension import EmbeddingDimension


 def _get_embedding_dimensions(
-    primary_dataset: Dataset, reference_dataset: Optional[Dataset]
+    primary_dataset: Inference, reference_dataset: Optional[Inference]
 ) -> List[EmbeddingDimension]:
     embedding_dimensions: List[EmbeddingDimension] = []
     embedding_features: EmbeddingFeatures = {}

Contributor:
I think we can rename these arguments for clarity.

Contributor (Author):
I'm going to leave renames alone for now; this is a big refactor and I'm already having a tough time keeping it all straight.

@@ -58,8 +58,8 @@ def _get_embedding_dimensions(
 def _check_embedding_vector_lengths_match_across_datasets(
     embedding_feature_name: str,
     embedding_column_names: EmbeddingColumnNames,
-    primary_dataset: Dataset,
-    reference_dataset: Dataset,
+    primary_dataset: Inference,
+    reference_dataset: Inference,
 ) -> None:
     """
     Ensure that for each embedding feature, the vector lengths match across the primary

@@ -86,7 +86,9 @@ def _check_embedding_vector_lengths_match_across_datasets(
     )


-def _get_column_vector_length(dataset: Dataset, embedding_vector_column_name: str) -> Optional[int]:
+def _get_column_vector_length(
+    dataset: Inference, embedding_vector_column_name: str
+) -> Optional[int]:
     """
     Because a dataset has already been constructed, we can assume that the lengths
     of the vectors for any given embedding feature in the dataset are the same.
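For context, the docstring above states the invariant these helpers rely on: within a constructed inference set, every vector of a given embedding feature has the same length, so inspecting one row suffices. A minimal standalone sketch of that idea, operating on raw DataFrames rather than `Inference` objects so it runs without Phoenix (function and column names here are illustrative):

```python
from typing import Optional

import pandas as pd


def column_vector_length(
    dataframe: pd.DataFrame, embedding_vector_column_name: str
) -> Optional[int]:
    # All vectors in the column are assumed to share one length,
    # so the first row is representative.
    column = dataframe[embedding_vector_column_name]
    if column.empty:
        return None
    return len(column.iloc[0])


primary = pd.DataFrame({"embedding": [[0.1, 0.2], [0.3, 0.4]]})
reference = pd.DataFrame({"embedding": [[0.5, 0.6]]})
assert column_vector_length(primary, "embedding") == column_vector_length(
    reference, "embedding"
), "embedding vector lengths must match across primary and reference"
```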
12 changes: 6 additions & 6 deletions src/phoenix/core/model_schema_adapter.py

@@ -6,18 +6,18 @@
 from pandas.api.types import is_object_dtype
 from typing_extensions import TypeAlias, TypeGuard

-from phoenix import Dataset, EmbeddingColumnNames
+from phoenix import EmbeddingColumnNames, Inference
 from phoenix.core.model import _get_embedding_dimensions
 from phoenix.core.model_schema import Embedding, Model, RetrievalEmbedding, Schema
-from phoenix.datasets.schema import RetrievalEmbeddingColumnNames
-from phoenix.datasets.schema import Schema as DatasetSchema
+from phoenix.inferences.schema import RetrievalEmbeddingColumnNames
+from phoenix.inferences.schema import Schema as DatasetSchema

 DatasetName: TypeAlias = str
 ColumnName: TypeAlias = str
 DisplayName: TypeAlias = str


-def create_model_from_datasets(*datasets: Optional[Dataset]) -> Model:
+def create_model_from_datasets(*datasets: Optional[Inference]) -> Model:
     # TODO: move this validation into model_schema.Model.
     if len(datasets) > 1 and datasets[0] is not None:
         # Check that for each embedding dimension all vectors

@@ -132,8 +132,8 @@ def create_model_from_datasets(*datasets: Optional[Dataset]) -> Model:
     )


-def _is_dataset(obj: Optional[Dataset]) -> TypeGuard[Dataset]:
-    return type(obj) is Dataset
+def _is_dataset(obj: Optional[Inference]) -> TypeGuard[Inference]:
+    return type(obj) is Inference


 def _take_first_str(iterator: Iterable[str]) -> str:

Contributor:
These probably deserve to be renamed.
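On the `TypeGuard` point above: the pattern lets a type checker narrow `Optional[Inference]` to `Inference` after a positive check, and `type(obj) is Inference` (unlike `isinstance`) rejects subclasses. A self-contained sketch using the rename the reviewer suggests; `_is_inference` is hypothetical, and the stub class stands in for Phoenix's real one:

```python
from typing import List, Optional

from typing_extensions import TypeGuard


class Inference:  # stand-in stub for phoenix's Inference class
    pass


def _is_inference(obj: Optional[Inference]) -> TypeGuard[Inference]:
    # `type(...) is` intentionally excludes subclasses, unlike isinstance,
    # so a deprecated subclass alias would not pass this check.
    return type(obj) is Inference


def keep_present(*objs: Optional[Inference]) -> List[Inference]:
    # After the guard, the type checker treats each kept element as Inference.
    return [obj for obj in objs if _is_inference(obj)]
```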