chore: move fixtures, expose them via load func #353

Merged: 6 commits, Mar 9, 2023
44 changes: 24 additions & 20 deletions README.md
@@ -25,12 +25,12 @@ Phoenix provides MLOps insights at lightning speed with zero-config observabilit

**_Phoenix is under active development. APIs may change at any time._**

- [Installation](#installation)
- [Getting Started](#getting-started)
- [Documentation](#documentation)
- [Community](#community)
- [Contributing](#contributing)
- [License](#license)

## Installation

@@ -47,15 +47,17 @@ After installing `arize-phoenix` in your Jupyter or Colab environment, open your
```python
import phoenix as px

train_ds, prod_ds = px.load_dataset("sentiment_classification_language_drift")
px.launch_app(train_ds, prod_ds)
datasets = px.load_datasets("sentiment_classification_language_drift")
session = px.launch_app(datasets.primary, datasets.reference)
session.view()
```

Next, visualize your embeddings and inspect problematic clusters of your production data.

TODO(#297): Include GIF where we navigate to embeddings, zoom in and rotate, and select a cluster.

Don't forget to close the app when you're done.

```python
px.close_app()
```
@@ -69,21 +69,23 @@ For in-depth examples and explanations, read the [docs](https://docs.arize.com/p
## Community

Join our community to connect with thousands of machine learning practitioners and ML observability enthusiasts.
- 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
- 💡 Ask questions and provide feedback in the *#phoenix-support* channel.
- 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
- 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
- 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
- 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
- ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
- ✉️ Subscribe to our mailing list. TODO(#294): Add link
- 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
- 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge

- 🌍 Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q).
- 💡 Ask questions and provide feedback in the _#phoenix-support_ channel.
- 🌟 Leave a star on our [GitHub](https://github.com/Arize-ai/phoenix).
- 🐞 Report bugs with [GitHub Issues](https://github.com/Arize-ai/phoenix/issues).
- 🗺️ Check out our [roadmap](https://github.com/orgs/Arize-ai/projects/45) to see where we're heading next.
- 🎓 Learn the fundamentals of ML observability with our [introductory](https://arize.com/ml-observability-fundamentals/) and [advanced](https://arize.com/blog-course/) courses.
- ✏️ Check out our [blog](https://arize.com/blog/). TODO(#291): Add blog filter for Phoenix
- ✉️ Subscribe to our mailing list. TODO(#294): Add link
- 🐦 Follow us on [Twitter](https://twitter.com/ArizePhoenix).
- 👔 Check out our LinkedIn. TODO(#292): Add link, fix badge

## Contributing

- 💻 Read our [developer's guide](./DEVELOPMENT.md).
- 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the *#phoenix-devs* channel.
- 💻 Read our [developer's guide](./DEVELOPMENT.md).
- 🗣️ Join our [Slack community](https://join.slack.com/t/arize-ai/shared_invite/zt-1px8dcmlf-fmThhDFD_V_48oU7ALan4Q) and chat with us in the _#phoenix-devs_ channel.

## License

Arize-Phoenix is licensed under the [Elastic License 2.0 (ELv2)](./LICENSE).
2 changes: 1 addition & 1 deletion src/phoenix/__init__.py
@@ -1,2 +1,2 @@
from .datasets import Dataset, EmbeddingColumnNames, Schema
from .datasets import Dataset, EmbeddingColumnNames, Schema, load_datasets
from .session.session import close_app, launch_app
3 changes: 2 additions & 1 deletion src/phoenix/datasets/__init__.py
@@ -1,4 +1,5 @@
from .dataset import Dataset
from .fixtures import load_datasets
from .schema import EmbeddingColumnNames, Schema

__all__ = ["Dataset", "Schema", "EmbeddingColumnNames"]
__all__ = ["Dataset", "Schema", "EmbeddingColumnNames", "load_datasets"]
57 changes: 47 additions & 10 deletions src/phoenix/server/fixtures.py → src/phoenix/datasets/fixtures.py
@@ -1,11 +1,12 @@
import logging
import os
from dataclasses import dataclass, replace
from typing import Tuple
from typing import Dict, Tuple

from pandas import read_parquet

from phoenix.datasets import Dataset, EmbeddingColumnNames, Schema
from .dataset import Dataset
from .schema import EmbeddingColumnNames, Schema

logger = logging.getLogger(__name__)

@@ -189,23 +190,24 @@ class Fixture:
NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}


def download_fixture_if_missing(fixture_name: str) -> None:
def download_fixture_if_missing(fixture_name: str) -> Tuple[Dataset, Dataset]:
"""
Downloads primary and reference datasets for a fixture if they are not found
locally.
"""
fixture = _get_fixture_by_name(fixture_name=fixture_name)
primary_dataset_name, reference_dataset_name = get_dataset_names_from_fixture_name(fixture_name)
_download_and_persist_dataset_if_missing(
primary_dataset = _download_and_persist_dataset_if_missing(
dataset_name=primary_dataset_name,
dataset_url=fixture.primary_dataset_url,
schema=fixture.primary_schema,
)
_download_and_persist_dataset_if_missing(
reference_dataset = _download_and_persist_dataset_if_missing(
dataset_name=reference_dataset_name,
dataset_url=fixture.reference_dataset_url,
schema=fixture.reference_schema,
)
return primary_dataset, reference_dataset


def get_dataset_names_from_fixture_name(fixture_name: str) -> Tuple[str, str]:
@@ -223,27 +225,62 @@ def _get_fixture_by_name(fixture_name: str) -> Fixture:
if the input fixture name does not match any known fixture names.
"""
if fixture_name not in NAME_TO_FIXTURE:
raise ValueError(f'"{fixture_name}" is not a valid fixture name.')
valid_fixture_names = ", ".join(NAME_TO_FIXTURE.keys())
raise ValueError(f'"{fixture_name}" is invalid. Valid names are: {valid_fixture_names}')
return NAME_TO_FIXTURE[fixture_name]
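
The improved `ValueError` above now lists the valid fixture names instead of only rejecting the input. The lookup-with-helpful-error pattern can be sketched independently of Phoenix; the registry contents below are placeholders, not the real fixtures:

```python
# Minimal sketch of a name registry whose lookup error lists valid choices.
# The registry values are placeholder objects, not real Phoenix fixtures.
NAME_TO_FIXTURE = {
    "sentiment_classification_language_drift": object(),
    "fashion_mnist": object(),
}

def get_fixture_by_name(fixture_name: str) -> object:
    if fixture_name not in NAME_TO_FIXTURE:
        # Surface the full set of valid names in the error message.
        valid_fixture_names = ", ".join(NAME_TO_FIXTURE.keys())
        raise ValueError(
            f'"{fixture_name}" is invalid. Valid names are: {valid_fixture_names}'
        )
    return NAME_TO_FIXTURE[fixture_name]
```

A typo in the name then fails with an actionable message rather than a bare rejection.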


def _download_and_persist_dataset_if_missing(
dataset_name: str, dataset_url: str, schema: Schema
) -> None:
) -> Dataset:
"""
Downloads a dataset from the given URL if it is not found locally.
"""
try:
Dataset.from_name(dataset_name)
return
return Dataset.from_name(dataset_name)
except FileNotFoundError:
pass

logger.info(f'Downloading dataset: "{dataset_name}"')
Dataset(
dataset = Dataset(
dataframe=read_parquet(dataset_url),
schema=schema,
name=dataset_name,
persist_to_disc=True,
)
logger.info("Download complete.")
return dataset
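
The load-from-disk-or-download flow above can be sketched generically: try the local copy first, and only on a `FileNotFoundError` fetch and persist. In this sketch `fetch` stands in for the network call and the cache is a plain directory; all names are illustrative, not Phoenix APIs:

```python
import os
import tempfile

def load_or_fetch(name: str, cache_dir: str, fetch) -> str:
    """Return the cached contents for `name`, calling `fetch()` only on a miss."""
    path = os.path.join(cache_dir, name)
    try:
        with open(path) as f:  # fast path: already persisted locally
            return f.read()
    except FileNotFoundError:
        pass
    data = fetch()             # slow path: download once...
    with open(path, "w") as f:  # ...then persist for the next call
        f.write(data)
    return data
```

A second call with the same name hits the on-disk copy, so the expensive fetch runs at most once per dataset.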


@dataclass(frozen=True)
class DatasetDict(Dict[str, Dataset]):
"""A dictionary of datasets, split out by dataset type (primary, reference)."""

primary: Dataset
reference: Dataset
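
The `DatasetDict` shape — a frozen dataclass layered over `Dict` so splits read as attributes — can be tried with plain strings in place of `Dataset` objects (a sketch; names here are illustrative). Note that the dataclass fields live as instance attributes, not dictionary entries, so the underlying mapping starts out empty:

```python
from dataclasses import dataclass
from typing import Dict

@dataclass(frozen=True)
class SplitDict(Dict[str, str]):
    """Named splits exposed as read-only attributes, Hugging Face-style."""
    primary: str
    reference: str

splits = SplitDict(primary="train.parquet", reference="prod.parquet")
print(splits.primary)  # attribute access to the primary split
print(len(splits))     # the underlying dict itself is empty: 0
```

Because the dataclass is frozen, reassigning `splits.primary` raises `FrozenInstanceError`, which keeps the split handles stable after loading.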
Review comment on lines +255 to +260 (Contributor Author): Mimic Hugging Face "split"



def load_datasets(use_case: str) -> DatasetDict:
"""
Loads the primary and reference datasets for a given use-case.

Parameters
----------
use_case: str
Review comment (Contributor Author): @axiomofjoy switched to use_case

Name of the Phoenix-supported use case.
Valid values include:
- "sentiment_classification_language_drift"
- "fashion_mnist"
- "ner_token_drift"
- "credit_card_fraud"
- "click_through_rate"


Returns
-------
datasets: DatasetDict
A dictionary of datasets, split out by dataset type (primary, reference).

"""
primary_dataset, reference_dataset = download_fixture_if_missing(use_case)
return DatasetDict(primary=primary_dataset, reference=reference_dataset)
6 changes: 3 additions & 3 deletions src/phoenix/server/main.py
@@ -8,12 +8,12 @@
import uvicorn

import phoenix.config as config
from phoenix.server.app import create_app
from phoenix.server.fixtures import (
from phoenix.datasets.fixtures import (
FIXTURES,
download_fixture_if_missing,
get_dataset_names_from_fixture_name,
)
from phoenix.server.app import create_app

logger = logging.getLogger(__name__)

@@ -66,7 +66,7 @@ def _get_pid_file() -> str:
primary_dataset_name, reference_dataset_name = get_dataset_names_from_fixture_name(
fixture_name
)
print(f'🌎 Downloading fixture "{fixture_name}" if missing')
print(f'🌎 Initializing fixture: "{fixture_name}"')
download_fixture_if_missing(fixture_name)

print(f"1️⃣ primary dataset: {primary_dataset_name}")
78 changes: 78 additions & 0 deletions tutorials/quickstart.ipynb
@@ -0,0 +1,78 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# <center>Quickstart Guide</center>\n",
"## <center>Gain insights into your model via Phoenix</center>\n",
"\n",
"Phoenix is, first and foremost, an application that runs alongside your notebook environment. It takes in up to two datasets and surfaces drift, performance, and data quality insights.\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 📚 Install `arize-phoenix` "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -q arize-phoenix"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using a built-in dataset to view the application\n",
"\n",
"To get familiar with the application itself, the easiest way to get started is to use one of Phoenix's example datasets."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import phoenix as px\n",
"\n",
"# Get the fixture datasets via a specific use case. Some valid values are \"fashion_mnist\", \"sentiment_classification_language_drift\", and \"credit_card_fraud\"\n",
"datasets = px.load_datasets(\"sentiment_classification_language_drift\")\n",
"session = px.launch_app(datasets.primary, datasets.reference)\n",
"session.view()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "phoenix",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}