Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: add LLM fixtures for demo dataset (fine-tuning dataset), fix demo notebook #4286

Merged
merged 18 commits into from
Aug 20, 2024
Merged
9 changes: 8 additions & 1 deletion src/phoenix/trace/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class TracesFixture:
demo_llama_index_rag_fixture = TracesFixture(
name="demo_llama_index_rag",
description="Traces and evaluations of a RAG chatbot using LlamaIndex.",
file_name="demo_llama_index_rag.parquet",
file_name="demo_llama_index_rag_traces.parquet",
evaluation_fixtures=(
EvaluationFixture(
evaluation_name="Q&A Correctness",
Expand All @@ -102,6 +102,12 @@ class TracesFixture:
),
)

demo_llama_index_rag_llm_fixture = TracesFixture(
name="demo_llama_index_rag_llm",
description="LLM traces for RAG chatbot using LlamaIndex.",
file_name="demo_llama_index_llm_all_spans.parquet",
)

llama_index_rag_fixture = TracesFixture(
name="llama_index_rag",
description="Traces from running the llama_index on a RAG use case.",
Expand Down Expand Up @@ -196,6 +202,7 @@ class TracesFixture:

TRACES_FIXTURES: List[TracesFixture] = [
demo_llama_index_rag_fixture,
demo_llama_index_rag_llm_fixture,
llama_index_rag_fixture,
llama_index_rag_fixture_with_davinci,
langchain_rag_stuff_document_chain_fixture,
Expand Down
128 changes: 67 additions & 61 deletions tutorials/internal/demo_llama_index/demo_llama_index_rag.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,8 @@
},
"outputs": [],
"source": [
"import phoenix as px"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import phoenix as px\n",
"\n",
"session = px.launch_app()"
]
},
Expand Down Expand Up @@ -154,7 +147,9 @@
"id": "PWDCQJN1co7-"
},
"source": [
"Enable Phoenix tracing via `LlamaIndexInstrumentor`. Phoenix uses OpenInference traces - an open-source standard for capturing and storing LLM application traces that enables LLM applications to seamlessly integrate with LLM observability solutions such as Phoenix."
"Enable Phoenix tracing via `LlamaIndexInstrumentor`. \n",
"\n",
"Phoenix uses OpenInference traces - an open-source standard for capturing and storing LLM application traces that enables LLM applications to seamlessly integrate with LLM observability solutions such as Phoenix."
]
},
{
Expand Down Expand Up @@ -318,22 +313,7 @@
},
"outputs": [],
"source": [
"questions_df = pd.read_parquet(\"fixtures/demo_llama_index_rag_questions.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 424
},
"id": "qKpbdBFwsKuC",
"outputId": "cd5feb82-1643-440b-dd21-9294cdb09394"
},
"outputs": [],
"source": [
"questions_df = pd.read_parquet(\"demo_llama_index_rag_questions.parquet\")\n",
"questions_df"
]
},
Expand Down Expand Up @@ -460,14 +440,22 @@
"eval_model = OpenAIModel(model=\"gpt-4\")\n",
"relevance_evaluator = RelevanceEvaluator(eval_model)\n",
"hallucination_evaluator = HallucinationEvaluator(eval_model)\n",
"qa_evaluator = QAEvaluator(eval_model)\n",
"\n",
"qa_evaluator = QAEvaluator(eval_model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"retrieved_documents_relevance_df = run_evals(\n",
" evaluators=[relevance_evaluator],\n",
" dataframe=retrieved_documents_df,\n",
" provide_explanation=True,\n",
" concurrency=20,\n",
")[0]"
")[0]\n",
"retrieved_documents_relevance_df"
]
},
{
Expand Down Expand Up @@ -501,42 +489,17 @@
" evaluators=[hallucination_evaluator, qa_evaluator],\n",
" provide_explanation=True,\n",
" concurrency=20,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 455
},
"id": "RaZwCSFico8I",
"outputId": "14ef12b7-af63-4ab1-ec14-c9b7d198ed54"
},
"outputs": [],
"source": [
"retrieved_documents_relevance_df = retrieved_documents_relevance_df.reset_index().set_index(\n",
" \"context.span_id\"\n",
")\n",
"retrieved_documents_relevance_df"
"hallucination_eval_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 238
},
"id": "cCPWGtd0zPIl",
"outputId": "cc6e820f-7b58-4f84-dd9e-2a926f48b163"
},
"metadata": {},
"outputs": [],
"source": [
"hallucination_eval_df.head()"
"qa_eval_df"
]
},
{
Expand All @@ -556,12 +519,14 @@
},
"outputs": [],
"source": [
"from phoenix.trace import SpanEvaluations\n",
"from phoenix.trace import DocumentEvaluations, SpanEvaluations\n",
"\n",
"px.Client().log_evaluations(\n",
" SpanEvaluations(eval_name=\"Hallucination\", dataframe=hallucination_eval_df),\n",
" SpanEvaluations(eval_name=\"QA Correctness\", dataframe=qa_eval_df),\n",
" SpanEvaluations(eval_name=\"Retrieval Relevance\", dataframe=retrieved_documents_relevance_df),\n",
" DocumentEvaluations(\n",
" eval_name=\"Retrieval Relevance\", dataframe=retrieved_documents_relevance_df\n",
" ),\n",
")"
]
},
Expand Down Expand Up @@ -590,6 +555,13 @@
"## Save the Trace and Evals"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"All spans and evals"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -606,7 +578,41 @@
"os.makedirs(directory, exist_ok=True)\n",
"\n",
"# Save the Trace Dataset\n",
"trace_id = px.Client().get_trace_dataset().save(directory=directory)"
"# WARNING: `limit` must be None to retrieve all spans, but `get_trace_dataset` may time out\n",
"trace_id = px.Client().get_trace_dataset(limit=None).save(directory=directory)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"LLM Spans"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from phoenix.trace.dsl import SpanQuery\n",
"\n",
"llm_open_ai = px.Client().query_spans(\n",
" SpanQuery().where(\"span_kind == 'LLM' and name == 'OpenAI.chat'\")\n",
")\n",
"\n",
"llm_predict = px.Client().query_spans(\n",
" SpanQuery().where(\"span_kind == 'LLM' and name == 'LLM.predict'\")\n",
")\n",
"\n",
"all_llm = px.Client().query_spans(SpanQuery().where(\"span_kind == 'LLM'\"))\n",
"\n",
"llm_open_ai.to_parquet(\"fixtures/demo_llama_index_llm_open_ai.parquet\")\n",
"llm_open_ai.to_json(\"fixtures/demo_llama_index_llm_open_ai.json\")\n",
"llm_predict.to_parquet(\"fixtures/demo_llama_index_llm_predict.parquet\")\n",
"llm_predict.to_json(\"fixtures/demo_llama_index_llm_predict.json\")\n",
"all_llm.to_parquet(\"fixtures/demo_llama_index_llm_all_spans.parquet\")\n",
"all_llm.to_json(\"fixtures/demo_llama_index_llm_all_spans.json\")"
]
}
],
Expand All @@ -628,7 +634,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
"version": "3.12.4"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
Expand Down
Loading