diff --git a/src/phoenix/trace/fixtures.py b/src/phoenix/trace/fixtures.py index 34632f4f31..fc17e2a417 100644 --- a/src/phoenix/trace/fixtures.py +++ b/src/phoenix/trace/fixtures.py @@ -85,7 +85,7 @@ class TracesFixture: demo_llama_index_rag_fixture = TracesFixture( name="demo_llama_index_rag", description="Traces and evaluations of a RAG chatbot using LlamaIndex.", - file_name="demo_llama_index_rag.parquet", + file_name="demo_llama_index_rag_traces.parquet", evaluation_fixtures=( EvaluationFixture( evaluation_name="Q&A Correctness", @@ -102,6 +102,12 @@ class TracesFixture: ), ) +demo_llama_index_rag_llm_fixture = TracesFixture( + name="demo_llama_index_rag_llm", + description="LLM traces for RAG chatbot using LlamaIndex.", + file_name="demo_llama_index_llm_all_spans.parquet", +) + llama_index_rag_fixture = TracesFixture( name="llama_index_rag", description="Traces from running the llama_index on a RAG use case.", @@ -196,6 +202,7 @@ class TracesFixture: TRACES_FIXTURES: List[TracesFixture] = [ demo_llama_index_rag_fixture, + demo_llama_index_rag_llm_fixture, llama_index_rag_fixture, llama_index_rag_fixture_with_davinci, langchain_rag_stuff_document_chain_fixture, diff --git a/tutorials/internal/demo_llama_index/demo_llama_index_rag.ipynb b/tutorials/internal/demo_llama_index/demo_llama_index_rag.ipynb index 96e9bcb9fe..35bf475965 100644 --- a/tutorials/internal/demo_llama_index/demo_llama_index_rag.ipynb +++ b/tutorials/internal/demo_llama_index/demo_llama_index_rag.ipynb @@ -77,15 +77,8 @@ }, "outputs": [], "source": [ - "import phoenix as px" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "import phoenix as px\n", + "\n", "session = px.launch_app()" ] }, @@ -154,7 +147,9 @@ "id": "PWDCQJN1co7-" }, "source": [ - "Enable Phoenix tracing via `LlamaIndexInstrumentor`. Phoenix uses OpenInference traces - an open-source standard for capturing and storing LLM application traces that enables LLM applications to seamlessly integrate with LLM observability solutions such as Phoenix." + "Enable Phoenix tracing via `LlamaIndexInstrumentor`. \n", + "\n", + "Phoenix uses OpenInference traces - an open-source standard for capturing and storing LLM application traces that enables LLM applications to seamlessly integrate with LLM observability solutions such as Phoenix." ] }, { @@ -318,22 +313,7 @@ }, "outputs": [], "source": [ - "questions_df = pd.read_parquet(\"fixtures/demo_llama_index_rag_questions.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 424 - }, - "id": "qKpbdBFwsKuC", - "outputId": "cd5feb82-1643-440b-dd21-9294cdb09394" - }, - "outputs": [], - "source": [ + "questions_df = pd.read_parquet(\"demo_llama_index_rag_questions.parquet\")\n", "questions_df" ] }, @@ -460,14 +440,22 @@ "eval_model = OpenAIModel(model=\"gpt-4\")\n", "relevance_evaluator = RelevanceEvaluator(eval_model)\n", "hallucination_evaluator = HallucinationEvaluator(eval_model)\n", - "qa_evaluator = QAEvaluator(eval_model)\n", - "\n", + "qa_evaluator = QAEvaluator(eval_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "retrieved_documents_relevance_df = run_evals(\n", " evaluators=[relevance_evaluator],\n", " dataframe=retrieved_documents_df,\n", " provide_explanation=True,\n", " concurrency=20,\n", - ")[0]" + ")[0]\n", + "retrieved_documents_relevance_df" ] }, { @@ -501,42 +489,17 @@ " evaluators=[hallucination_evaluator, qa_evaluator],\n", " provide_explanation=True,\n", " concurrency=20,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 455 - }, - "id": "RaZwCSFico8I", - "outputId": "14ef12b7-af63-4ab1-ec14-c9b7d198ed54" - }, - "outputs": [], - "source": [ - "retrieved_documents_relevance_df = retrieved_documents_relevance_df.reset_index().set_index(\n", - " \"context.span_id\"\n", ")\n", - "retrieved_documents_relevance_df" + "hallucination_eval_df" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 238 - }, - "id": "cCPWGtd0zPIl", - "outputId": "cc6e820f-7b58-4f84-dd9e-2a926f48b163" - }, + "metadata": {}, "outputs": [], "source": [ - "hallucination_eval_df.head()" + "qa_eval_df" ] }, { @@ -556,12 +519,14 @@ }, "outputs": [], "source": [ - "from phoenix.trace import SpanEvaluations\n", + "from phoenix.trace import DocumentEvaluations, SpanEvaluations\n", "\n", "px.Client().log_evaluations(\n", " SpanEvaluations(eval_name=\"Hallucination\", dataframe=hallucination_eval_df),\n", " SpanEvaluations(eval_name=\"QA Correctness\", dataframe=qa_eval_df),\n", - " SpanEvaluations(eval_name=\"Retrieval Relevance\", dataframe=retrieved_documents_relevance_df),\n", + " DocumentEvaluations(\n", + " eval_name=\"Retrieval Relevance\", dataframe=retrieved_documents_relevance_df\n", + " ),\n", ")" ] }, @@ -590,6 +555,13 @@ "## Save the Trace and Evals" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All spans and evals" + ] + }, { "cell_type": "code", "execution_count": null, @@ -606,7 +578,41 @@ "os.makedirs(directory, exist_ok=True)\n", "\n", "# Save the Trace Dataset\n", - "trace_id = px.Client().get_trace_dataset().save(directory=directory)" + "# WARNING: limit should be set to None to get all spans but `get_trace_dataset` may timeout\n", + "trace_id = px.Client().get_trace_dataset(limit=None).save(directory=directory)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "LLM Spans" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from phoenix.trace.dsl import SpanQuery\n", + "\n", + "llm_open_ai = px.Client().query_spans(\n", + " SpanQuery().where(\"span_kind == 'LLM' and name == 'OpenAI.chat'\")\n", + ")\n", + "\n", + "llm_predict = px.Client().query_spans(\n", + " SpanQuery().where(\"span_kind == 'LLM' and name == 'LLM.predict'\")\n", + ")\n", + "\n", + "all_llm = px.Client().query_spans(SpanQuery().where(\"span_kind == 'LLM'\"))\n", + "\n", + "llm_open_ai.to_parquet(\"fixtures/demo_llama_index_llm_open_ai.parquet\")\n", + "llm_open_ai.to_json(\"fixtures/demo_llama_index_llm_open_ai.json\")\n", + "llm_predict.to_parquet(\"fixtures/demo_llama_index_llm_predict.parquet\")\n", + "llm_predict.to_json(\"fixtures/demo_llama_index_llm_predict.json\")\n", + "all_llm.to_parquet(\"fixtures/demo_llama_index_llm_all_spans.parquet\")\n", + "all_llm.to_json(\"fixtures/demo_llama_index_llm_all_spans.json\")" ] } ], @@ -628,7 +634,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.12.4" }, "widgets": { "application/vnd.jupyter.widget-state+json": {