From ff36bd459f5db65a8d3ae6aec377e5efe28894ed Mon Sep 17 00:00:00 2001
From: Roger Yang
Date: Thu, 15 Feb 2024 17:10:47 -0800
Subject: [PATCH] docs: update markdowns for px.Client().log_evaluations()

---
 docs/how-to/define-your-schema/llm-evaluations.md   | 8 ++++----
 docs/llm-evals/quickstart-retrieval-evals/README.md | 4 ++--
 docs/quickstart/evals.md                            | 4 ++--
 docs/use-cases/rag-evaluation.md                    | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/how-to/define-your-schema/llm-evaluations.md b/docs/how-to/define-your-schema/llm-evaluations.md
index c73d3cd8462..ed281f5bbb4 100644
--- a/docs/how-to/define-your-schema/llm-evaluations.md
+++ b/docs/how-to/define-your-schema/llm-evaluations.md
@@ -24,7 +24,7 @@ The evaluations dataframe can be sent to Phoenix as follows. Note that the name
 ```python
 from phoenix.trace import SpanEvaluations
 
-px.log_evaluations(
+px.Client().log_evaluations(
     SpanEvaluations(
         dataframe=qa_correctness_eval_df,
         eval_name="Q&A Correctness",
@@ -43,7 +43,7 @@ The evaluations dataframe can be sent to Phoenix as follows. Note that the name
 ```python
 from phoenix.trace import DocumentEvaluations
 
-px.log_evaluations(
+px.Client().log_evaluations(
     DocumentEvaluations(
         dataframe=document_relevance_eval_df,
         eval_name="Relevance",
@@ -53,10 +53,10 @@ px.log_evaluations(
 
 ## Logging Multiple Evaluation DataFrames
 
-Multiple evaluation datasets can be logged by the same `px.log_evaluations()` function call.
+Multiple evaluation datasets can be logged by the same `px.Client().log_evaluations()` function call.
 
 ```
-px.log_evaluations(
+px.Client().log_evaluations(
     SpanEvaluations(
         dataframe=qa_correctness_eval_df,
         eval_name="Q&A Correctness",
diff --git a/docs/llm-evals/quickstart-retrieval-evals/README.md b/docs/llm-evals/quickstart-retrieval-evals/README.md
index 89ec4cfb07a..64f027c86ac 100644
--- a/docs/llm-evals/quickstart-retrieval-evals/README.md
+++ b/docs/llm-evals/quickstart-retrieval-evals/README.md
@@ -62,7 +62,7 @@ qa_correctness_eval["score"] = (
 ).astype(int)
 
 # Logs the Evaluations back to the Phoenix User Interface (Optional)
-px.log_evaluations(
+px.Client().log_evaluations(
     SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_eval),
     SpanEvaluations(eval_name="QA Correctness", dataframe=qa_correctness_eval),
 )
@@ -100,7 +100,7 @@ retrieved_documents_eval["score"] = (
     retrieved_documents_eval.label[~retrieved_documents_eval.label.isna()] == "relevant"
 ).astype(int)
 
-px.log_evaluations(DocumentEvaluations(eval_name="Relevance", dataframe=retrieved_documents_eval))
+px.Client().log_evaluations(DocumentEvaluations(eval_name="Relevance", dataframe=retrieved_documents_eval))
 
 ```
 
diff --git a/docs/quickstart/evals.md b/docs/quickstart/evals.md
index 2f8d5a1d77c..4fd74d61984 100644
--- a/docs/quickstart/evals.md
+++ b/docs/quickstart/evals.md
@@ -117,11 +117,11 @@ Log your evaluations to your running Phoenix session.
 ```python
 from phoenix.trace import DocumentEvaluations, SpanEvaluations
 
-px.log_evaluations(
+px.Client().log_evaluations(
     SpanEvaluations(eval_name="Hallucination", dataframe=hallucination_eval_df),
     SpanEvaluations(eval_name="QA Correctness", dataframe=qa_correctness_eval_df),
+    DocumentEvaluations(eval_name="Relevance", dataframe=relevance_eval_df),
 )
-px.log_evaluations(DocumentEvaluations(eval_name="Relevance", dataframe=relevance_eval_df))
 ```
 
 Your evaluations should now appear as annotations on your spans in Phoenix!
diff --git a/docs/use-cases/rag-evaluation.md b/docs/use-cases/rag-evaluation.md
index 3e4b00df23b..80d5243e23c 100644
--- a/docs/use-cases/rag-evaluation.md
+++ b/docs/use-cases/rag-evaluation.md
@@ -500,7 +500,7 @@ We have now evaluated our RAG system's retrieval performance. Let's send these e
 ```python
 from phoenix.trace import DocumentEvaluations, SpanEvaluations
 
-px.log_evaluations(
+px.Client().log_evaluations(
     SpanEvaluations(dataframe=ndcg_at_2, eval_name="ndcg@2"),
     SpanEvaluations(dataframe=precision_at_2, eval_name="precision@2"),
     DocumentEvaluations(dataframe=retrieved_documents_relevance_df, eval_name="relevance"),
@@ -578,7 +578,7 @@ Since we have evaluated our RAG system's QA performance and Hallucinations perfo
 ```python
 from phoenix.trace import SpanEvaluations
 
-px.log_evaluations(
+px.Client().log_evaluations(
     SpanEvaluations(dataframe=qa_correctness_eval_df, eval_name="Q&A Correctness"),
     SpanEvaluations(dataframe=hallucination_eval_df, eval_name="Hallucination"),
 )