Merge pull request #1778 from Arize-ai/main
docs: 1.2.0
  • Loading branch information
mikeldking authored Nov 17, 2023
2 parents 025d52b + 13b9ab0 commit de3817d
Showing 55 changed files with 1,557 additions and 1,070 deletions.
8 changes: 8 additions & 0 deletions .dockerignore
@@ -0,0 +1,8 @@
docs/
examples/
dist/
tests/
integration-tests/
scripts/
.github
.tours
27 changes: 27 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,32 @@
# Changelog

## [1.2.0](https://github.com/Arize-ai/phoenix/compare/v1.1.1...v1.2.0) (2023-11-17)


### Features

* Add dockerfile ([#1761](https://github.com/Arize-ai/phoenix/issues/1761)) ([4fa8929](https://github.com/Arize-ai/phoenix/commit/4fa8929f4103e9961a8df0eb059b8df149ed648f))
* **evals:** return partial results when llm function is interrupted ([#1755](https://github.com/Arize-ai/phoenix/issues/1755)) ([1fb0849](https://github.com/Arize-ai/phoenix/commit/1fb0849a4e5f39c6afc90a1417300747a0bf4bf6))
* LiteLLM model support for evals ([#1675](https://github.com/Arize-ai/phoenix/issues/1675)) ([5f2a999](https://github.com/Arize-ai/phoenix/commit/5f2a9991059e060423853567a20789eba832f65a))
* sagemaker notebook support ([#1772](https://github.com/Arize-ai/phoenix/issues/1772)) ([2c0ffbc](https://github.com/Arize-ai/phoenix/commit/2c0ffbc1479ae0255b72bc2d31d5f3204fd8e32c))


### Bug Fixes

* unpin llama-index version in tutorial notebooks ([#1766](https://github.com/Arize-ai/phoenix/issues/1766)) ([5ff74e3](https://github.com/Arize-ai/phoenix/commit/5ff74e3895f1b0c5642bd0897dd65e6f2913a7bd))


### Documentation

* add instructions for docker build ([#1770](https://github.com/Arize-ai/phoenix/issues/1770)) ([45eb5f2](https://github.com/Arize-ai/phoenix/commit/45eb5f244997d0ff0e991879c297b564e46c9a18))

## [1.1.1](https://github.com/Arize-ai/phoenix/compare/v1.1.0...v1.1.1) (2023-11-16)


### Bug Fixes

* update tracer for llama-index 0.9.0 ([#1750](https://github.com/Arize-ai/phoenix/issues/1750)) ([48d0996](https://github.com/Arize-ai/phoenix/commit/48d09960855d59419edfd10925aaa895fd370a0d))

## [1.1.0](https://github.com/Arize-ai/phoenix/compare/v1.0.0...v1.1.0) (2023-11-14)


33 changes: 33 additions & 0 deletions Dockerfile
@@ -0,0 +1,33 @@
# This dockerfile is provided for convenience if you wish to run
# Phoenix in a docker container / sidecar.
# To use this dockerfile, you must first build the phoenix image
# using the following command:
# > docker build -t phoenix .
# You can then run that image with the following command:
# > docker run -d --name phoenix -p 6006:6006 phoenix
# If you have a production use-case for phoenix, please get in touch!

# Use an official Python runtime as a parent image
FROM python:3.10

# Install nodejs
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
RUN apt-get install -y nodejs

# Set the phoenix directory in the container to /phoenix
WORKDIR /phoenix

# Add the current directory contents into the container at /phoenix
ADD . /phoenix

# Install the app by building the typescript package
RUN cd /phoenix/app && npm install && npm run build

# Install any needed packages
RUN pip install .

# Make port 6006 available to the world outside this container
EXPOSE 6006

# Run server.py when the container launches
CMD ["python", "src/phoenix/server/main.py", "--host", "0.0.0.0", "--port", "6006", "serve"]
3 changes: 2 additions & 1 deletion app/src/RelayEnvironment.ts
@@ -6,13 +6,14 @@ import {
Store,
} from "relay-runtime";

const graphQLPath = window.Config.basename + "/graphql";
/**
* Relay requires developers to configure a "fetch" function that tells Relay how to load
* the results of GraphQL queries from your server (or other data source). See more at
* https://relay.dev/docs/en/quick-start-guide#relay-environment.
*/
const fetchRelay: FetchFunction = async (params, variables, _cacheConfig) => {
const response = await fetch("/graphql", {
const response = await fetch(graphQLPath, {
method: "POST",
headers: {
"Content-Type": "application/json",
5 changes: 4 additions & 1 deletion app/src/Routes.tsx
@@ -65,7 +65,10 @@ const router = createBrowserRouter(
</Route>
</Route>
</Route>
)
),
{
basename: window.Config.basename,
}
);

export function AppRoutes() {
2 changes: 1 addition & 1 deletion app/src/pages/trace/TracePage.tsx
@@ -918,7 +918,7 @@ function LLMMessage({ message }: { message: AttributeMessage }) {
backgroundColor: "indigo-100",
borderColor: "indigo-700",
};
} else if (role === "function") {
} else if (["function", "tool"].includes(role)) {
return {
backgroundColor: "yellow-100",
borderColor: "yellow-700",
3 changes: 3 additions & 0 deletions app/src/window.d.ts
@@ -3,6 +3,9 @@ export {};
declare global {
interface Window {
Config: {
// basename for the app. This can be the proxy path for
// Remote notebooks like SageMaker
basename: string;
hasCorpus: boolean;
UMAP: {
minDist: number;
2 changes: 2 additions & 0 deletions cspell.json
@@ -19,11 +19,13 @@
"NDJSON",
"numpy",
"openai",
"openinference",
"pydantic",
"quickstart",
"RERANKER",
"respx",
"rgba",
"tensorboard",
"tiktoken",
"tracedataset",
"UMAP"
39 changes: 38 additions & 1 deletion docs/api/evaluation-models.md
@@ -72,7 +72,6 @@ Here is an example of how to initialize `OpenAIModel` for Azure:

```python
model = OpenAIModel(
model = OpenAIModel(
model_name="gpt-4-32k",
azure_endpoint="https://YOUR_SUBDOMAIN.openai.azure.com/",
api_version="2023-03-15-preview"
@@ -210,6 +209,44 @@ model = BedrockModel(client=client_bedrock)

```


### phoenix.experimental.evals.LiteLLMModel
You need to install the extra dependency ``litellm>=1.0.3``.
```python
class LiteLLMModel(BaseEvalModel):
model_name: str = "gpt-3.5-turbo"
"""The model name to use."""
temperature: float = 0.0
"""What sampling temperature to use."""
max_tokens: int = 256
"""The maximum number of tokens to generate in the completion."""
top_p: float = 1
"""Total probability mass of tokens to consider at each step."""
num_retries: int = 6
"""Maximum number to retry a model if an RateLimitError, OpenAIError, or
ServiceUnavailableError occurs."""
request_timeout: int = 60
"""Maximum number of seconds to wait when retrying."""
model_kwargs: Dict[str, Any] = field(default_factory=dict)
"""Model specific params"""

# non-LiteLLM params
retry_min_seconds: int = 10
"""Minimum number of seconds to wait when retrying."""
max_content_size: Optional[int] = None
"""If you're using a fine-tuned model, set this to the maximum content size"""
```
You can choose among [multiple models](https://docs.litellm.ai/docs/providers) supported by LiteLLM. Make sure you have the right environment variables set prior to initializing the model. For additional information about the environment variables for specific model providers, see [LiteLLM provider specific params](https://docs.litellm.ai/docs/completion/input#provider-specific-params).

Here is an example of how to initialize `LiteLLMModel` for the model "gpt-3.5-turbo":

```python
model = LiteLLMModel(model_name="gpt-3.5-turbo", temperature=0.0)
model("Hello world, this is a test if you are working?")
# Output: 'Hello! Yes, I am here and ready to assist you. How can I help you today?'
```
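
Setting the provider credentials is the only setup LiteLLM needs before the model is constructed. Below is a minimal sketch, assuming an OpenAI-hosted model whose key LiteLLM reads from the `OPENAI_API_KEY` environment variable (the import path follows the section heading above; the placeholder key and prompt are illustrative):

```python
# Sketch: export provider credentials before constructing the model.
# LiteLLM reads OPENAI_API_KEY for OpenAI-hosted models; other providers use
# their own variables (see the LiteLLM provider docs linked above).
import os

from phoenix.experimental.evals import LiteLLMModel

os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder; use your real key

model = LiteLLMModel(
    model_name="gpt-3.5-turbo",
    temperature=0.0,
    max_tokens=256,    # fields shown in the class definition above
    model_kwargs={},   # provider-specific params pass through here
)
print(model("Reply with the single word: ready"))
```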


## **Usage**

In this section, we will showcase the methods and properties that our `EvalModels` have. First, instantiate your model from the [#supported-llm-providers](evaluation-models.md#supported-llm-providers "mention"). Once you've instantiated your `model`, you can get responses from the LLM by simply calling the model and passing a text string.
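
A minimal sketch of that call pattern, assuming `OpenAIModel` from the same `phoenix.experimental.evals` namespace (the exact response text will vary):

```python
# Sketch: instantiate a supported model and call it with a plain text string.
from phoenix.experimental.evals import OpenAIModel

model = OpenAIModel(model_name="gpt-4")      # any supported provider works here
response = model("Hello, can you hear me?")  # returns the completion as a string
print(response)
```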
2 changes: 1 addition & 1 deletion docs/llm-evals/running-pre-tested-evals/README.md
@@ -19,7 +19,7 @@ model("What is the largest coastal city in France?")

We currently support a growing set of models for LLM Evals; please check out the [API section for usage](../../api/evaluation-models.md).&#x20;

<table data-full-width="false"><thead><tr><th width="357">Model</th><th>Support </th></tr></thead><tbody><tr><td>GPT-4 </td><td>✔</td></tr><tr><td>GPT-3.5 Turbo</td><td>✔</td></tr><tr><td>GPT-3.5 Instruct</td><td>✔</td></tr><tr><td>Azure Hosted Open AI </td><td>✔</td></tr><tr><td>Palm 2 Vertex</td><td>✔</td></tr><tr><td>AWS Bedrock</td><td>✔</td></tr><tr><td>Litellm</td><td>(coming soon)</td></tr><tr><td>Huggingface Llama7B</td><td>(coming soon)</td></tr><tr><td>Anthropic</td><td>(coming soon)</td></tr><tr><td>Cohere</td><td>(coming soon)</td></tr></tbody></table>
<table data-full-width="false"><thead><tr><th width="357">Model</th><th>Support </th></tr></thead><tbody><tr><td>GPT-4 </td><td>✔</td></tr><tr><td>GPT-3.5 Turbo</td><td>✔</td></tr><tr><td>GPT-3.5 Instruct</td><td>✔</td></tr><tr><td>Azure Hosted Open AI </td><td>✔</td></tr><tr><td>Palm 2 Vertex</td><td>✔</td></tr><tr><td>AWS Bedrock</td><td>✔</td></tr><tr><td>Litellm</td><td></td></tr><tr><td>Huggingface Llama7B</td><td>(coming soon)</td></tr><tr><td>Anthropic</td><td>(coming soon)</td></tr><tr><td>Cohere</td><td>(coming soon)</td></tr></tbody></table>

## How we benchmark pre-tested evals&#x20;

21 changes: 11 additions & 10 deletions integration-tests/trace/llama_index/test_callback.py
@@ -11,7 +11,7 @@
from llama_index.callbacks import CallbackManager
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.simple import SimpleGraphStore
from llama_index.indices.postprocessor.cohere_rerank import CohereRerank
from llama_index.indices.postprocessor import CohereRerank
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.llms import OpenAI
from llama_index.query_engine import RetrieverQueryEngine
@@ -180,11 +180,9 @@ def add(a: int, b: int) -> int:
"temperature": 0,
}
assert llm_spans[0].attributes[OUTPUT_MIME_TYPE] is MimeType.JSON
assert json.loads(llm_spans[0].attributes[OUTPUT_VALUE]) == {
"function_call": {
"name": "multiply",
"arguments": '{\n "a": 2,\n "b": 3\n}',
}
assert json.loads(llm_spans[0].attributes[OUTPUT_VALUE])["tool_calls"][0]["function"] == {
"name": "multiply",
"arguments": '{\n "a": 2,\n "b": 3\n}',
}
assert llm_spans[0].attributes[LLM_INPUT_MESSAGES] == [
{
@@ -205,7 +203,10 @@ def add(a: int, b: int) -> int:
},
]
assert llm_spans[1].attributes[OUTPUT_MIME_TYPE] is MimeType.TEXT
assert llm_spans[1].attributes[OUTPUT_VALUE] == "2 multiplied by 3 equals 6."
assert llm_spans[1].attributes[OUTPUT_VALUE] in (
"2 multiplied by 3 equals 6.",
"2 multiplied by 3 is equal to 6.",
)
assert llm_spans[1].attributes.get(LLM_INPUT_MESSAGES) == [
{
"message.role": "user",
@@ -218,18 +219,18 @@ def add(a: int, b: int) -> int:
"message.function_call_name": "multiply",
},
{
"message.role": "function",
"message.role": "tool",
"message.content": "6",
"message.name": "multiply",
},
]
assert llm_spans[1].attributes[LLM_OUTPUT_MESSAGES] == [
{
"message.content": "2 multiplied by 3 equals 6.",
"message.content": llm_spans[1].attributes[OUTPUT_VALUE],
"message.role": "assistant",
},
{
"message.content": "2 multiplied by 3 equals 6.",
"message.content": llm_spans[1].attributes[OUTPUT_VALUE],
"message.role": "assistant",
},
]
13 changes: 10 additions & 3 deletions pyproject.toml
@@ -55,12 +55,16 @@ dev = [
"strawberry-graphql[debug-server]==0.208.2",
"pre-commit",
"arize[AutoEmbeddings, LLM_Evaluation]",
"llama-index>=0.8.64",
"llama-index>=0.9.0",
"langchain>=0.0.334",
"litellm>=1.0.3"
]
experimental = [
"tenacity",
]
llama-index = [
"llama-index~=0.9.0",
]

[project.urls]
Documentation = "https://docs.arize.com/phoenix/"
@@ -92,7 +96,8 @@ dependencies = [
"pytest-lazy-fixture",
"arize",
"langchain>=0.0.334",
"llama-index>=0.8.63.post2",
"litellm>=1.0.3",
"llama-index>=0.9.0",
"openai>=1.0.0",
"tenacity",
"nltk==3.8.1",
@@ -110,13 +115,14 @@ dependencies = [
[tool.hatch.envs.type]
dependencies = [
"mypy==1.5.1",
"llama-index>=0.8.64",
"llama-index>=0.9.0",
"pandas-stubs<=2.0.2.230605", # version 2.0.3.230814 is causing a dependency conflict.
"types-psutil",
"types-tqdm",
"types-requests",
"types-protobuf",
"openai>=1.0.0",
"litellm>=1.0.3"
]

[tool.hatch.envs.style]
@@ -270,6 +276,7 @@ module = [
"wrapt",
"sortedcontainers",
"langchain.*",
"litellm"
]
ignore_missing_imports = true

2 changes: 1 addition & 1 deletion scripts/rag/llama_index_w_evals_and_qa.py
@@ -319,7 +319,7 @@ def df_evals(
model=model,
template=templates.RAG_RELEVANCY_PROMPT_TEMPLATE,
rails=list(templates.RAG_RELEVANCY_PROMPT_RAILS_MAP.values()),
# query_column_name="question",
query_column_name="query",
# document_column_name="retrieved_context_list",
)

2 changes: 1 addition & 1 deletion src/phoenix/__init__.py
@@ -5,7 +5,7 @@
from .trace.fixtures import load_example_traces
from .trace.trace_dataset import TraceDataset

__version__ = "1.1.0"
__version__ = "1.2.0"

# module level doc-string
__doc__ = """
2 changes: 1 addition & 1 deletion src/phoenix/config.py
@@ -39,7 +39,7 @@ def get_running_pid() -> Optional[int]:
# The host the server will run on after launch_app is called
HOST = "127.0.0.1"
# The port the server will run on after launch_app is called
PORT = 6060
PORT = 6006
# The prefix of datasets that are auto-assigned a name
GENERATED_DATASET_NAME_PREFIX = "phoenix_dataset_"

14 changes: 7 additions & 7 deletions src/phoenix/core/traces.py
@@ -25,7 +25,7 @@
from typing_extensions import TypeAlias
from wrapt import ObjectProxy

import phoenix.trace.v1.trace_pb2 as pb
import phoenix.trace.v1 as pb
from phoenix.datetime_utils import right_open_time_range
from phoenix.trace import semantic_conventions
from phoenix.trace.schemas import (
@@ -37,7 +37,9 @@
SpanID,
TraceID,
)
from phoenix.trace.v1 import decode, encode
from phoenix.trace.v1.utils import decode, encode

END_OF_QUEUE = None # sentinel value for queue termination

NAME = "name"
STATUS_CODE = "status_code"
@@ -112,7 +114,7 @@ class Traces:
def __init__(self, spans: Optional[Iterable[Span]] = None) -> None:
self._queue: "SimpleQueue[Optional[pb.Span]]" = SimpleQueue()
# Putting `None` as the sentinel value for queue termination.
weakref.finalize(self, self._queue.put, None)
weakref.finalize(self, self._queue.put, END_OF_QUEUE)
for span in spans or ():
self.put(span)
self._lock = RLock()
@@ -224,11 +226,9 @@ def _start_consumer(self) -> None:
).start()

def _consume_spans(self) -> None:
while True:
if not (span := self._queue.get()):
return
while (item := self._queue.get()) is not END_OF_QUEUE:
with self._lock:
self._process_span(span)
self._process_span(item)

def _process_span(self, span: pb.Span) -> None:
span_id = UUID(bytes=span.context.span_id)