From 394f2cd60895c125e713187882db931d85309ce5 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 16 Dec 2024 09:04:45 -0800 Subject: [PATCH 1/3] fix: extract token counts for groq when streaming --- .../instrumentation/llama_index/_handler.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/instrumentation/openinference-instrumentation-llama-index/src/openinference/instrumentation/llama_index/_handler.py b/python/instrumentation/openinference-instrumentation-llama-index/src/openinference/instrumentation/llama_index/_handler.py index c2a031122..6fc5dc3c3 100644 --- a/python/instrumentation/openinference-instrumentation-llama-index/src/openinference/instrumentation/llama_index/_handler.py +++ b/python/instrumentation/openinference-instrumentation-llama-index/src/openinference/instrumentation/llama_index/_handler.py @@ -535,6 +535,16 @@ def _extract_token_counts(self, response: Union[ChatResponse, CompletionResponse ): for k, v in _get_token_counts(usage): self[k] = v + if ( + (raw := getattr(response, "raw", None)) + and (model_extra := getattr(raw, "model_extra", None)) + and hasattr(model_extra, "get") + and (x_groq := model_extra.get("x_groq")) + and hasattr(x_groq, "get") + and (usage := x_groq.get("usage")) + ): + for k, v in _get_token_counts(usage): + self[k] = v # Look for token counts in additional_kwargs of the completion payload # This is needed for non-OpenAI models if additional_kwargs := getattr(response, "additional_kwargs", None): From 94dea55a8030b745b4ca99ad6cafa4a21fb6331d Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 16 Dec 2024 13:29:42 -0800 Subject: [PATCH 2/3] add test --- .../pyproject.toml | 2 + .../test_groq_astream_chat_token_count.yaml | 118 ++++++++++++++++++ .../llama_index/test_token_counts.py | 43 +++++++ 3 files changed, 163 insertions(+) create mode 100644 
python/instrumentation/openinference-instrumentation-llama-index/tests/openinference/instrumentation/llama_index/cassettes/test_groq_astream_chat_token_count.yaml create mode 100644 python/instrumentation/openinference-instrumentation-llama-index/tests/openinference/instrumentation/llama_index/test_token_counts.py diff --git a/python/instrumentation/openinference-instrumentation-llama-index/pyproject.toml b/python/instrumentation/openinference-instrumentation-llama-index/pyproject.toml index 3a5356b72..6bc5b3ce1 100644 --- a/python/instrumentation/openinference-instrumentation-llama-index/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-llama-index/pyproject.toml @@ -43,6 +43,8 @@ test = [ "llama-index == 0.11.0", "llama-index-core >= 0.11.0", "llama-index-llms-openai", + "llama-index-llms-groq", + "pytest-vcr", "llama-index-multi-modal-llms-openai>=0.1.7", "openinference-instrumentation-openai", "opentelemetry-sdk", diff --git a/python/instrumentation/openinference-instrumentation-llama-index/tests/openinference/instrumentation/llama_index/cassettes/test_groq_astream_chat_token_count.yaml b/python/instrumentation/openinference-instrumentation-llama-index/tests/openinference/instrumentation/llama_index/cassettes/test_groq_astream_chat_token_count.yaml new file mode 100644 index 000000000..bd5fa27b5 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-llama-index/tests/openinference/instrumentation/llama_index/cassettes/test_groq_astream_chat_token_count.yaml @@ -0,0 +1,118 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "llama3-8b-8192", + "stream": true, "temperature": 0.1}' + headers: {} + method: POST + uri: https://api.groq.com/openai/v1/chat/completions + response: + body: + string: 'data: 
{"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"x_groq":{"id":"req_01jf8nmy68fxxvgnfqz4m90h20"}} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + It"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"''s"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + nice"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + meet"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + Is"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + there"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + something"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + help"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + with"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + or"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + would"},"logprobs":null,"finish_reason":null}]} + + + data: 
{"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + like"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + to"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":" + chat"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}]} + + + data: {"id":"chatcmpl-3de22103-9266-4903-ba6b-a5ce224c638a","object":"chat.completion.chunk","created":1734384187,"model":"llama3-8b-8192","system_fingerprint":"fp_179b0f92c9","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"x_groq":{"id":"req_01jf8nmy68fxxvgnfqz4m90h20","usage":{"queue_time":0.004289418,"prompt_tokens":12,"prompt_time":0.001769862,"completion_tokens":26,"completion_time":0.021666667,"total_tokens":38,"total_time":0.023436529}}} + + + data: [DONE] + + + ' + headers: {} + status: + code: 200 + message: OK +version: 1 diff --git 
a/python/instrumentation/openinference-instrumentation-llama-index/tests/openinference/instrumentation/llama_index/test_token_counts.py b/python/instrumentation/openinference-instrumentation-llama-index/tests/openinference/instrumentation/llama_index/test_token_counts.py new file mode 100644 index 000000000..88893cf11 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-llama-index/tests/openinference/instrumentation/llama_index/test_token_counts.py @@ -0,0 +1,43 @@ +from typing import Iterator + +import pytest +from llama_index.core.base.llms.types import ChatMessage +from llama_index.llms.groq import Groq # type: ignore[import-untyped] +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.trace import TracerProvider + +from openinference.instrumentation.llama_index import LlamaIndexInstrumentor +from openinference.semconv.trace import SpanAttributes + + +@pytest.mark.vcr( + decode_compressed_response=True, + before_record_request=lambda _: _.headers.clear() or _, + before_record_response=lambda _: {**_, "headers": {}}, +) +async def test_groq_astream_chat_token_count( + in_memory_span_exporter: InMemorySpanExporter, +) -> None: + result = await Groq(model="llama3-8b-8192").astream_chat([ChatMessage(content="Hello!")]) + async for _ in result: + pass + span = in_memory_span_exporter.get_finished_spans()[0] + assert span.attributes + assert span.attributes.get(LLM_TOKEN_COUNT_PROMPT) + assert span.attributes.get(LLM_TOKEN_COUNT_COMPLETION) + assert span.attributes.get(LLM_TOKEN_COUNT_TOTAL) + + +@pytest.fixture(autouse=True) +def instrument( + tracer_provider: TracerProvider, + in_memory_span_exporter: InMemorySpanExporter, +) -> Iterator[None]: + LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider) + yield + LlamaIndexInstrumentor().uninstrument() + + +LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION +LLM_TOKEN_COUNT_PROMPT = 
SpanAttributes.LLM_TOKEN_COUNT_PROMPT +LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL From 1cb26b9e427458e13c9ca64f58986c9ab5d9515f Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 16 Dec 2024 13:41:57 -0800 Subject: [PATCH 3/3] remove py38 --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 960fda7f4..2c164e4df 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -8,7 +8,7 @@ envlist = py3{9,12}-ci-{mistralai,mistralai-latest} py3{8,12}-ci-{openai,openai-latest} py3{8,12}-ci-{vertexai,vertexai-latest} - py3{8,12}-ci-{llama_index,llama_index-latest} + py3{9,12}-ci-{llama_index,llama_index-latest} py3{9,12}-ci-{dspy,dspy-latest} py3{9,12}-ci-{langchain,langchain-latest} ; py3{9,12}-ci-{guardrails,guardrails-latest}