From 79ee02ada803da986c31dc51cd79ca57da7cf8b5 Mon Sep 17 00:00:00 2001 From: Sid Date: Fri, 21 Jun 2024 13:51:46 +0930 Subject: [PATCH 1/2] feat(Support Anthropic Tools): Adds Functionality for Supporting Anthropic Tools --- .../instrumentation/anthropic/__init__.py | 17 ++ .../poetry.lock | 9 +- .../pyproject.toml | 2 +- .../test_completion/test_anthropic_tools.yaml | 99 ++++++++++++ .../tests/test_completion.py | 152 ++++++++++++++++++ 5 files changed, 274 insertions(+), 5 deletions(-) create mode 100644 packages/opentelemetry-instrumentation-anthropic/tests/cassettes/test_completion/test_anthropic_tools.yaml diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py index f937b7568..25b83b560 100644 --- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py +++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py @@ -134,6 +134,23 @@ def _set_input_attributes(span, kwargs): set_span_attribute( span, f"{SpanAttributes.LLM_PROMPTS}.{i}.role", message.get("role") ) + if kwargs.get("tools"): + for i, tool in enumerate(kwargs.get("tools")): + set_span_attribute( + span, + f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.name", + tool.get("name") + ) + set_span_attribute( + span, + f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.description", + tool.get("description") + ) + set_span_attribute( + span, + f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}.input_schema", + json.dumps(tool.get("input_schema")) + ) def _set_span_completions(span, response): diff --git a/packages/opentelemetry-instrumentation-anthropic/poetry.lock b/packages/opentelemetry-instrumentation-anthropic/poetry.lock index ff0e69bfb..556d1c92e 100644 --- a/packages/opentelemetry-instrumentation-anthropic/poetry.lock +++ b/packages/opentelemetry-instrumentation-anthropic/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "annotated-types" @@ -13,12 +13,13 @@ files = [ [[package]] name = "anthropic" -version = "0.28.0" +version = "0.29.0" description = "The official Python library for the anthropic API" optional = false python-versions = ">=3.7" files = [ - {file = "anthropic-0.28.0-py3-none-any.whl", hash = "sha256:2b620b21aee3d20c5d8005483c34df239d53ae895687113b26b8a36892a7e20f"}, + {file = "anthropic-0.29.0-py3-none-any.whl", hash = "sha256:d16010715129c8bc3295b74fbf4da73cfb156618bf0abb2d007255983266b76a"}, + {file = "anthropic-0.29.0.tar.gz", hash = "sha256:3eb558a232d83bdf7cdedb75663bf7ff7a8b50cc10acaa9ce6494ff295b8506a"}, ] [package.dependencies] @@ -1450,4 +1451,4 @@ instruments = [] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "719efa2529138819d725a9966278e4c9f08998cb4e78dd072640751c889b46e6" +content-hash = "fa67b3464c43d1dc7a505e8eb89872e707d7689ca3802aacdad6591de8a5d58a" diff --git a/packages/opentelemetry-instrumentation-anthropic/pyproject.toml b/packages/opentelemetry-instrumentation-anthropic/pyproject.toml index 933b61e8d..bc467ba05 100644 --- a/packages/opentelemetry-instrumentation-anthropic/pyproject.toml +++ b/packages/opentelemetry-instrumentation-anthropic/pyproject.toml @@ -36,7 +36,7 @@ pytest = "^8.2.2" pytest-sugar = "1.0.0" [tool.poetry.group.test.dependencies] -anthropic = ">=0.21.3,<0.29.0" +anthropic = ">=0.27.0" pytest = "^8.2.2" pytest-sugar = "1.0.0" vcrpy = "^6.0.1" diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/cassettes/test_completion/test_anthropic_tools.yaml b/packages/opentelemetry-instrumentation-anthropic/tests/cassettes/test_completion/test_anthropic_tools.yaml new file mode 100644 index 000000000..ad5f22fb6 --- /dev/null +++ b/packages/opentelemetry-instrumentation-anthropic/tests/cassettes/test_completion/test_anthropic_tools.yaml @@ -0,0 +1,99 @@ +interactions: +- request: + body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is + the weather like right now in New York? Also what time is it there?"}], "model": + "claude-3-opus-20240229", "tools": [{"name": "get_weather", "description": "Get + the current weather in a given location", "input_schema": {"type": "object", + "properties": {"location": {"type": "string", "description": "The city and state, + e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], + "description": "The unit of temperature, either ''celsius'' or ''fahrenheit''"}}, + "required": ["location"]}}, {"name": "get_time", "description": "Get the current + time in a given time zone", "input_schema": {"type": "object", "properties": + {"timezone": {"type": "string", "description": "The IANA time zone name, e.g. + America/Los_Angeles"}}, "required": ["timezone"]}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '845' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.29.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 0.29.0 + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.14 + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA4RTYW/TMBD9K6f7wpe0awPbWDQhVTAQaKoQVJsGQZGb3BrT5JzZ54Wu6n9HdklX + ISQ+xbk7v3vv6XmLusIMW7cqJtN3sujO3OPVslf3b282Z0+fp1/mS0xQNh2FKXJOrQgTtKYJBeWc + dqJYMMHWVNRghmWjfEWjlyPTeTdKJ+mrSZpeYIKlYSEWzL5vB0ChX+Fq/GR4KbXmtebVm5wXBhS7 + nixIrR08eLKbBHqCXjcNMFEFYsA7AukNiDGNy3LOeTqGFUnRk5KaLIxgYUIBpCYovbXEAkNTM8yp + hztj12NYhDWWHry25OJ4jo0plWjDOUKnrGpJyCbQ17qs44R3ZKFWDjprHnVFFSgHOQ6gOQbYAORZ + yzEIaAemC9CqGQfa6Z626Jb+zTl2/ks4TD0ZpuNdew6RaqUrYCNQaUulNJuBd/Q4gaUP3kCpGDTf + B5YCS+WoAsNxweDHXxqHc1Cl95NXyglZhkWg/c0wDbYFt8LAx9l8thcV+AKrNvCftWR1qU7m1Bd/ + 0IM7X03gVatHineZyhBEu3kW6UIaWrUmWBqpYx6gVE0T64cg7W14EePkghA3zvny5BA73CXP0TSm + KbwLYY8vJPz7YjL9cHF+a+zPm0/96+v3d9e3s2mVnoUQBwmY4VH6wlXuvGC2PUQJs4N1uNv9SNCJ + 6QpLysXe0dbYcPTgiUvCjH3TJOjj+8u2e+BCzJrYYXZ+miZovBzX0snpbvcbAAD//wMAiN3y7N8D + AAA= + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 89712d1918f04631-SIN + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Fri, 21 Jun 2024 04:15:22 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + anthropic-ratelimit-requests-limit: + - '50' + anthropic-ratelimit-requests-remaining: + - '50' + anthropic-ratelimit-requests-reset: + - '2024-06-21T04:16:18Z' + anthropic-ratelimit-tokens-limit: + - '20000' + anthropic-ratelimit-tokens-remaining: + - '20000' + anthropic-ratelimit-tokens-reset: + - '2024-06-21T04:16:18Z' + request-id: + - req_01X7p5rhys5xac6P9AmQiGs4 + via: + - 1.1 google + x-cloud-trace-context: + - 791e086e0002047f61d7da1c157754d9 + status: + code: 200 + message: OK +version: 1 diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py b/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py index 1196f8619..584102695 100644 --- a/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py +++ b/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py @@ -570,3 +570,155 @@ async def test_async_anthropic_message_streaming(exporter, reader): assert found_token_metric is True assert found_choice_metric is True assert found_duration_metric is True + + +@pytest.mark.vcr +def test_anthropic_tools(exporter, reader): + client = Anthropic() + response = client.messages.create( + model="claude-3-opus-20240229", + max_tokens=1024, + tools=[ + { + "name": "get_weather", + "description": "Get the current weather in a given location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The unit of temperature, either 'celsius' or 'fahrenheit'" + } + }, + "required": ["location"] + } + }, + { + "name": "get_time", + "description": "Get the current time in a given time zone", + "input_schema": { + "type": "object", + "properties": { + "timezone": { + "type": "string", + "description": "The IANA time zone name, e.g. America/Los_Angeles" + } + }, + "required": ["timezone"] + } + } + ], + messages=[ + { + "role": "user", + "content": "What is the weather like right now in New York? Also what time is it there?" + } + ] + ) + try: + client.messages.create( + unknown_parameter="unknown", + ) + except Exception: + pass + + spans = exporter.get_finished_spans() + assert all(span.name == "anthropic.chat" for span in spans) + + anthropic_span = spans[0] + + assert ( + anthropic_span.attributes["gen_ai.prompt.0.content"] == + "What is the weather like right now in New York? Also what time is it there?" + ) + + assert (anthropic_span.attributes["gen_ai.prompt.0.role"]) == "user" + assert (anthropic_span.attributes.get("gen_ai.completion.0.content") == response.content[0].text) + + assert anthropic_span.attributes["gen_ai.usage.prompt_tokens"] == 18 + assert ( + anthropic_span.attributes["gen_ai.usage.completion_tokens"] + + anthropic_span.attributes["gen_ai.usage.prompt_tokens"] + == anthropic_span.attributes["llm.usage.total_tokens"] + ) + + assert ( + anthropic_span.attributes["llm.request.functions.0.name"] == "get_weather" + ) + assert ( + anthropic_span.attributes["llm.request.functions.0.description"] + == "Get the current weather in a given location" + ) + + assert (anthropic_span.attributes["llm.request.functions.1.name"]) == "get_time" + assert ( + anthropic_span.attributes["llm.request.functions.1.description"] + == "Get the current time in a given time zone" + ) + + assert (anthropic_span.attributes["gen_ai.completion.0.finish_reason"]) == "tool_use" + + metrics_data = reader.get_metrics_data() + resource_metrics = metrics_data.resource_metrics + assert len(resource_metrics) > 0 + + found_token_metric = False + found_choice_metric = False + found_duration_metric = False + found_exception_metric = False + + for rm in resource_metrics: + for sm in rm.scope_metrics: + for metric in sm.metrics: + if metric.name == "gen_ai.client.token.usage": + found_token_metric = True + for data_point in metric.data.data_points: + assert data_point.attributes["gen_ai.token.type"] in [ + "input", + "output", + ] + assert ( + data_point.attributes["gen_ai.response.model"] + == "claude-3-opus-20240229" + ) + assert data_point.sum > 0 + + if metric.name == "gen_ai.client.generation.choices": + found_choice_metric = True + for data_point in metric.data.data_points: + assert data_point.value >= 1 + assert ( + data_point.attributes["gen_ai.response.model"] + == "claude-3-opus-20240229" + ) + + if metric.name == "gen_ai.client.operation.duration": + found_duration_metric = True + assert any( + data_point.count > 0 for data_point in metric.data.data_points + ) + assert any( + data_point.sum > 0 for data_point in metric.data.data_points + ) + assert all( + data_point.attributes.get("gen_ai.response.model") + == "claude-3-opus-20240229" + or data_point.attributes.get("error.type") == "TypeError" + for data_point in metric.data.data_points + ) + + if metric.name == "llm.anthropic.completion.exceptions": + found_exception_metric = True + for data_point in metric.data.data_points: + assert data_point.value == 1 + assert data_point.attributes["error.type"] == "TypeError" + + assert found_token_metric is True + assert found_choice_metric is True + assert found_duration_metric is True + assert found_exception_metric is True From 1466086492dfdde2852c41e93f8b6d02e537951d Mon Sep 17 00:00:00 2001 From: Sid Date: Sun, 23 Jun 2024 14:30:51 +0930 Subject: [PATCH 2/2] Adds spans to response attributes --- .../instrumentation/anthropic/__init__.py | 33 +++++++++++++++++++ .../tests/test_completion.py | 7 ++++ 2 files changed, 40 insertions(+) diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py index 25b83b560..1521255c1 100644 --- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py +++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py @@ -337,6 +337,39 @@ def _set_response_attributes(span, response): prompt_tokens + completion_tokens, ) + if response.get("role"): + set_span_attribute(span, f"{SpanAttributes.LLM_COMPLETIONS}.role", response.get("role")) + + if response.get("stop_reason"): + set_span_attribute(span, f"{SpanAttributes.LLM_COMPLETIONS}.stop_reason", response.get("stop_reason")) + + if response.get("content"): + for i, content in enumerate(response.get("content")): + if dict(content).get('id') is not None: + set_span_attribute( + span, + f"{SpanAttributes.LLM_COMPLETIONS}.{i}.id", + dict(content).get('id'), + ) + if dict(content).get('type') is not None: + set_span_attribute( + span, + f"{SpanAttributes.LLM_COMPLETIONS}.{i}.type", + dict(content).get('type'), + ) + if dict(content).get('input') is not None: + set_span_attribute( + span, + f"{SpanAttributes.LLM_COMPLETIONS}.{i}.input", + json.dumps(dict(content).get('input')), + ) + if dict(content).get('name') is not None: + set_span_attribute( + span, + f"{SpanAttributes.LLM_COMPLETIONS}.{i}.name", + dict(content).get('name'), + ) + if should_send_prompts(): _set_span_completions(span, response) diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py b/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py index 584102695..a07a38627 100644 --- a/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py +++ b/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py @@ -663,6 +663,13 @@ def test_anthropic_tools(exporter, reader): assert (anthropic_span.attributes["gen_ai.completion.0.finish_reason"]) == "tool_use" + assert (anthropic_span.attributes["gen_ai.completion.role"]) == "assistant" + assert (anthropic_span.attributes["gen_ai.completion.0.type"]) == "text" + assert (anthropic_span.attributes["gen_ai.completion.1.type"]) == "tool_use" + assert (anthropic_span.attributes["gen_ai.completion.1.id"]) == "toolu_01G97WorjVJw8LFYLWA1d26t" + assert (anthropic_span.attributes["gen_ai.completion.1.name"]) == "get_weather" + assert (anthropic_span.attributes["gen_ai.completion.1.input"]) == json.dumps({"location": "New York"}) + metrics_data = reader.get_metrics_data() resource_metrics = metrics_data.resource_metrics assert len(resource_metrics) > 0