Commit 9f6a181

use upstream's bedrock runtime extension

1 parent 8081d7d commit 9f6a181

6 files changed: +18 −316 lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py

Lines changed: 1 addition & 218 deletions
@@ -2,13 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 import abc
 import inspect
-import io
-import json
 import logging
-import math
-from typing import Any, Dict, Optional
-
-from botocore.response import StreamingBody
+from typing import Dict, Optional
 
 from amazon.opentelemetry.distro._aws_attribute_keys import (
     AWS_BEDROCK_AGENT_ID,
@@ -17,16 +12,6 @@
     AWS_BEDROCK_GUARDRAIL_ID,
     AWS_BEDROCK_KNOWLEDGE_BASE_ID,
 )
-from amazon.opentelemetry.distro._aws_span_processing_util import (
-    GEN_AI_REQUEST_MAX_TOKENS,
-    GEN_AI_REQUEST_MODEL,
-    GEN_AI_REQUEST_TEMPERATURE,
-    GEN_AI_REQUEST_TOP_P,
-    GEN_AI_RESPONSE_FINISH_REASONS,
-    GEN_AI_SYSTEM,
-    GEN_AI_USAGE_INPUT_TOKENS,
-    GEN_AI_USAGE_OUTPUT_TOKENS,
-)
 from opentelemetry.instrumentation.botocore.extensions.types import (
     _AttributeMapT,
     _AwsSdkCallContext,
@@ -245,205 +230,3 @@ def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _Bo
                 AWS_BEDROCK_GUARDRAIL_ARN,
                 guardrail_arn,
             )
-
-
-class _BedrockRuntimeExtension(_AwsSdkExtension):
-    """
-    This class is an extension for <a
-    href="https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html">
-    Amazon Bedrock Runtime</a>.
-    """
-
-    def extract_attributes(self, attributes: _AttributeMapT):
-        attributes[GEN_AI_SYSTEM] = _AWS_BEDROCK_SYSTEM
-
-        model_id = self._call_context.params.get(_MODEL_ID)
-        if model_id:
-            attributes[GEN_AI_REQUEST_MODEL] = model_id
-
-            # Get the request body if it exists
-            body = self._call_context.params.get("body")
-            if body:
-                try:
-                    request_body = json.loads(body)
-
-                    if "amazon.titan" in model_id:
-                        self._extract_titan_attributes(attributes, request_body)
-                    if "amazon.nova" in model_id:
-                        self._extract_nova_attributes(attributes, request_body)
-                    elif "anthropic.claude" in model_id:
-                        self._extract_claude_attributes(attributes, request_body)
-                    elif "meta.llama" in model_id:
-                        self._extract_llama_attributes(attributes, request_body)
-                    elif "cohere.command" in model_id:
-                        self._extract_cohere_attributes(attributes, request_body)
-                    elif "ai21.jamba" in model_id:
-                        self._extract_ai21_attributes(attributes, request_body)
-                    elif "mistral" in model_id:
-                        self._extract_mistral_attributes(attributes, request_body)
-
-                except json.JSONDecodeError:
-                    _logger.debug("Error: Unable to parse the body as JSON")
-
-    def _extract_titan_attributes(self, attributes, request_body):
-        config = request_body.get("textGenerationConfig", {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("topP"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount"))
-
-    def _extract_nova_attributes(self, attributes, request_body):
-        config = request_body.get("inferenceConfig", {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("top_p"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("max_new_tokens"))
-
-    def _extract_claude_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_cohere_attributes(self, attributes, request_body):
-        prompt = request_body.get("message")
-        if prompt:
-            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p"))
-
-    def _extract_ai21_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_llama_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_mistral_attributes(self, attributes, request_body):
-        prompt = request_body.get("prompt")
-        if prompt:
-            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    @staticmethod
-    def _set_if_not_none(attributes, key, value):
-        if value is not None:
-            attributes[key] = value
-
-    # pylint: disable=too-many-branches
-    def on_success(self, span: Span, result: Dict[str, Any], instrumentor_context: _BotocoreInstrumentorContext):
-        model_id = self._call_context.params.get(_MODEL_ID)
-
-        if not model_id:
-            return
-
-        if "body" in result and isinstance(result["body"], StreamingBody):
-            original_body = None
-            try:
-                original_body = result["body"]
-                body_content = original_body.read()
-
-                # Use one stream for telemetry
-                stream = io.BytesIO(body_content)
-                telemetry_content = stream.read()
-                response_body = json.loads(telemetry_content.decode("utf-8"))
-                if "amazon.titan" in model_id:
-                    self._handle_amazon_titan_response(span, response_body)
-                if "amazon.nova" in model_id:
-                    self._handle_amazon_nova_response(span, response_body)
-                elif "anthropic.claude" in model_id:
-                    self._handle_anthropic_claude_response(span, response_body)
-                elif "meta.llama" in model_id:
-                    self._handle_meta_llama_response(span, response_body)
-                elif "cohere.command" in model_id:
-                    self._handle_cohere_command_response(span, response_body)
-                elif "ai21.jamba" in model_id:
-                    self._handle_ai21_jamba_response(span, response_body)
-                elif "mistral" in model_id:
-                    self._handle_mistral_mistral_response(span, response_body)
-                # Replenish stream for downstream application use
-                new_stream = io.BytesIO(body_content)
-                result["body"] = StreamingBody(new_stream, len(body_content))
-
-            except json.JSONDecodeError:
-                _logger.debug("Error: Unable to parse the response body as JSON")
-            except Exception as e:  # pylint: disable=broad-exception-caught, invalid-name
-                _logger.debug("Error processing response: %s", e)
-            finally:
-                if original_body is not None:
-                    original_body.close()
-
-    # pylint: disable=no-self-use
-    def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]):
-        if "inputTextTokenCount" in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"])
-        if "results" in response_body and response_body["results"]:
-            result = response_body["results"][0]
-            if "tokenCount" in result:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"])
-            if "completionReason" in result:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result["completionReason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_amazon_nova_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "inputTokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["inputTokens"])
-            if "outputTokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["outputTokens"])
-        if "stopReason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "input_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"])
-            if "output_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"])
-        if "stop_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]):
-        # Output tokens: Approximate from the response text
-        if "text" in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / 6))
-        if "finish_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "prompt_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"])
-            if "completion_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["completion_tokens"])
-        if "choices" in response_body:
-            choices = response_body["choices"][0]
-            if "finish_reason" in choices:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]):
-        if "prompt_token_count" in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"])
-        if "generation_token_count" in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"])
-        if "stop_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]):
-        if "outputs" in response_body:
-            outputs = response_body["outputs"][0]
-            if "text" in outputs:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / 6))
-            if "stop_reason" in outputs:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
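Note: the trickiest part of the removed `on_success` is its read-and-replenish handling: a botocore `StreamingBody` can be consumed only once, so the extension drained it for telemetry and then rewrapped the bytes so the application could still read the response. A minimal, self-contained sketch of that pattern (the `read_and_replenish` name and the fake payload below are illustrative, not from the commit):

```python
import io
import json

from botocore.response import StreamingBody


def read_and_replenish(result: dict) -> dict:
    """Consume result["body"] for inspection, then restore an equivalent stream."""
    original_body = result["body"]
    try:
        body_content = original_body.read()  # drains the underlying stream
        response_body = json.loads(body_content.decode("utf-8"))
        # ... set span attributes from response_body here ...
        # Rewrap so downstream callers can still call result["body"].read()
        result["body"] = StreamingBody(io.BytesIO(body_content), len(body_content))
    finally:
        original_body.close()
    return result


# Illustrative usage with a fake response payload:
payload = b'{"stop_reason": "end_turn"}'
fake_result = {"body": StreamingBody(io.BytesIO(payload), len(payload))}
read_and_replenish(fake_result)
print(fake_result["body"].read())  # b'{"stop_reason": "end_turn"}'
```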

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py

Lines changed: 4 additions & 5 deletions
@@ -19,7 +19,6 @@
     _BedrockAgentExtension,
     _BedrockAgentRuntimeExtension,
     _BedrockExtension,
-    _BedrockRuntimeExtension,
 )
 from opentelemetry.instrumentation.botocore.extensions import _KNOWN_EXTENSIONS
 from opentelemetry.instrumentation.botocore.extensions.lmbd import _LambdaExtension
@@ -196,17 +195,17 @@ def patch_on_success(self, span: Span, result: _BotoResultT, instrumentor_contex
 
 
 def _apply_botocore_bedrock_patch() -> None:
-    """Botocore instrumentation patch for Bedrock, Bedrock Agent, Bedrock Runtime and Bedrock Agent Runtime
+    """Botocore instrumentation patch for Bedrock, Bedrock Agent, and Bedrock Agent Runtime
 
     This patch adds an extension to the upstream's list of known extension for Bedrock.
     Extensions allow for custom logic for adding service-specific information to spans, such as attributes.
-    Specifically, we are adding logic to add the AWS_BEDROCK attributes referenced in _aws_attribute_keys,
-    GEN_AI_REQUEST_MODEL and GEN_AI_SYSTEM attributes referenced in _aws_span_processing_util.
+    Specifically, we are adding logic to add the AWS_BEDROCK attributes referenced in _aws_attribute_keys.
+    Note: Bedrock Runtime uses the upstream extension directly.
     """
     _KNOWN_EXTENSIONS["bedrock"] = _lazy_load(".", "_BedrockExtension")
     _KNOWN_EXTENSIONS["bedrock-agent"] = _lazy_load(".", "_BedrockAgentExtension")
     _KNOWN_EXTENSIONS["bedrock-agent-runtime"] = _lazy_load(".", "_BedrockAgentRuntimeExtension")
-    _KNOWN_EXTENSIONS["bedrock-runtime"] = _lazy_load(".", "_BedrockRuntimeExtension")
+    # bedrock-runtime is handled by upstream
 
 
 # The OpenTelemetry Authors code
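The registration above is the whole patch mechanism: `_KNOWN_EXTENSIONS` maps a botocore endpoint prefix to a zero-argument loader, and this commit simply stops registering a loader for `bedrock-runtime`, letting upstream's own entry apply. A hedged sketch of the lazy-load helper this relies on, mirroring the pattern visible in the diff (the distro's actual implementation may differ in detail):

```python
import importlib


def _lazy_load(module: str, cls: str):
    """Return a loader that imports `cls` from `module` on first use.

    Deferring the import keeps extension modules out of the startup path
    until the matching botocore service is actually called.
    """

    def loader():
        imported_mod = importlib.import_module(module, __package__)
        return getattr(imported_mod, cls, None)

    return loader


# Illustrative registration, matching the shape used in the patch above:
_KNOWN_EXTENSIONS = {
    "bedrock-agent": _lazy_load(".", "_BedrockAgentExtension"),
    # "bedrock-runtime" deliberately absent: upstream's entry applies.
}
```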

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py

Lines changed: 3 additions & 30 deletions
@@ -274,17 +274,6 @@ def _test_patched_botocore_instrumentation(self):
             output_prompt=cohere_output,
         )
 
-        # BedrockRuntime - AI21 Jambda
-        self._test_patched_bedrock_runtime_invoke_model(
-            model_id="ai21.jamba-1-5-large-v1:0",
-            max_tokens=512,
-            temperature=0.5,
-            top_p=0.999,
-            finish_reason="end_turn",
-            input_tokens=23,
-            output_tokens=36,
-        )
-
         # BedrockRuntime - Mistral
         msg = "Hello World"
         mistral_input = f"<s>[INST] {msg} [/INST]"
@@ -429,7 +418,7 @@ def get_model_response_request():
                 "inferenceConfig": {
                     "max_new_tokens": max_tokens,
                     "temperature": temperature,
-                    "top_p": top_p,
+                    "topP": top_p,
                 }
             }
 
@@ -453,22 +442,6 @@ def get_model_response_request():
                 "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens},
             }
 
-        if "ai21.jamba" in model_id:
-            request_body = {
-                "max_tokens": max_tokens,
-                "temperature": temperature,
-                "top_p": top_p,
-            }
-
-            response_body = {
-                "choices": [{"finish_reason": finish_reason}],
-                "usage": {
-                    "prompt_tokens": input_tokens,
-                    "completion_tokens": output_tokens,
-                    "total_tokens": (input_tokens + output_tokens),
-                },
-            }
-
         if "meta.llama" in model_id:
             request_body = {
                 "max_gen_len": max_tokens,
@@ -512,10 +485,10 @@ def get_model_response_request():
     request_body, response_body = get_model_response_request()
 
     bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
-        "bedrock-runtime", model_id=model_id, request_body=request_body
+        "bedrock-runtime", operation="InvokeModel", model_id=model_id, request_body=request_body
    )
     bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
-        "bedrock-runtime", model_id=model_id, streaming_body=response_body
+        "bedrock-runtime", operation="InvokeModel", model_id=model_id, streaming_body=response_body
    )
 
     bedrock_runtime_attributes.update(bedrock_runtime_success_attributes)

contract-tests/images/applications/botocore/botocore_server.py

Lines changed: 1 addition & 27 deletions
@@ -435,7 +435,7 @@ def get_model_request_response(path):
             "inferenceConfig": {
                 "max_new_tokens": 800,
                 "temperature": 0.9,
-                "top_p": 0.7,
+                "topP": 0.7,
             },
         }
 
@@ -496,32 +496,6 @@ def get_model_request_response(path):
             "text": "test-generation-text",
         }
 
-    if "ai21.jamba" in path:
-        model_id = "ai21.jamba-1-5-large-v1:0"
-
-        request_body = {
-            "messages": [
-                {
-                    "role": "user",
-                    "content": prompt,
-                },
-            ],
-            "top_p": 0.8,
-            "temperature": 0.6,
-            "max_tokens": 512,
-        }
-
-        response_body = {
-            "stop_reason": "end_turn",
-            "usage": {
-                "prompt_tokens": 21,
-                "completion_tokens": 24,
-            },
-            "choices": [
-                {"finish_reason": "stop"},
-            ],
-        }
-
    if "mistral" in path:
        model_id = "mistral.mistral-7b-instruct-v0:2"
 
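Both this contract-test server and the unit test above fix the same key: the Nova `inferenceConfig` uses the camelCase `topP`, so the snake_case `top_p` did not match what the upstream extension extracts. A hedged sketch of an `InvokeModel` call with the corrected shape, reusing the values from this diff (requires AWS credentials and model access; the region and Nova model ID are illustrative choices, not from this commit):

```python
import json

import boto3

# Illustrative client setup; region is an assumption.
client = boto3.client("bedrock-runtime", region_name="us-east-1")

body = {
    "messages": [{"role": "user", "content": [{"text": "Hello World"}]}],
    "inferenceConfig": {
        "max_new_tokens": 800,
        "temperature": 0.9,
        "topP": 0.7,  # camelCase, per the fix in this commit
    },
}

response = client.invoke_model(
    modelId="amazon.nova-micro-v1:0",  # hypothetical Nova model choice
    body=json.dumps(body),
)
print(json.loads(response["body"].read()))
```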