Commit 9f6a181

use upstream's bedrock runtime extension

1 parent 8081d7d commit 9f6a181

6 files changed: +18 −316 lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py

Lines changed: 1 addition & 218 deletions
@@ -2,13 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 import abc
 import inspect
-import io
-import json
 import logging
-import math
-from typing import Any, Dict, Optional
-
-from botocore.response import StreamingBody
+from typing import Dict, Optional
 
 from amazon.opentelemetry.distro._aws_attribute_keys import (
     AWS_BEDROCK_AGENT_ID,
@@ -17,16 +12,6 @@
     AWS_BEDROCK_GUARDRAIL_ID,
     AWS_BEDROCK_KNOWLEDGE_BASE_ID,
 )
-from amazon.opentelemetry.distro._aws_span_processing_util import (
-    GEN_AI_REQUEST_MAX_TOKENS,
-    GEN_AI_REQUEST_MODEL,
-    GEN_AI_REQUEST_TEMPERATURE,
-    GEN_AI_REQUEST_TOP_P,
-    GEN_AI_RESPONSE_FINISH_REASONS,
-    GEN_AI_SYSTEM,
-    GEN_AI_USAGE_INPUT_TOKENS,
-    GEN_AI_USAGE_OUTPUT_TOKENS,
-)
 from opentelemetry.instrumentation.botocore.extensions.types import (
     _AttributeMapT,
     _AwsSdkCallContext,
@@ -245,205 +230,3 @@ def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _Bo
                 AWS_BEDROCK_GUARDRAIL_ARN,
                 guardrail_arn,
             )
-
-
-class _BedrockRuntimeExtension(_AwsSdkExtension):
-    """
-    This class is an extension for <a
-    href="https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html">
-    Amazon Bedrock Runtime</a>.
-    """
-
-    def extract_attributes(self, attributes: _AttributeMapT):
-        attributes[GEN_AI_SYSTEM] = _AWS_BEDROCK_SYSTEM
-
-        model_id = self._call_context.params.get(_MODEL_ID)
-        if model_id:
-            attributes[GEN_AI_REQUEST_MODEL] = model_id
-
-            # Get the request body if it exists
-            body = self._call_context.params.get("body")
-            if body:
-                try:
-                    request_body = json.loads(body)
-
-                    if "amazon.titan" in model_id:
-                        self._extract_titan_attributes(attributes, request_body)
-                    if "amazon.nova" in model_id:
-                        self._extract_nova_attributes(attributes, request_body)
-                    elif "anthropic.claude" in model_id:
-                        self._extract_claude_attributes(attributes, request_body)
-                    elif "meta.llama" in model_id:
-                        self._extract_llama_attributes(attributes, request_body)
-                    elif "cohere.command" in model_id:
-                        self._extract_cohere_attributes(attributes, request_body)
-                    elif "ai21.jamba" in model_id:
-                        self._extract_ai21_attributes(attributes, request_body)
-                    elif "mistral" in model_id:
-                        self._extract_mistral_attributes(attributes, request_body)
-
-                except json.JSONDecodeError:
-                    _logger.debug("Error: Unable to parse the body as JSON")
-
-    def _extract_titan_attributes(self, attributes, request_body):
-        config = request_body.get("textGenerationConfig", {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("topP"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount"))
-
-    def _extract_nova_attributes(self, attributes, request_body):
-        config = request_body.get("inferenceConfig", {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("top_p"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("max_new_tokens"))
-
-    def _extract_claude_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_cohere_attributes(self, attributes, request_body):
-        prompt = request_body.get("message")
-        if prompt:
-            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p"))
-
-    def _extract_ai21_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_llama_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_mistral_attributes(self, attributes, request_body):
-        prompt = request_body.get("prompt")
-        if prompt:
-            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    @staticmethod
-    def _set_if_not_none(attributes, key, value):
-        if value is not None:
-            attributes[key] = value
-
-    # pylint: disable=too-many-branches
-    def on_success(self, span: Span, result: Dict[str, Any], instrumentor_context: _BotocoreInstrumentorContext):
-        model_id = self._call_context.params.get(_MODEL_ID)
-
-        if not model_id:
-            return
-
-        if "body" in result and isinstance(result["body"], StreamingBody):
-            original_body = None
-            try:
-                original_body = result["body"]
-                body_content = original_body.read()
-
-                # Use one stream for telemetry
-                stream = io.BytesIO(body_content)
-                telemetry_content = stream.read()
-                response_body = json.loads(telemetry_content.decode("utf-8"))
-                if "amazon.titan" in model_id:
-                    self._handle_amazon_titan_response(span, response_body)
-                if "amazon.nova" in model_id:
-                    self._handle_amazon_nova_response(span, response_body)
-                elif "anthropic.claude" in model_id:
-                    self._handle_anthropic_claude_response(span, response_body)
-                elif "meta.llama" in model_id:
-                    self._handle_meta_llama_response(span, response_body)
-                elif "cohere.command" in model_id:
-                    self._handle_cohere_command_response(span, response_body)
-                elif "ai21.jamba" in model_id:
-                    self._handle_ai21_jamba_response(span, response_body)
-                elif "mistral" in model_id:
-                    self._handle_mistral_mistral_response(span, response_body)
-                # Replenish stream for downstream application use
-                new_stream = io.BytesIO(body_content)
-                result["body"] = StreamingBody(new_stream, len(body_content))
-
-            except json.JSONDecodeError:
-                _logger.debug("Error: Unable to parse the response body as JSON")
-            except Exception as e:  # pylint: disable=broad-exception-caught, invalid-name
-                _logger.debug("Error processing response: %s", e)
-            finally:
-                if original_body is not None:
-                    original_body.close()
-
-    # pylint: disable=no-self-use
-    def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]):
-        if "inputTextTokenCount" in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"])
-        if "results" in response_body and response_body["results"]:
-            result = response_body["results"][0]
-            if "tokenCount" in result:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"])
-            if "completionReason" in result:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result["completionReason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_amazon_nova_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "inputTokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["inputTokens"])
-            if "outputTokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["outputTokens"])
-        if "stopReason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "input_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"])
-            if "output_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"])
-        if "stop_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]):
-        # Output tokens: Approximate from the response text
-        if "text" in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / 6))
-        if "finish_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "prompt_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"])
-            if "completion_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["completion_tokens"])
-        if "choices" in response_body:
-            choices = response_body["choices"][0]
-            if "finish_reason" in choices:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]):
-        if "prompt_token_count" in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"])
-        if "generation_token_count" in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"])
-        if "stop_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]):
-        if "outputs" in response_body:
-            outputs = response_body["outputs"][0]
-            if "text" in outputs:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / 6))
-            if "stop_reason" in outputs:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
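Note: the trickiest part of the removed `on_success` is its read-and-replenish handling: a botocore `StreamingBody` can be consumed only once, so the extension drained it for telemetry and then rewrapped the bytes so the application could still read the response. A minimal, self-contained sketch of that pattern (the `read_and_replenish` name and the fake payload below are illustrative, not from the commit):

```python
import io
import json

from botocore.response import StreamingBody


def read_and_replenish(result: dict) -> dict:
    """Consume result["body"] for inspection, then restore an equivalent stream."""
    original_body = result["body"]
    try:
        body_content = original_body.read()  # drains the underlying stream
        response_body = json.loads(body_content.decode("utf-8"))
        # ... set span attributes from response_body here ...
        # Rewrap so downstream callers can still call result["body"].read()
        result["body"] = StreamingBody(io.BytesIO(body_content), len(body_content))
    finally:
        original_body.close()
    return result


# Illustrative usage with a fake response payload:
payload = b'{"stop_reason": "end_turn"}'
fake_result = {"body": StreamingBody(io.BytesIO(payload), len(payload))}
read_and_replenish(fake_result)
print(fake_result["body"].read())  # b'{"stop_reason": "end_turn"}'
```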

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_botocore_patches.py

Lines changed: 4 additions & 5 deletions
@@ -19,7 +19,6 @@
     _BedrockAgentExtension,
     _BedrockAgentRuntimeExtension,
     _BedrockExtension,
-    _BedrockRuntimeExtension,
 )
 from opentelemetry.instrumentation.botocore.extensions import _KNOWN_EXTENSIONS
 from opentelemetry.instrumentation.botocore.extensions.lmbd import _LambdaExtension
@@ -196,17 +195,17 @@ def patch_on_success(self, span: Span, result: _BotoResultT, instrumentor_contex
 
 
 def _apply_botocore_bedrock_patch() -> None:
-    """Botocore instrumentation patch for Bedrock, Bedrock Agent, Bedrock Runtime and Bedrock Agent Runtime
+    """Botocore instrumentation patch for Bedrock, Bedrock Agent, and Bedrock Agent Runtime
 
     This patch adds an extension to the upstream's list of known extension for Bedrock.
     Extensions allow for custom logic for adding service-specific information to spans, such as attributes.
-    Specifically, we are adding logic to add the AWS_BEDROCK attributes referenced in _aws_attribute_keys,
-    GEN_AI_REQUEST_MODEL and GEN_AI_SYSTEM attributes referenced in _aws_span_processing_util.
+    Specifically, we are adding logic to add the AWS_BEDROCK attributes referenced in _aws_attribute_keys.
+    Note: Bedrock Runtime uses the upstream extension directly.
     """
     _KNOWN_EXTENSIONS["bedrock"] = _lazy_load(".", "_BedrockExtension")
     _KNOWN_EXTENSIONS["bedrock-agent"] = _lazy_load(".", "_BedrockAgentExtension")
     _KNOWN_EXTENSIONS["bedrock-agent-runtime"] = _lazy_load(".", "_BedrockAgentRuntimeExtension")
-    _KNOWN_EXTENSIONS["bedrock-runtime"] = _lazy_load(".", "_BedrockRuntimeExtension")
+    # bedrock-runtime is handled by upstream
 
 
 # The OpenTelemetry Authors code
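The registration above is the whole patch mechanism: `_KNOWN_EXTENSIONS` maps a botocore endpoint prefix to a zero-argument loader, and this commit simply stops registering a loader for `bedrock-runtime`, letting upstream's own entry apply. A hedged sketch of the lazy-load helper this relies on, mirroring the pattern visible in the diff (the distro's actual implementation may differ in detail):

```python
import importlib


def _lazy_load(module: str, cls: str):
    """Return a loader that imports `cls` from `module` on first use.

    Deferring the import keeps extension modules out of the startup path
    until the matching botocore service is actually called.
    """

    def loader():
        imported_mod = importlib.import_module(module, __package__)
        return getattr(imported_mod, cls, None)

    return loader


# Illustrative registration, matching the shape used in the patch above:
_KNOWN_EXTENSIONS = {
    "bedrock-agent": _lazy_load(".", "_BedrockAgentExtension"),
    # "bedrock-runtime" deliberately absent: upstream's entry applies.
}
```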

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py

Lines changed: 3 additions & 30 deletions
@@ -274,17 +274,6 @@ def _test_patched_botocore_instrumentation(self):
             output_prompt=cohere_output,
         )
 
-        # BedrockRuntime - AI21 Jambda
-        self._test_patched_bedrock_runtime_invoke_model(
-            model_id="ai21.jamba-1-5-large-v1:0",
-            max_tokens=512,
-            temperature=0.5,
-            top_p=0.999,
-            finish_reason="end_turn",
-            input_tokens=23,
-            output_tokens=36,
-        )
-
         # BedrockRuntime - Mistral
         msg = "Hello World"
         mistral_input = f"<s>[INST] {msg} [/INST]"
@@ -429,7 +418,7 @@ def get_model_response_request():
                 "inferenceConfig": {
                     "max_new_tokens": max_tokens,
                     "temperature": temperature,
-                    "top_p": top_p,
+                    "topP": top_p,
                 }
             }
 
@@ -453,22 +442,6 @@ def get_model_response_request():
                 "usage": {"input_tokens": input_tokens, "output_tokens": output_tokens},
             }
 
-        if "ai21.jamba" in model_id:
-            request_body = {
-                "max_tokens": max_tokens,
-                "temperature": temperature,
-                "top_p": top_p,
-            }
-
-            response_body = {
-                "choices": [{"finish_reason": finish_reason}],
-                "usage": {
-                    "prompt_tokens": input_tokens,
-                    "completion_tokens": output_tokens,
-                    "total_tokens": (input_tokens + output_tokens),
-                },
-            }
-
         if "meta.llama" in model_id:
             request_body = {
                 "max_gen_len": max_tokens,
@@ -512,10 +485,10 @@ def get_model_response_request():
     request_body, response_body = get_model_response_request()
 
     bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
-        "bedrock-runtime", model_id=model_id, request_body=request_body
+        "bedrock-runtime", operation="InvokeModel", model_id=model_id, request_body=request_body
    )
     bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
-        "bedrock-runtime", model_id=model_id, streaming_body=response_body
+        "bedrock-runtime", operation="InvokeModel", model_id=model_id, streaming_body=response_body
    )
 
     bedrock_runtime_attributes.update(bedrock_runtime_success_attributes)

contract-tests/images/applications/botocore/botocore_server.py

Lines changed: 1 addition & 27 deletions
@@ -435,7 +435,7 @@ def get_model_request_response(path):
             "inferenceConfig": {
                 "max_new_tokens": 800,
                 "temperature": 0.9,
-                "top_p": 0.7,
+                "topP": 0.7,
             },
         }
 
@@ -496,32 +496,6 @@ def get_model_request_response(path):
             "text": "test-generation-text",
         }
 
-    if "ai21.jamba" in path:
-        model_id = "ai21.jamba-1-5-large-v1:0"
-
-        request_body = {
-            "messages": [
-                {
-                    "role": "user",
-                    "content": prompt,
-                },
-            ],
-            "top_p": 0.8,
-            "temperature": 0.6,
-            "max_tokens": 512,
-        }
-
-        response_body = {
-            "stop_reason": "end_turn",
-            "usage": {
-                "prompt_tokens": 21,
-                "completion_tokens": 24,
-            },
-            "choices": [
-                {"finish_reason": "stop"},
-            ],
-        }
-
    if "mistral" in path:
        model_id = "mistral.mistral-7b-instruct-v0:2"
 
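Both this contract-test server and the unit test above fix the same key: the Nova `inferenceConfig` uses the camelCase `topP`, so the snake_case `top_p` did not match what the upstream extension extracts. A hedged sketch of an `InvokeModel` call with the corrected shape, reusing the values from this diff (requires AWS credentials and model access; the region and Nova model ID are illustrative choices, not from this commit):

```python
import json

import boto3

# Illustrative client setup; region is an assumption.
client = boto3.client("bedrock-runtime", region_name="us-east-1")

body = {
    "messages": [{"role": "user", "content": [{"text": "Hello World"}]}],
    "inferenceConfig": {
        "max_new_tokens": 800,
        "temperature": 0.9,
        "topP": 0.7,  # camelCase, per the fix in this commit
    },
}

response = client.invoke_model(
    modelId="amazon.nova-micro-v1:0",  # hypothetical Nova model choice
    body=json.dumps(body),
)
print(json.loads(response["body"].read()))
```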