fix(bedrock): llama3 completion wasnt logged (#1914)
nirga authored Aug 29, 2024
1 parent 5b21197 commit 633b4ed
Showing 4 changed files with 103 additions and 4 deletions.
@@ -421,13 +421,27 @@ def _set_llama_span_attributes(span, request_body, response_body):
 
     if should_send_prompts():
         _set_span_attribute(
-            span, f"{SpanAttributes.LLM_PROMPTS}.0.user", request_body.get("prompt")
+            span, f"{SpanAttributes.LLM_PROMPTS}.0.content", request_body.get("prompt")
         )
         _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.role", "user")
 
-        for i, generation in enumerate(response_body.get("generations")):
+        if response_body.get("generation"):
             _set_span_attribute(
-                span, f"{SpanAttributes.LLM_COMPLETIONS}.{i}.content", generation
+                span, f"{SpanAttributes.LLM_COMPLETIONS}.0.role", "assistant"
             )
+            _set_span_attribute(
+                span,
+                f"{SpanAttributes.LLM_COMPLETIONS}.0.content",
+                response_body.get("generation"),
+            )
+        else:
+            for i, generation in enumerate(response_body.get("generations")):
+                _set_span_attribute(
+                    span, f"{SpanAttributes.LLM_COMPLETIONS}.{i}.role", "assistant"
+                )
+                _set_span_attribute(
+                    span, f"{SpanAttributes.LLM_COMPLETIONS}.{i}.content", generation
+                )
 
 
 def _set_amazon_span_attributes(span, request_body, response_body):
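Why the change was needed, in short: a Llama 3 InvokeModel response carries a single "generation" string (see the recorded cassette in the next file), while the previous code only iterated a "generations" list, so Llama 3 completions were never attached to the span. Below is a minimal illustrative sketch of the two payload shapes the new branch distinguishes; the Llama 3 values are copied from the cassette, the list-style shape and the completions() helper are assumptions made only for illustration.

# Illustrative only -- not part of the diff above.

# Llama 3 invoke_model response body, as recorded in the cassette below:
llama3_response_body = {
    "generation": "\nWhy did the opentelemetry span go to therapy? ...",
    "prompt_token_count": 8,
    "generation_token_count": 35,
    "stop_reason": "stop",
}

# Older llama-style body the previous loop expected: completions under a
# "generations" list (shape inferred from the original loop; an assumption).
llama2_style_response_body = {
    "generations": ["...generated text..."],
}

def completions(response_body):
    # Hypothetical helper mirroring the patched branching:
    # prefer the single "generation" string, fall back to the "generations" list.
    if response_body.get("generation"):
        return [response_body["generation"]]
    return list(response_body.get("generations") or [])

assert completions(llama3_response_body) == [llama3_response_body["generation"]]
assert completions(llama2_style_response_body) == llama2_style_response_body["generations"]

The new VCR cassette recording the Llama 3 interaction follows.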
@@ -0,0 +1,49 @@
interactions:
- request:
    body: '{"prompt": "Tell me a joke about opentelemetry", "max_gen_len": 128, "temperature":
      0.1, "top_p": 0.9}'
    headers:
      Content-Length:
      - '102'
      User-Agent:
      - !!binary |
        Qm90bzMvMS4zNC4xNDUgbWQvQm90b2NvcmUjMS4zNC4xNDUgdWEvMi4wIG9zL21hY29zIzIzLjYu
        MCBtZC9hcmNoI2FybTY0IGxhbmcvcHl0aG9uIzMuMTIuMSBtZC9weWltcGwjQ1B5dGhvbiBjZmcv
        cmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zNC4xNDU=
      X-Amz-Date:
      - !!binary |
        MjAyNDA4MjlUMTg0OTAyWg==
      amz-sdk-invocation-id:
      - !!binary |
        YWUxN2YyYzgtNzkxOC00ZmI5LWJkYTktZmVhYzUyNzc1MGRm
      amz-sdk-request:
      - !!binary |
        YXR0ZW1wdD0x
    method: POST
    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/meta.llama3-70b-instruct-v1%3A0/invoke
  response:
    body:
      string: '{"generation":"\nWhy did the opentelemetry span go to therapy?\nBecause
        it was feeling a little \"distributed\" and wanted to get to the \"root\"
        of its problems!","prompt_token_count":8,"generation_token_count":35,"stop_reason":"stop"}'
    headers:
      Connection:
      - keep-alive
      Content-Length:
      - '236'
      Content-Type:
      - application/json
      Date:
      - Thu, 29 Aug 2024 18:49:03 GMT
      X-Amzn-Bedrock-Input-Token-Count:
      - '8'
      X-Amzn-Bedrock-Invocation-Latency:
      - '715'
      X-Amzn-Bedrock-Output-Token-Count:
      - '35'
      x-amzn-RequestId:
      - 49f48fb6-28ab-4471-95e3-57e5579f3a52
    status:
      code: 200
      message: OK
version: 1
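For orientation, these are roughly the values the new test below asserts on the resulting "bedrock.completion" span for this recording. The token counts and content come from the cassette above and the assertions below; the SpanAttributes import path is an assumption based on how this repo's test modules reference those constants.

# Illustrative only: expected span attributes for the recorded interaction above.
# Import path assumed to match this repo's test modules.
from opentelemetry.semconv_ai import SpanAttributes

expected_attributes = {
    SpanAttributes.LLM_USAGE_PROMPT_TOKENS: 8,
    SpanAttributes.LLM_USAGE_COMPLETION_TOKENS: 35,
    SpanAttributes.LLM_USAGE_TOTAL_TOKENS: 8 + 35,
    f"{SpanAttributes.LLM_PROMPTS}.0.content": "Tell me a joke about opentelemetry",
    f"{SpanAttributes.LLM_PROMPTS}.0.role": "user",
    f"{SpanAttributes.LLM_COMPLETIONS}.0.role": "assistant",
    f"{SpanAttributes.LLM_COMPLETIONS}.0.content": (
        '\nWhy did the opentelemetry span go to therapy?\nBecause it was feeling '
        'a little "distributed" and wanted to get to the "root" of its problems!'
    ),
}

The test file changes follow.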
@@ -5,7 +5,7 @@
 
 
 @pytest.mark.vcr()
-def test_meta_llama3_completion_string_content(exporter, brt):
+def test_meta_llama2_completion_string_content(exporter, brt):
     model_id = "meta.llama2-13b-chat-v1"
     prompt = """<s>[INST] <<SYS>>
 You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your
@@ -42,3 +42,39 @@ def test_meta_llama3_completion_string_content(exporter, brt):
         meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
         == response_body["generation_token_count"] + response_body["prompt_token_count"]
     )
+
+
+@pytest.mark.vcr()
+def test_meta_llama3_completion(exporter, brt):
+    model_id = "meta.llama3-70b-instruct-v1:0"
+    prompt = "Tell me a joke about opentelemetry"
+    # Create request body.
+    body = json.dumps(
+        {"prompt": prompt, "max_gen_len": 128, "temperature": 0.1, "top_p": 0.9}
+    )
+
+    response = brt.invoke_model(body=body, modelId=model_id)
+
+    response_body = json.loads(response.get("body").read())
+
+    spans = exporter.get_finished_spans()
+    assert all(span.name == "bedrock.completion" for span in spans)
+
+    meta_span = spans[0]
+    assert (
+        meta_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
+        == response_body["prompt_token_count"]
+    )
+    assert (
+        meta_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS]
+        == response_body["generation_token_count"]
+    )
+    assert (
+        meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
+        == response_body["generation_token_count"] + response_body["prompt_token_count"]
+    )
+    assert meta_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"] == prompt
+    assert (
+        meta_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.0.content"]
+        == response_body["generation"]
+    )
