fix(bedrock): llama3 completion wasnt logged (#1914)
nirga authored Aug 29, 2024
1 parent 5b21197 commit 633b4ed
Showing 4 changed files with 103 additions and 4 deletions.
@@ -421,13 +421,27 @@ def _set_llama_span_attributes(span, request_body, response_body):
 
     if should_send_prompts():
         _set_span_attribute(
-            span, f"{SpanAttributes.LLM_PROMPTS}.0.user", request_body.get("prompt")
+            span, f"{SpanAttributes.LLM_PROMPTS}.0.content", request_body.get("prompt")
         )
         _set_span_attribute(span, f"{SpanAttributes.LLM_PROMPTS}.0.role", "user")
 
-        for i, generation in enumerate(response_body.get("generations")):
+        if response_body.get("generation"):
             _set_span_attribute(
-                span, f"{SpanAttributes.LLM_COMPLETIONS}.{i}.content", generation
+                span, f"{SpanAttributes.LLM_COMPLETIONS}.0.role", "assistant"
             )
+            _set_span_attribute(
+                span,
+                f"{SpanAttributes.LLM_COMPLETIONS}.0.content",
+                response_body.get("generation"),
+            )
+        else:
+            for i, generation in enumerate(response_body.get("generations")):
+                _set_span_attribute(
+                    span, f"{SpanAttributes.LLM_COMPLETIONS}.{i}.role", "assistant"
+                )
+                _set_span_attribute(
+                    span, f"{SpanAttributes.LLM_COMPLETIONS}.{i}.content", generation
+                )
 
 
 def _set_amazon_span_attributes(span, request_body, response_body):
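Why the change was needed, in short: a Llama 3 InvokeModel response carries a single "generation" string (see the recorded cassette in the next file), while the previous code only iterated a "generations" list, so Llama 3 completions were never attached to the span. Below is a minimal illustrative sketch of the two payload shapes the new branch distinguishes; the Llama 3 values are copied from the cassette, the list-style shape and the completions() helper are assumptions made only for illustration.

# Illustrative only -- not part of the diff above.

# Llama 3 invoke_model response body, as recorded in the cassette below:
llama3_response_body = {
    "generation": "\nWhy did the opentelemetry span go to therapy? ...",
    "prompt_token_count": 8,
    "generation_token_count": 35,
    "stop_reason": "stop",
}

# Older llama-style body the previous loop expected: completions under a
# "generations" list (shape inferred from the original loop; an assumption).
llama2_style_response_body = {
    "generations": ["...generated text..."],
}

def completions(response_body):
    # Hypothetical helper mirroring the patched branching:
    # prefer the single "generation" string, fall back to the "generations" list.
    if response_body.get("generation"):
        return [response_body["generation"]]
    return list(response_body.get("generations") or [])

assert completions(llama3_response_body) == [llama3_response_body["generation"]]
assert completions(llama2_style_response_body) == llama2_style_response_body["generations"]

The new VCR cassette recording the Llama 3 interaction follows.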
@@ -0,0 +1,49 @@
interactions:
- request:
    body: '{"prompt": "Tell me a joke about opentelemetry", "max_gen_len": 128, "temperature":
      0.1, "top_p": 0.9}'
    headers:
      Content-Length:
      - '102'
      User-Agent:
      - !!binary |
        Qm90bzMvMS4zNC4xNDUgbWQvQm90b2NvcmUjMS4zNC4xNDUgdWEvMi4wIG9zL21hY29zIzIzLjYu
        MCBtZC9hcmNoI2FybTY0IGxhbmcvcHl0aG9uIzMuMTIuMSBtZC9weWltcGwjQ1B5dGhvbiBjZmcv
        cmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zNC4xNDU=
      X-Amz-Date:
      - !!binary |
        MjAyNDA4MjlUMTg0OTAyWg==
      amz-sdk-invocation-id:
      - !!binary |
        YWUxN2YyYzgtNzkxOC00ZmI5LWJkYTktZmVhYzUyNzc1MGRm
      amz-sdk-request:
      - !!binary |
        YXR0ZW1wdD0x
    method: POST
    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/meta.llama3-70b-instruct-v1%3A0/invoke
  response:
    body:
      string: '{"generation":"\nWhy did the opentelemetry span go to therapy?\nBecause
        it was feeling a little \"distributed\" and wanted to get to the \"root\"
        of its problems!","prompt_token_count":8,"generation_token_count":35,"stop_reason":"stop"}'
    headers:
      Connection:
      - keep-alive
      Content-Length:
      - '236'
      Content-Type:
      - application/json
      Date:
      - Thu, 29 Aug 2024 18:49:03 GMT
      X-Amzn-Bedrock-Input-Token-Count:
      - '8'
      X-Amzn-Bedrock-Invocation-Latency:
      - '715'
      X-Amzn-Bedrock-Output-Token-Count:
      - '35'
      x-amzn-RequestId:
      - 49f48fb6-28ab-4471-95e3-57e5579f3a52
    status:
      code: 200
      message: OK
version: 1
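For orientation, these are roughly the values the new test below asserts on the resulting "bedrock.completion" span for this recording. The token counts and content come from the cassette above and the assertions below; the SpanAttributes import path is an assumption based on how this repo's test modules reference those constants.

# Illustrative only: expected span attributes for the recorded interaction above.
# Import path assumed to match this repo's test modules.
from opentelemetry.semconv_ai import SpanAttributes

expected_attributes = {
    SpanAttributes.LLM_USAGE_PROMPT_TOKENS: 8,
    SpanAttributes.LLM_USAGE_COMPLETION_TOKENS: 35,
    SpanAttributes.LLM_USAGE_TOTAL_TOKENS: 8 + 35,
    f"{SpanAttributes.LLM_PROMPTS}.0.content": "Tell me a joke about opentelemetry",
    f"{SpanAttributes.LLM_PROMPTS}.0.role": "user",
    f"{SpanAttributes.LLM_COMPLETIONS}.0.role": "assistant",
    f"{SpanAttributes.LLM_COMPLETIONS}.0.content": (
        '\nWhy did the opentelemetry span go to therapy?\nBecause it was feeling '
        'a little "distributed" and wanted to get to the "root" of its problems!'
    ),
}

The test file changes follow.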
@@ -5,7 +5,7 @@
 
 
 @pytest.mark.vcr()
-def test_meta_llama3_completion_string_content(exporter, brt):
+def test_meta_llama2_completion_string_content(exporter, brt):
     model_id = "meta.llama2-13b-chat-v1"
     prompt = """<s>[INST] <<SYS>>
 You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your
@@ -42,3 +42,39 @@ def test_meta_llama3_completion_string_content(exporter, brt):
         meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
         == response_body["generation_token_count"] + response_body["prompt_token_count"]
     )
+
+
+@pytest.mark.vcr()
+def test_meta_llama3_completion(exporter, brt):
+    model_id = "meta.llama3-70b-instruct-v1:0"
+    prompt = "Tell me a joke about opentelemetry"
+    # Create request body.
+    body = json.dumps(
+        {"prompt": prompt, "max_gen_len": 128, "temperature": 0.1, "top_p": 0.9}
+    )
+
+    response = brt.invoke_model(body=body, modelId=model_id)
+
+    response_body = json.loads(response.get("body").read())
+
+    spans = exporter.get_finished_spans()
+    assert all(span.name == "bedrock.completion" for span in spans)
+
+    meta_span = spans[0]
+    assert (
+        meta_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
+        == response_body["prompt_token_count"]
+    )
+    assert (
+        meta_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS]
+        == response_body["generation_token_count"]
+    )
+    assert (
+        meta_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
+        == response_body["generation_token_count"] + response_body["prompt_token_count"]
+    )
+    assert meta_span.attributes[f"{SpanAttributes.LLM_PROMPTS}.0.content"] == prompt
+    assert (
+        meta_span.attributes[f"{SpanAttributes.LLM_COMPLETIONS}.0.content"]
+        == response_body["generation"]
+    )
