Skip to content

Commit

Permalink
feat(bedrock): support metrics for bedrock (#1957)
Browse files Browse the repository at this point in the history
Co-authored-by: Nir Gazit <nirga@users.noreply.github.com>
  • Loading branch information
jinsongo and nirga authored Oct 2, 2024
1 parent 346d752 commit a30bb8c
Show file tree
Hide file tree
Showing 18 changed files with 392 additions and 77 deletions.

Large diffs are not rendered by default.

67 changes: 41 additions & 26 deletions packages/opentelemetry-instrumentation-bedrock/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,20 @@
import os
import pytest
import boto3

from opentelemetry import trace
from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

pytest_plugins = []


@pytest.fixture(scope="session")
def exporter():
    """Install a global tracer provider backed by an in-memory span exporter.

    Returns:
        The ``InMemorySpanExporter`` attached to the provider through a
        ``SimpleSpanProcessor``.
    """
    span_exporter = InMemorySpanExporter()

    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    trace.set_tracer_provider(tracer_provider)

    return span_exporter


@pytest.fixture(scope="session", autouse=True)
def instrument(exporter):
    """Instrument Bedrock for the session and shut the exporter down at the end."""
    instrumentor = BedrockInstrumentor(enrich_token_usage=True)
    instrumentor.instrument()

    yield

    # Teardown: runs once after the last test of the session.
    exporter.shutdown()
from opentelemetry import metrics
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader

from opentelemetry.instrumentation.bedrock import BedrockInstrumentor

@pytest.fixture(autouse=True)
def clear_exporter(exporter):
    """Clear the session-scoped span exporter before every test."""
    # The exporter is shared across the session, so it is cleared here
    # for each test that runs.
    exporter.clear()
pytest_plugins = []


@pytest.fixture(autouse=True)
Expand All @@ -55,6 +36,40 @@ def brt():
)


@pytest.fixture(scope="session")
def test_context():
    """Register global in-memory metrics and tracing providers for the session.

    Returns:
        A ``(span_exporter, meter_provider, metric_reader)`` tuple, in that
        order, so tests can unpack whichever component they need.
    """
    # Metrics: a meter provider that feeds an in-memory reader.
    metric_reader = InMemoryMetricReader()
    meter_provider = MeterProvider(
        metric_readers=[metric_reader],
        resource=Resource.create(),
    )
    metrics.set_meter_provider(meter_provider)

    # Tracing: a tracer provider that feeds an in-memory span exporter.
    span_exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    trace.set_tracer_provider(tracer_provider)

    return span_exporter, meter_provider, metric_reader


@pytest.fixture(scope="session", autouse=True)
def instrument(test_context):
    """Instrument Bedrock for the session; shut telemetry components down after.

    On teardown the span exporter, the metric reader and the meter provider
    are shut down, in that order.
    """
    span_exporter, meter_provider, metric_reader = test_context
    BedrockInstrumentor(enrich_token_usage=True).instrument()

    yield

    for component in (span_exporter, metric_reader, meter_provider):
        component.shutdown()


@pytest.fixture(autouse=True)
def clear_test_context(test_context):
    """Clear the shared span exporter before every test.

    Only the span exporter is cleared; the meter provider and metric reader
    from ``test_context`` are left untouched.
    """
    span_exporter, _meter_provider, _metric_reader = test_context
    span_exporter.clear()


@pytest.fixture(scope="module")
def vcr_config():
    """Return the VCR configuration: ``authorization`` listed under ``filter_headers``."""
    filtered_headers = ["authorization"]
    return {"filter_headers": filtered_headers}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""unit tests."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
interactions:
- request:
body: '{"inputText": "Tell me a joke about opentelemetry", "textGenerationConfig":
{"maxTokenCount": 200, "temperature": 0.5, "topP": 0.5}}'
headers:
Accept:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
Content-Length:
- '132'
Content-Type:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
User-Agent:
- !!binary |
Qm90bzMvMS4zNC4xNjIgbWQvQm90b2NvcmUjMS4zNC4xNjIgdWEvMi4wIG9zL21hY29zIzIzLjYu
MCBtZC9hcmNoI2FybTY0IGxhbmcvcHl0aG9uIzMuMTEuNSBtZC9weWltcGwjQ1B5dGhvbiBjZmcv
cmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zNC4xNjI=
X-Amz-Date:
- !!binary |
MjAyNDA5MTlUMjE0NjE5Wg==
amz-sdk-invocation-id:
- !!binary |
MGVmMmNlZWUtNzA1OS00M2Y2LTk4OTUtZWUzMDdjNDFmNWI2
amz-sdk-request:
- !!binary |
YXR0ZW1wdD0x
method: POST
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke
response:
body:
string: '{"inputTextTokenCount":9,"results":[{"tokenCount":17,"outputText":"\nWhat
do you call a bear with no teeth?\nA gummy bear.","completionReason":"FINISH"}]}'
headers:
Connection:
- keep-alive
Content-Length:
- '154'
Content-Type:
- application/json
Date:
- Thu, 19 Sep 2024 21:46:20 GMT
X-Amzn-Bedrock-Input-Token-Count:
- '9'
X-Amzn-Bedrock-Invocation-Latency:
- '1155'
X-Amzn-Bedrock-Output-Token-Count:
- '17'
x-amzn-RequestId:
- 58c863f2-7a84-4bf8-8b93-1d51ca8aa150
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import json

import pytest
from opentelemetry.semconv_ai import Meters, SpanAttributes


@pytest.mark.vcr
def test_invoke_model_metrics(test_context, brt):
    """Invoke a Titan text model and verify the metrics that were recorded.

    Checks that a token-usage metric and an operation-duration metric were
    both recorded, that token data points are typed ``input``/``output``
    with a positive sum, and that the first data point of each collected
    metric carries the ``bedrock`` system attribute.

    Args:
        test_context: ``(span_exporter, meter_provider, metric_reader)`` tuple.
        brt: Bedrock runtime client; the test is skipped when it is ``None``.
    """
    if brt is None:
        # Report an explicit skip instead of returning, which pytest would
        # count as a pass.
        pytest.skip("test_invoke_model_metrics test skipped.")

    _, _, reader = test_context

    body = json.dumps(
        {
            "inputText": "Tell me a joke about opentelemetry",
            "textGenerationConfig": {
                "maxTokenCount": 200,
                "temperature": 0.5,
                "topP": 0.5,
            },
        }
    )

    brt.invoke_model(
        body=body,
        modelId='amazon.titan-text-express-v1',
        accept='application/json',
        contentType='application/json'
    )

    metrics_data = reader.get_metrics_data()
    resource_metrics = metrics_data.resource_metrics
    assert len(resource_metrics) > 0

    found_token_metric = False
    found_duration_metric = False

    for rm in resource_metrics:
        for sm in rm.scope_metrics:
            for metric in sm.metrics:
                data_points = metric.data.data_points

                if metric.name == Meters.LLM_TOKEN_USAGE:
                    found_token_metric = True
                    for data_point in data_points:
                        assert data_point.attributes[SpanAttributes.LLM_TOKEN_TYPE] in [
                            "output",
                            "input",
                        ]
                        assert data_point.sum > 0

                if metric.name == Meters.LLM_OPERATION_DURATION:
                    found_duration_metric = True
                    assert any(data_point.count > 0 for data_point in data_points)
                    assert any(data_point.sum > 0 for data_point in data_points)

                # Applied to every collected metric, not only the two above.
                assert (
                    data_points[0].attributes[SpanAttributes.LLM_SYSTEM]
                    == "bedrock"
                )

    assert found_token_metric, "token usage metric was not recorded"
    assert found_duration_metric, "operation duration metric was not recorded"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""unit tests."""
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import json


@pytest.mark.vcr()
def test_ai21_j2_completion_string_content(exporter, brt):
@pytest.mark.vcr
def test_ai21_j2_completion_string_content(test_context, brt):
body = json.dumps(
{
"prompt": "Translate to spanish: 'Amazon Bedrock is the easiest way to build and"
Expand All @@ -26,6 +26,7 @@ def test_ai21_j2_completion_string_content(exporter, brt):

response_body = json.loads(response.get("body").read())

exporter, _, _ = test_context
spans = exporter.get_finished_spans()
assert all(span.name == "bedrock.completion" for span in spans)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@pytest.mark.vcr
def test_anthropic_2_completion(exporter, brt):
def test_anthropic_2_completion(test_context, brt):
body = json.dumps(
{
"prompt": "Human: Tell me a joke about opentelemetry Assistant:",
Expand All @@ -24,6 +24,7 @@ def test_anthropic_2_completion(exporter, brt):
response_body = json.loads(response.get("body").read())
completion = response_body.get("completion")

exporter, _, _ = test_context
spans = exporter.get_finished_spans()
assert all(span.name == "bedrock.completion" for span in spans)

Expand All @@ -48,7 +49,7 @@ def test_anthropic_2_completion(exporter, brt):


@pytest.mark.vcr
def test_anthropic_3_completion_complex_content(exporter, brt):
def test_anthropic_3_completion_complex_content(test_context, brt):
body = json.dumps(
{
"messages": [
Expand All @@ -75,6 +76,7 @@ def test_anthropic_3_completion_complex_content(exporter, brt):
response_body = json.loads(response.get("body").read())
completion = response_body.get("content")

exporter, _, _ = test_context
spans = exporter.get_finished_spans()
assert all(span.name == "bedrock.completion" for span in spans)

Expand Down Expand Up @@ -103,7 +105,7 @@ def test_anthropic_3_completion_complex_content(exporter, brt):


@pytest.mark.vcr
def test_anthropic_3_completion_streaming(exporter, brt):
def test_anthropic_3_completion_streaming(test_context, brt):
body = json.dumps(
{
"messages": [
Expand Down Expand Up @@ -135,6 +137,7 @@ def test_anthropic_3_completion_streaming(exporter, brt):
if "delta" in decoded_chunk:
completion += decoded_chunk.get("delta").get("text") or ""

exporter, _, _ = test_context
spans = exporter.get_finished_spans()
assert all(span.name == "bedrock.completion" for span in spans)

Expand Down Expand Up @@ -165,7 +168,7 @@ def test_anthropic_3_completion_streaming(exporter, brt):


@pytest.mark.vcr
def test_anthropic_3_completion_string_content(exporter, brt):
def test_anthropic_3_completion_string_content(test_context, brt):
body = json.dumps(
{
"messages": [
Expand All @@ -190,6 +193,7 @@ def test_anthropic_3_completion_string_content(exporter, brt):
response_body = json.loads(response.get("body").read())
completion = response_body.get("content")

exporter, _, _ = test_context
spans = exporter.get_finished_spans()
assert all(span.name == "bedrock.completion" for span in spans)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import json


@pytest.mark.vcr()
def test_meta_llama2_completion_string_content(exporter, brt):
@pytest.mark.vcr
def test_meta_llama2_completion_string_content(test_context, brt):
model_id = "meta.llama2-13b-chat-v1"
prompt = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your
Expand All @@ -26,6 +26,7 @@ def test_meta_llama2_completion_string_content(exporter, brt):

response_body = json.loads(response.get("body").read())

exporter, _, _ = test_context
spans = exporter.get_finished_spans()
assert all(span.name == "bedrock.completion" for span in spans)

Expand All @@ -44,8 +45,8 @@ def test_meta_llama2_completion_string_content(exporter, brt):
)


@pytest.mark.vcr()
def test_meta_llama3_completion(exporter, brt):
@pytest.mark.vcr
def test_meta_llama3_completion(test_context, brt):
model_id = "meta.llama3-70b-instruct-v1:0"
prompt = "Tell me a joke about opentelemetry"
# Create request body.
Expand All @@ -57,6 +58,7 @@ def test_meta_llama3_completion(exporter, brt):

response_body = json.loads(response.get("body").read())

exporter, _, _ = test_context
spans = exporter.get_finished_spans()
assert all(span.name == "bedrock.completion" for span in spans)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import json


@pytest.mark.vcr()
def test_titan_completion(exporter, brt):
@pytest.mark.vcr
def test_titan_completion(test_context, brt):
body = json.dumps(
{
"inputText": "Translate to spanish: 'Amazon Bedrock is the easiest way to build and"
Expand All @@ -28,6 +28,7 @@ def test_titan_completion(exporter, brt):

response_body = json.loads(response.get("body").read())

exporter, _, _ = test_context
spans = exporter.get_finished_spans()
assert all(span.name == "bedrock.completion" for span in spans)

Expand Down

0 comments on commit a30bb8c

Please sign in to comment.