Skip to content

Commit e02823b

Browse files
authored
elastic-opentelemetry-instrumentation-openai: Add support for tracing embeddings calls (#20)
{ "name": "embeddings all-minilm:33m", "context": { "trace_id": "0x34b6f145212c169ea39a980ff670dee3", "span_id": "0x74b9bd94f2ea6f31", "trace_state": "[]" }, "kind": "SpanKind.CLIENT", "parent_id": null, "start_time": "2024-09-27T14:55:13.104397Z", "end_time": "2024-09-27T14:55:13.261547Z", "status": { "status_code": "UNSET" }, "attributes": { "gen_ai.operation.name": "embeddings", "gen_ai.request.model": "all-minilm:33m", "gen_ai.system": "openai", "server.address": "localhost", "server.port": 11434, "gen_ai.request.encoding_format": "float", "gen_ai.response.model": "all-minilm:33m", "gen_ai.usage.input_tokens": 9 }, "events": [], "links": [], "resource": { "attributes": { "telemetry.sdk.language": "python", "telemetry.sdk.name": "opentelemetry", "telemetry.sdk.version": "1.27.0", "telemetry.distro.name": "elastic", "telemetry.distro.version": "0.2.0", "process.runtime.description": "3.10.12 (main, Sep 11 2024, 15:47:36) [GCC 11.4.0]", "process.runtime.name": "cpython", "process.runtime.version": "3.10.12", "os.type": "linux", "os.version": "6.1.0-1034-oem", "telemetry.auto.version": "0.48b0", "service.name": "unknown_service" }, "schema_url": "" } }
1 parent 646ef3d commit e02823b

21 files changed

+2886
-239
lines changed

instrumentation/elastic-opentelemetry-instrumentation-openai/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ chat_completion = client.chat.completions.create(model="gpt-4o-mini", messages=m
5151
- `ELASTIC_OTEL_GENAI_CAPTURE_CONTENT` (default: `false`): when sets to `true` collect more
5252
informations about prompts and responses by enabling content capture
5353

54+
### Elastic specific semantic conventions
55+
56+
- New `embeddings` value for `gen_ai.operation.name`
57+
- New `gen_ai.request.encoding_format` attribute with openai specific values `[float, base64]`
58+
5459
## Development
5560

5661
We use [pytest](https://docs.pytest.org/en/stable/) to execute tests written with the standard

instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/__init__.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@
2828
ELASTIC_OTEL_GENAI_CAPTURE_CONTENT,
2929
)
3030
from opentelemetry.instrumentation.openai.helpers import (
31+
_get_embeddings_span_attributes_from_wrapper,
3132
_get_span_attributes_from_wrapper,
3233
_message_from_choice,
3334
_record_token_usage_metrics,
3435
_record_operation_duration_metric,
3536
_set_span_attributes_from_response,
37+
_set_embeddings_span_attributes_from_response,
3638
)
3739
from opentelemetry.instrumentation.openai.package import _instruments
3840
from opentelemetry.instrumentation.openai.version import __version__
@@ -95,6 +97,16 @@ def _patch(self, _module):
9597
"AsyncCompletions.create",
9698
self._async_chat_completion_wrapper,
9799
)
100+
wrap_function_wrapper(
101+
"openai.resources.embeddings",
102+
"Embeddings.create",
103+
self._embeddings_wrapper,
104+
)
105+
wrap_function_wrapper(
106+
"openai.resources.embeddings",
107+
"AsyncEmbeddings.create",
108+
self._async_embeddings_wrapper,
109+
)
98110

99111
def _uninstrument(self, **kwargs):
100112
# unwrap only supports uninstrementing real module references so we
@@ -103,6 +115,8 @@ def _uninstrument(self, **kwargs):
103115

104116
unwrap(openai.resources.chat.completions.Completions, "create")
105117
unwrap(openai.resources.chat.completions.AsyncCompletions, "create")
118+
unwrap(openai.resources.embeddings.Embeddings, "create")
119+
unwrap(openai.resources.embeddings.AsyncEmbeddings, "create")
106120

107121
def _chat_completion_wrapper(self, wrapped, instance, args, kwargs):
108122
logger.debug(f"openai.resources.chat.completions.Completions.create kwargs: {kwargs}")
@@ -226,3 +240,63 @@ async def _async_chat_completion_wrapper(self, wrapped, instance, args, kwargs):
226240
span.end()
227241

228242
return result
243+
244+
def _embeddings_wrapper(self, wrapped, instance, args, kwargs):
245+
span_attributes = _get_embeddings_span_attributes_from_wrapper(instance, kwargs)
246+
247+
span_name = f"{span_attributes[GEN_AI_OPERATION_NAME]} {span_attributes[GEN_AI_REQUEST_MODEL]}"
248+
with self.tracer.start_as_current_span(
249+
name=span_name,
250+
kind=SpanKind.CLIENT,
251+
attributes=span_attributes,
252+
# this is important to avoid having the span closed before ending the stream
253+
end_on_exit=False,
254+
) as span:
255+
start_time = default_timer()
256+
try:
257+
result = wrapped(*args, **kwargs)
258+
except Exception as exc:
259+
span.set_status(StatusCode.ERROR, str(exc))
260+
span.set_attribute(ERROR_TYPE, exc.__class__.__qualname__)
261+
span.end()
262+
_record_operation_duration_metric(self.operation_duration_metric, span, start_time)
263+
raise
264+
265+
_set_embeddings_span_attributes_from_response(span, result.model, result.usage)
266+
267+
_record_token_usage_metrics(self.token_usage_metric, span, result.usage)
268+
_record_operation_duration_metric(self.operation_duration_metric, span, start_time)
269+
270+
span.end()
271+
272+
return result
273+
274+
async def _async_embeddings_wrapper(self, wrapped, instance, args, kwargs):
275+
span_attributes = _get_embeddings_span_attributes_from_wrapper(instance, kwargs)
276+
277+
span_name = f"{span_attributes[GEN_AI_OPERATION_NAME]} {span_attributes[GEN_AI_REQUEST_MODEL]}"
278+
with self.tracer.start_as_current_span(
279+
name=span_name,
280+
kind=SpanKind.CLIENT,
281+
attributes=span_attributes,
282+
# this is important to avoid having the span closed before ending the stream
283+
end_on_exit=False,
284+
) as span:
285+
start_time = default_timer()
286+
try:
287+
result = await wrapped(*args, **kwargs)
288+
except Exception as exc:
289+
span.set_status(StatusCode.ERROR, str(exc))
290+
span.set_attribute(ERROR_TYPE, exc.__class__.__qualname__)
291+
span.end()
292+
_record_operation_duration_metric(self.operation_duration_metric, span, start_time)
293+
raise
294+
295+
_set_embeddings_span_attributes_from_response(span, result.model, result.usage)
296+
297+
_record_token_usage_metrics(self.token_usage_metric, span, result.usage)
298+
_record_operation_duration_metric(self.operation_duration_metric, span, start_time)
299+
300+
span.end()
301+
302+
return result

instrumentation/elastic-opentelemetry-instrumentation-openai/src/opentelemetry/instrumentation/openai/helpers.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
else:
4747
CompletionUsage = None
4848

49+
GEN_AI_REQUEST_ENCODING_FORMAT = "gen_ai.request.encoding_format"
50+
4951

5052
def _set_span_attributes_from_response(
5153
span: Span, response_id: str, model: str, choices, usage: CompletionUsage
@@ -61,6 +63,11 @@ def _set_span_attributes_from_response(
6163
span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage.completion_tokens)
6264

6365

66+
def _set_embeddings_span_attributes_from_response(span: Span, model: str, usage: CompletionUsage) -> None:
67+
span.set_attribute(GEN_AI_RESPONSE_MODEL, model)
68+
span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage.prompt_tokens)
69+
70+
6471
def _decode_function_arguments(arguments: str):
6572
try:
6673
return json.loads(arguments)
@@ -98,6 +105,23 @@ def _message_from_stream_choices(choices):
98105
return message
99106

100107

108+
def _attributes_from_client(client):
109+
span_attributes = {}
110+
111+
if base_url := getattr(client, "_base_url", None):
112+
if host := getattr(base_url, "host", None):
113+
span_attributes[SERVER_ADDRESS] = host
114+
if port := getattr(base_url, "port", None):
115+
span_attributes[SERVER_PORT] = port
116+
elif scheme := getattr(base_url, "scheme", None):
117+
if scheme == "http":
118+
span_attributes[SERVER_PORT] = 80
119+
elif scheme == "https":
120+
span_attributes[SERVER_PORT] = 443
121+
122+
return span_attributes
123+
124+
101125
def _get_span_attributes_from_wrapper(instance, kwargs):
102126
span_attributes = {
103127
GEN_AI_OPERATION_NAME: "chat",
@@ -106,16 +130,7 @@ def _get_span_attributes_from_wrapper(instance, kwargs):
106130
}
107131

108132
if client := getattr(instance, "_client", None):
109-
if base_url := getattr(client, "_base_url", None):
110-
if host := getattr(base_url, "host", None):
111-
span_attributes[SERVER_ADDRESS] = host
112-
if port := getattr(base_url, "port", None):
113-
span_attributes[SERVER_PORT] = port
114-
elif scheme := getattr(base_url, "scheme", None):
115-
if scheme == "http":
116-
span_attributes[SERVER_PORT] = 80
117-
elif scheme == "https":
118-
span_attributes[SERVER_PORT] = 443
133+
span_attributes.update(_attributes_from_client(client))
119134

120135
if (frequency_penalty := kwargs.get("frequency_penalty")) is not None:
121136
span_attributes[GEN_AI_REQUEST_FREQUENCY_PENALTY] = frequency_penalty
@@ -135,6 +150,22 @@ def _get_span_attributes_from_wrapper(instance, kwargs):
135150
return span_attributes
136151

137152

153+
def _get_embeddings_span_attributes_from_wrapper(instance, kwargs):
154+
span_attributes = {
155+
GEN_AI_OPERATION_NAME: "embeddings",
156+
GEN_AI_REQUEST_MODEL: kwargs["model"],
157+
GEN_AI_SYSTEM: "openai",
158+
}
159+
160+
if client := getattr(instance, "_client", None):
161+
span_attributes.update(_attributes_from_client(client))
162+
163+
if (encoding_format := kwargs.get("encoding_format")) is not None:
164+
span_attributes[GEN_AI_REQUEST_ENCODING_FORMAT] = encoding_format
165+
166+
return span_attributes
167+
168+
138169
def _get_attributes_if_set(span: Span, names: Iterable) -> dict:
139170
"""Returns a dict with any attribute found in the span attributes"""
140171
attributes = span.attributes
@@ -154,7 +185,9 @@ def _record_token_usage_metrics(metric: Histogram, span: Span, usage: Completion
154185
),
155186
)
156187
metric.record(usage.prompt_tokens, {**token_usage_metric_attrs, GEN_AI_TOKEN_TYPE: "input"})
157-
metric.record(usage.completion_tokens, {**token_usage_metric_attrs, GEN_AI_TOKEN_TYPE: "output"})
188+
# embeddings responses only have input tokens
189+
if hasattr(usage, "completion_tokens"):
190+
metric.record(usage.completion_tokens, {**token_usage_metric_attrs, GEN_AI_TOKEN_TYPE: "output"})
158191

159192

160193
def _record_operation_duration_metric(metric: Histogram, span: Span, start: float):

0 commit comments

Comments
 (0)