Skip to content

Commit

Permalink
fix(openai): handle async streaming responses for openai v1 client (#421)
Browse files Browse the repository at this point in the history
  • Loading branch information
najork authored Feb 13, 2024
1 parent 4dab114 commit 5af77b5
Show file tree
Hide file tree
Showing 9 changed files with 341 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@ def _set_response_attributes(span, response):

def is_streaming_response(response):
    """Return True if *response* is a streaming (chunked) completion response.

    The openai v1 client wraps streamed responses in dedicated ``Stream`` /
    ``AsyncStream`` objects, so both must be checked; the legacy (<1.0)
    client instead yields a plain sync or async generator.
    """
    if is_openai_v1():
        # v1: sync and async streaming use distinct wrapper types.
        return isinstance(response, (openai.Stream, openai.AsyncStream))

    # Legacy client: a streamed response is just a (sync or async) generator.
    return isinstance(response, (types.GeneratorType, types.AsyncGeneratorType))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
from opentelemetry.semconv.ai import SpanAttributes, LLMRequestTypeValues

from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.instrumentation.openai.utils import (
_with_tracer_wrapper,
start_as_current_span_async,
)
from opentelemetry.instrumentation.openai.utils import _with_tracer_wrapper
from opentelemetry.instrumentation.openai.shared import (
_set_request_attributes,
_set_span_attribute,
async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs):
    """Trace an async OpenAI completion call with an OpenTelemetry span.

    The span is started manually (``tracer.start_span``) rather than via a
    context manager because, for streaming responses, it must stay open
    until the response generator is fully consumed; the streaming wrapper
    is then responsible for ending it.
    """
    if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY):
        # Instrumentation suppressed: forward the call untouched.
        # The wrapped callable is async here, so it must be awaited —
        # returning the bare call would hand the caller an un-awaited
        # coroutine object instead of the completion result.
        return await wrapped(*args, **kwargs)

    span = tracer.start_span(
        name=SPAN_NAME,
        kind=SpanKind.CLIENT,
        attributes={SpanAttributes.LLM_REQUEST_TYPE: LLM_REQUEST_TYPE.value},
    )

    _handle_request(span, kwargs)
    # NOTE(review): if wrapped() raises, the span is never ended — consider
    # try/except with span.record_exception + span.end; confirm desired policy.
    response = await wrapped(*args, **kwargs)

    if is_streaming_response(response):
        # Span will be closed by the streaming wrapper once the async
        # generator is exhausted.
        return _abuild_from_streaming_response(span, response)

    _handle_response(response, span)
    span.end()
    return response


def _handle_request(span, kwargs):
Expand Down
31 changes: 19 additions & 12 deletions packages/opentelemetry-instrumentation-openai/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ vcrpy = "^6.0.1"
pytest-recording = "^0.13.1"
openai = "^1.12.0"
opentelemetry-sdk = "^1.22.0"
pytest-asyncio = "^0.23.5"

[build-system]
requires = [ "poetry-core" ]
Expand Down
2 changes: 2 additions & 0 deletions packages/opentelemetry-instrumentation-openai/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Run async test functions automatically, without requiring a per-test
# @pytest.mark.asyncio marker (pytest-asyncio "auto" mode).
[pytest]
asyncio_mode=auto
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
interactions:
- request:
body: '{"model": "davinci-002", "prompt": "Tell me a joke about opentelemetry"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '72'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- AsyncOpenAI/Python 1.12.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.12.0
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.7
method: POST
uri: https://api.openai.com/v1/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA0SQXWvCMBSG7/srDrnZjUr9WOt6M/BmQzZlTPBiDonpaRtNzylp6tzE/z5Sxd4E
8j55P8g5ABA6FQkIVVamP7XH98V4OVPjxXFWfXzOX54mr3/zt3Al1z+i51/zbo/KeYfDk9sqLiuD
TjNdsbIoHfrEYRzG02j0GMUtKDlF422pPGpSuh+Go5ulYK2wFgl8BQAA5/aEa4F3DDa0oWXjgDPY
S22e/X1VSDrALzeQsYU9a9KUQ1P3YM4FDWCNDxYhZyLZ1rSJmlI8iQTCu2I4ryzvfDs1xtz1TJOu
i61FWTP5EQYpd4Vo+SUA+G63N7XMUSS3zaKyXFZu6/iA5COn1zzR/VLHhtENOnbSdPpoEviGS/AP
AAD//wMAmbPZ/p0BAAA=
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 85467257bc7309bf-HFA
Cache-Control:
- no-cache, must-revalidate
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Mon, 12 Feb 2024 17:10:43 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=WDbPRqWajmIz2LRcLNayFicf4OrePY9xsNEIVcsTRlE-1707757843-1-AXVjVpU5C00qoujxdsI6vodiD1n6Pj4bjOWzmUlVkehvx5r8M6SBkZ5XJMd1641THjGn0RnRRyHlkXpGMiYrs3M=;
path=/; expires=Mon, 12-Feb-24 17:40:43 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=OocqtFxbRBsIIEZcKeLuGSLPLHw8cg_o1z.JxH9O6HU-1707757843870-0-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
access-control-allow-origin:
- '*'
alt-svc:
- h3=":443"; ma=86400
openai-model:
- davinci-002
openai-organization:
- traceloop
openai-processing-ms:
- '133'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=15724800; includeSubDomains
x-ratelimit-limit-requests:
- '3000'
x-ratelimit-limit-tokens:
- '250000'
x-ratelimit-remaining-requests:
- '2999'
x-ratelimit-remaining-tokens:
- '249975'
x-ratelimit-reset-requests:
- 20ms
x-ratelimit-reset-tokens:
- 5ms
x-request-id:
- req_705d0395332e66293a37b416a50a98e1
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
interactions:
- request:
body: '{"model": "davinci-002", "prompt": "Tell me a joke about opentelemetry",
"stream": true}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '88'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- AsyncOpenAI/Python 1.12.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.12.0
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.7
method: POST
uri: https://api.openai.com/v1/completions
response:
body:
string: 'data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"...\n","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"I","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
am","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
interested","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
in","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
what","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
others","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
are","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
saying","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
about","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
op","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"ente","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"lemetry","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":",","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
because","index":0,"logprobs":null,"finish_reason":null}],"model":"davinci-002"}
data: {"id":"cmpl-8rvMNoHTsfPzyfH0EqJKzd675XWrh","object":"text_completion","created":1707862567,"choices":[{"text":"
it","index":0,"logprobs":null,"finish_reason":"length"}],"model":"davinci-002"}
data: [DONE]
'
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8546737919b909c1-HFA
Cache-Control:
- no-cache, must-revalidate
Connection:
- keep-alive
Content-Type:
- text/event-stream
Date:
- Mon, 12 Feb 2024 17:11:29 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=TjHANioIQDz1WMG719e9GXn.42QzfAnG0dH9awPNqDk-1707757889-1-AWC0wRI2j8+0fSgwORy2y5GcvOInCeVvww2j9YhYdZzz4ZkgSwwUoEAk8N1S0vGVDdIsWGCvOPUN+kyew1D8GSI=;
path=/; expires=Mon, 12-Feb-24 17:41:29 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=O4hUzPakmT.WjhI.Pw2hu8ilrT4EUlB7kDmt0rBPQLQ-1707757889752-0-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
access-control-allow-origin:
- '*'
alt-svc:
- h3=":443"; ma=86400
openai-model:
- davinci-002
openai-organization:
- traceloop
openai-processing-ms:
- '45'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=15724800; includeSubDomains
x-ratelimit-limit-requests:
- '3000'
x-ratelimit-limit-tokens:
- '250000'
x-ratelimit-remaining-requests:
- '2999'
x-ratelimit-remaining-tokens:
- '249975'
x-ratelimit-reset-requests:
- 20ms
x-ratelimit-reset-tokens:
- 5ms
x-request-id:
- req_933c7953e50739bbd588b14542770259
status:
code: 200
message: OK
version: 1
Loading

0 comments on commit 5af77b5

Please sign in to comment.