Skip to content

Commit fa73d91

Browse files
authored
fix(sdk): manual report of usage data (#3045)
1 parent 8449df8 commit fa73d91

File tree

4 files changed

+47
-206
lines changed

4 files changed

+47
-206
lines changed

packages/traceloop-sdk/tests/cassettes/test_manual/test_manual_report.yaml

Lines changed: 0 additions & 98 deletions
This file was deleted.

packages/traceloop-sdk/tests/cassettes/test_manual/test_resource_attributes.yaml

Lines changed: 0 additions & 98 deletions
This file was deleted.

packages/traceloop-sdk/tests/test_manual.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
from opentelemetry.semconv_ai import SpanAttributes
22
import pytest
33
from openai import OpenAI
4-
from traceloop.sdk.tracing.manual import LLMMessage, track_llm_call
4+
from traceloop.sdk.tracing.manual import LLMMessage, LLMUsage, track_llm_call
55

66

77
@pytest.fixture
88
def openai_client():
99
return OpenAI()
1010

1111

12-
@pytest.mark.vcr
1312
def test_manual_report(exporter, openai_client):
1413
with track_llm_call(vendor="openai", type="chat") as span:
1514
span.report_request(
@@ -19,14 +18,21 @@ def test_manual_report(exporter, openai_client):
1918
],
2019
)
2120

22-
res = openai_client.chat.completions.create(
23-
model="gpt-3.5-turbo",
24-
messages=[
25-
{"role": "user", "content": "Tell me a joke about opentelemetry"}
26-
],
27-
)
21+
res = [
22+
"Why did the opentelemetry developer break up with their partner? Because they were tired"
23+
+ " of constantly tracing their every move!",
24+
]
2825

29-
span.report_response(res.model, [text.message.content for text in res.choices])
26+
span.report_response("gpt-3.5-turbo-0125", res)
27+
span.report_usage(
28+
LLMUsage(
29+
prompt_tokens=15,
30+
completion_tokens=24,
31+
total_tokens=39,
32+
cache_creation_input_tokens=15,
33+
cache_read_input_tokens=18,
34+
)
35+
)
3036

3137
spans = exporter.get_finished_spans()
3238
open_ai_span = spans[0]
@@ -46,3 +52,13 @@ def test_manual_report(exporter, openai_client):
4652
+ " of constantly tracing their every move!"
4753
)
4854
assert open_ai_span.end_time > open_ai_span.start_time
55+
assert open_ai_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS] == 15
56+
assert open_ai_span.attributes[SpanAttributes.LLM_USAGE_COMPLETION_TOKENS] == 24
57+
assert open_ai_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] == 39
58+
assert (
59+
open_ai_span.attributes[SpanAttributes.LLM_USAGE_CACHE_CREATION_INPUT_TOKENS]
60+
== 15
61+
)
62+
assert (
63+
open_ai_span.attributes[SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS] == 18
64+
)

packages/traceloop-sdk/traceloop/sdk/tracing/manual.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ class LLMUsage(BaseModel):
1414
prompt_tokens: int
1515
completion_tokens: int
1616
total_tokens: int
17+
cache_creation_input_tokens: int
18+
cache_read_input_tokens: int
1719

1820

1921
class LLMSpan:
@@ -40,9 +42,28 @@ def report_response(self, model: str, completions: list[str]):
4042
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role", "assistant"
4143
)
4244
self._span.set_attribute(
43-
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}", completion
45+
f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content", completion
4446
)
4547

48+
def report_usage(self, usage: LLMUsage):
49+
self._span.set_attribute(
50+
SpanAttributes.LLM_USAGE_PROMPT_TOKENS, usage.prompt_tokens
51+
)
52+
self._span.set_attribute(
53+
SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, usage.completion_tokens
54+
)
55+
self._span.set_attribute(
56+
SpanAttributes.LLM_USAGE_TOTAL_TOKENS, usage.total_tokens
57+
)
58+
self._span.set_attribute(
59+
SpanAttributes.LLM_USAGE_CACHE_CREATION_INPUT_TOKENS,
60+
usage.cache_creation_input_tokens,
61+
)
62+
self._span.set_attribute(
63+
SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS,
64+
usage.cache_read_input_tokens,
65+
)
66+
4667

4768
@contextmanager
4869
def track_llm_call(vendor: str, type: str):

0 commit comments

Comments (0)