Rename LLM and Embedding span attributes #2270

Merged · 11 commits · Mar 11, 2024

Summary: this PR renames the token-count span attributes from llm.token_count.{prompt,completion,total} to llm.usage.{prompt_tokens,completion_tokens,total_tokens} (and, via the shared enrichment helper, embedding.usage.* for Embedding spans), and renames llm.model to llm.response.model. The constants, the enrichment logic in _trace.py, and the executor e2e trace tests are updated to match; the cumulative __computed__.cumulative_token_count.* attributes are unchanged.
6 changes: 3 additions & 3 deletions src/promptflow/promptflow/_constants.py
@@ -111,9 +111,9 @@ class SpanAttributeFieldName:
     INPUTS = "inputs"
     OUTPUT = "output"
     # token metrics
-    COMPLETION_TOKEN_COUNT = "llm.token_count.completion"
-    PROMPT_TOKEN_COUNT = "llm.token_count.prompt"
-    TOTAL_TOKEN_COUNT = "llm.token_count.total"
+    COMPLETION_TOKEN_COUNT = "llm.usage.completion_tokens"
+    PROMPT_TOKEN_COUNT = "llm.usage.prompt_tokens"
+    TOTAL_TOKEN_COUNT = "llm.usage.total_tokens"
     CUMULATIVE_COMPLETION_TOKEN_COUNT = "__computed__.cumulative_token_count.completion"
     CUMULATIVE_PROMPT_TOKEN_COUNT = "__computed__.cumulative_token_count.prompt"
     CUMULATIVE_TOTAL_TOKEN_COUNT = "__computed__.cumulative_token_count.total"
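To make the rename concrete, here is a short reference sketch (not part of the diff) mapping the old attribute names to the new ones:

# Old span attribute name -> new span attribute name
RENAMED_TOKEN_ATTRIBUTES = {
    "llm.token_count.prompt": "llm.usage.prompt_tokens",
    "llm.token_count.completion": "llm.usage.completion_tokens",
    "llm.token_count.total": "llm.usage.total_tokens",
}
# The cumulative "__computed__.cumulative_token_count.*" names are intentionally unchanged.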
15 changes: 8 additions & 7 deletions src/promptflow/promptflow/tracing/_trace.py
@@ -12,18 +12,18 @@
 from typing import Callable, List, Optional

 import opentelemetry.trace as otel_trace
-from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.trace import Link
-from opentelemetry.trace.status import StatusCode
 from opentelemetry.trace.span import NonRecordingSpan
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.trace.status import StatusCode

 from promptflow._core.generator_proxy import GeneratorProxy
 from promptflow._core.operation_context import OperationContext
 from promptflow._utils.dataclass_serializer import serialize
 from promptflow._utils.tool_utils import get_inputs_for_prompt_template, get_prompt_param_name_from_func

 from .._utils.utils import default_json_encoder
-from ._tracer import _create_trace_from_function_call, get_node_name_from_context, Tracer
+from ._tracer import Tracer, _create_trace_from_function_call, get_node_name_from_context
 from .contracts.trace import TraceType

 IS_LEGACY_OPENAI = version("openai").startswith("0.")
@@ -146,6 +146,7 @@ def traced_generator(generator, original_span: ReadableSpan):
     # TODO: Enrich LLM token count for streaming scenario
     if original_span.attributes["span_type"] == "LLM" and not IS_LEGACY_OPENAI:
         from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
         chunks = []
         role = "assistant"
         for item in generator_output:
@@ -181,7 +182,7 @@ def enrich_span_with_openai_tokens(span, trace_type):
         if tokens:
             span_tokens = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
             if trace_type in [TraceType.LLM, TraceType.EMBEDDING]:
-                llm_tokens = {f"{trace_type.value.lower()}.token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
+                llm_tokens = {f"{trace_type.value.lower()}.usage.{k}": v for k, v in tokens.items()}
                 span_tokens.update(llm_tokens)
             span.set_attributes(span_tokens)
     except Exception as e:
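The rename falls out of the key construction above: the old code truncated each OpenAI usage key at the first underscore, while the new code keeps the key verbatim. A minimal sketch (not part of the diff) with a typical usage payload:

tokens = {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46}

# Old derivation: truncate each usage key at the first underscore.
old_keys = {f"llm.token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
# -> {"llm.token_count.prompt": 12, "llm.token_count.completion": 34, "llm.token_count.total": 46}

# New derivation: keep the OpenAI usage key as-is.
new_keys = {f"llm.usage.{k}": v for k, v in tokens.items()}
# -> {"llm.usage.prompt_tokens": 12, "llm.usage.completion_tokens": 34, "llm.usage.total_tokens": 46}

# The cumulative attributes still use the truncated form:
cumulative = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
# -> {"__computed__.cumulative_token_count.prompt": 12, ...}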
@@ -212,10 +213,10 @@ def enrich_span_with_embedding(span, inputs, output):
 def _is_single_input(embedding_inputs):
     # OpenAI Embedding API accepts a single string/tokenized string or a list of string/tokenized string as input.
     # For the single string/tokenized string case, we should return true, otherwise return false.
-    if (isinstance(embedding_inputs, str)):
+    if isinstance(embedding_inputs, str):
         # input is a string
         return True
-    elif (isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs)):
+    elif isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs):
         # input is a token array
         return True
     return False
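As a quick sanity check (not part of the diff), this is how _is_single_input classifies the input shapes the comment describes:

assert _is_single_input("Hello") is True             # a single string
assert _is_single_input([9906]) is True              # a single tokenized string (token array)
assert _is_single_input(["a", "b"]) is False         # a list of strings
assert _is_single_input([[9906], [1234]]) is False   # a list of token arrays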
@@ -228,7 +229,7 @@ def enrich_span_with_llm_model(span, output):
         from openai.types.completion import Completion

         if isinstance(output, (ChatCompletion, Completion)):
-            span.set_attribute("llm.model", output.model)
+            span.set_attribute("llm.response.model", output.model)
     except Exception as e:
         logging.warning(f"Failed to enrich span with llm model: {e}")
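The llm.model -> llm.response.model rename records that this value comes from the response object, which can name a more specific model than the one requested (for example, a deployment alias resolving to a dated snapshot). A minimal sketch (not part of the diff), using a hypothetical stand-in for the OpenAI response type:

# Hypothetical stand-in; the real code checks isinstance against openai's
# ChatCompletion/Completion classes before reading .model.
class StubCompletion:
    model = "gpt-35-turbo"

output = StubCompletion()
span_attributes = {"llm.response.model": output.model}
# -> {"llm.response.model": "gpt-35-turbo"}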

14 changes: 7 additions & 7 deletions src/promptflow/tests/executor/e2etests/test_traces.py
@@ -15,7 +15,7 @@
 from promptflow.tracing.contracts.trace import TraceType

 from ..process_utils import execute_function_in_subprocess
-from ..utils import get_flow_folder, get_flow_sample_inputs, get_yaml_file, prepare_memory_exporter, load_content
+from ..utils import get_flow_folder, get_flow_sample_inputs, get_yaml_file, load_content, prepare_memory_exporter

 LLM_FUNCTION_NAMES = [
     "openai.resources.chat.completions.Completions.create",
@@ -30,9 +30,9 @@
 ]

 LLM_TOKEN_NAMES = [
-    "llm.token_count.prompt",
-    "llm.token_count.completion",
-    "llm.token_count.total",
+    "llm.usage.prompt_tokens",
+    "llm.usage.completion_tokens",
+    "llm.usage.total_tokens",
 ]
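For context, validate_openai_tokens (called in the tests further down) presumably asserts these names against each span; its body is not shown in this diff, so the following is a hedged sketch only:

# Assumed shape, for illustration only (helper name is hypothetical).
def _assert_llm_token_attributes(span):
    for name in LLM_TOKEN_NAMES:
        assert name in span.attributes, f"missing token attribute: {name}"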

 EMBEDDING_TOKEN_NAMES = [
@@ -350,7 +350,7 @@ def assert_otel_traces(self, dev_connections, flow_file, inputs, expected_span_length):
("llm_tool", {"topic": "Hello", "stream": False}, "joke.jinja2"),
# Add back this test case after changing the interface of render_template_jinja2
# ("prompt_tools", {"text": "test"}, "summarize_text_content_prompt.jinja2"),
]
],
)
def test_otel_trace_with_prompt(
self,
@@ -419,15 +419,15 @@ def assert_otel_traces_with_llm(self, dev_connections, flow_file, inputs, expected_span_length):
         self.validate_openai_tokens(span_list)
         for span in span_list:
             if span.attributes.get("function", "") in LLM_FUNCTION_NAMES:
-                assert span.attributes.get("llm.model", "") in ["gpt-35-turbo", "text-ada-001"]
+                assert span.attributes.get("llm.response.model", "") in ["gpt-35-turbo", "text-ada-001"]

     @pytest.mark.parametrize(
         "flow_file, inputs, expected_span_length",
         [
             ("openai_embedding_api_flow", {"input": "Hello"}, 3),
             # [9906] is the tokenized version of "Hello"
             ("openai_embedding_api_flow_with_token", {"input": [9906]}, 3),
-        ]
+        ],
     )
     def test_otel_trace_with_embedding(
         self,