Rename LLM and Embedding span attributes #2270

Merged · 11 commits · Mar 11, 2024
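For context, a minimal illustrative sketch (not part of this PR) of how the renamed attributes look when set on an OpenTelemetry span; the span name and token values are assumed:

```python
# Minimal sketch, assuming a configured OpenTelemetry tracer provider.
# The attribute keys are the renamed ones from this PR; the span name and
# numeric values are illustrative only.
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("openai_chat_completion") as span:
    span.set_attribute("llm.usage.prompt_tokens", 12)        # was llm.token_count.prompt
    span.set_attribute("llm.usage.completion_tokens", 3)     # was llm.token_count.completion
    span.set_attribute("llm.usage.total_tokens", 15)         # was llm.token_count.total
    span.set_attribute("llm.response.model", "gpt-35-turbo")  # was llm.model / embedding.model
```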
6 changes: 3 additions & 3 deletions src/promptflow/promptflow/_constants.py
@@ -111,9 +111,9 @@ class SpanAttributeFieldName:
INPUTS = "inputs"
OUTPUT = "output"
# token metrics
COMPLETION_TOKEN_COUNT = "llm.token_count.completion"
PROMPT_TOKEN_COUNT = "llm.token_count.prompt"
TOTAL_TOKEN_COUNT = "llm.token_count.total"
COMPLETION_TOKEN_COUNT = "llm.usage.completion_tokens"
PROMPT_TOKEN_COUNT = "llm.usage.prompt_tokens"
TOTAL_TOKEN_COUNT = "llm.usage.total_tokens"
CUMULATIVE_COMPLETION_TOKEN_COUNT = "__computed__.cumulative_token_count.completion"
CUMULATIVE_PROMPT_TOKEN_COUNT = "__computed__.cumulative_token_count.prompt"
CUMULATIVE_TOTAL_TOKEN_COUNT = "__computed__.cumulative_token_count.total"
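For illustration, a small sketch (not part of this change) of reading token usage back through the renamed constants; the `span_attributes` dict below is an assumed stand-in for a span's stored attributes, not an API from this PR:

```python
# Illustrative only: the attribute dict is assumed sample data.
from promptflow._constants import SpanAttributeFieldName

span_attributes = {
    "llm.usage.prompt_tokens": 12,
    "llm.usage.completion_tokens": 3,
    "llm.usage.total_tokens": 15,
}

total = span_attributes.get(SpanAttributeFieldName.TOTAL_TOKEN_COUNT, 0)
print(total)  # 15
```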
17 changes: 9 additions & 8 deletions src/promptflow/promptflow/tracing/_trace.py
@@ -12,18 +12,18 @@
from typing import Callable, List, Optional

import opentelemetry.trace as otel_trace
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace import Link
from opentelemetry.trace.status import StatusCode
from opentelemetry.trace.span import NonRecordingSpan
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace.status import StatusCode

from promptflow._core.generator_proxy import GeneratorProxy
from promptflow._core.operation_context import OperationContext
from promptflow._utils.dataclass_serializer import serialize
from promptflow._utils.tool_utils import get_inputs_for_prompt_template, get_prompt_param_name_from_func

from .._utils.utils import default_json_encoder
-from ._tracer import _create_trace_from_function_call, get_node_name_from_context, Tracer
+from ._tracer import Tracer, _create_trace_from_function_call, get_node_name_from_context
from .contracts.trace import TraceType

IS_LEGACY_OPENAI = version("openai").startswith("0.")
@@ -146,6 +146,7 @@ def traced_generator(generator, original_span: ReadableSpan):
# TODO: Enrich LLM token count for streaming scenario
if original_span.attributes["span_type"] == "LLM" and not IS_LEGACY_OPENAI:
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

chunks = []
role = "assistant"
for item in generator_output:
@@ -181,7 +182,7 @@ def enrich_span_with_openai_tokens(span, trace_type):
if tokens:
span_tokens = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
if trace_type in [TraceType.LLM, TraceType.EMBEDDING]:
llm_tokens = {f"{trace_type.value.lower()}.token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
llm_tokens = {f"llm.usage.{k}": v for k, v in tokens.items()}
span_tokens.update(llm_tokens)
span.set_attributes(span_tokens)
except Exception as e:
@@ -193,7 +194,7 @@ def enrich_span_with_embedding(span, inputs, output):

try:
if isinstance(output, CreateEmbeddingResponse):
span.set_attribute("embedding.model", output.model)
span.set_attribute("llm.response.model", output.model)
embeddings = []
input_list = [emb_input] if _is_single_input(emb_input := inputs["input"]) else emb_input
for emb in output.data:
@@ -212,10 +213,10 @@ def enrich_span_with_embedding(span, inputs, output):
def _is_single_input(embedding_inputs):
# OpenAI Embedding API accepts a single string/tokenized string or a list of string/tokenized string as input.
# For the single string/tokenized string case, we should return true, otherwise return false.
-if (isinstance(embedding_inputs, str)):
+if isinstance(embedding_inputs, str):
# input is a string
return True
-elif (isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs)):
+elif isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs):
# input is a token array
return True
return False
@@ -228,7 +229,7 @@ def enrich_span_with_llm_model(span, output):
from openai.types.completion import Completion

if isinstance(output, (ChatCompletion, Completion)):
span.set_attribute("llm.model", output.model)
span.set_attribute("llm.response.model", output.model)
except Exception as e:
logging.warning(f"Failed to enrich span with llm model: {e}")

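To make the key construction in `enrich_span_with_openai_tokens` concrete, a worked example; the `tokens` dict is an assumed OpenAI-style usage payload, not taken from this diff:

```python
tokens = {"prompt_tokens": 12, "completion_tokens": 3, "total_tokens": 15}

# Cumulative keys strip the "_tokens" suffix via k.split('_')[0]:
span_tokens = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
# {'__computed__.cumulative_token_count.prompt': 12,
#  '__computed__.cumulative_token_count.completion': 3,
#  '__computed__.cumulative_token_count.total': 15}

# The renamed LLM keys keep the OpenAI usage field names as-is:
llm_tokens = {f"llm.usage.{k}": v for k, v in tokens.items()}
# {'llm.usage.prompt_tokens': 12,
#  'llm.usage.completion_tokens': 3,
#  'llm.usage.total_tokens': 15}
```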
16 changes: 9 additions & 7 deletions src/promptflow/tests/executor/e2etests/test_traces.py
@@ -30,14 +30,16 @@
]

LLM_TOKEN_NAMES = [
"llm.token_count.prompt",
"llm.token_count.completion",
"llm.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.completion_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

EMBEDDING_TOKEN_NAMES = [
"embedding.token_count.prompt",
"embedding.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

CUMULATIVE_LLM_TOKEN_NAMES = [
@@ -427,7 +429,7 @@ def assert_otel_traces_with_llm(self, dev_connections, flow_file, inputs, expect
self.validate_openai_tokens(span_list)
for span in span_list:
if span.attributes.get("function", "") in LLM_FUNCTION_NAMES:
assert span.attributes.get("llm.model", "") in ["gpt-35-turbo", "text-ada-001"]
assert span.attributes.get("llm.response.model", "") in ["gpt-35-turbo", "text-ada-001"]

@pytest.mark.parametrize(
"flow_file, inputs, expected_span_length",
@@ -463,7 +465,7 @@ def assert_otel_traces_with_embedding(self, dev_connections, flow_file, inputs,
self.validate_span_list(span_list, line_run_id, expected_span_length)
for span in span_list:
if span.attributes.get("function", "") in EMBEDDING_FUNCTION_NAMES:
assert span.attributes.get("embedding.model", "") == "ada"
assert span.attributes.get("llm.response.model", "") == "ada"
embeddings = span.attributes.get("embedding.embeddings", "")
assert "embedding.vector" in embeddings
assert "embedding.text" in embeddings
18 changes: 10 additions & 8 deletions src/promptflow/tests/tracing_test/e2etests/test_trace.py
@@ -30,14 +30,16 @@
]

LLM_TOKEN_NAMES = [
"llm.token_count.prompt",
"llm.token_count.completion",
"llm.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.completion_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

EMBEDDING_TOKEN_NAMES = [
"embedding.token_count.prompt",
"embedding.token_count.total",
"llm.usage.prompt_tokens",
"llm.usage.total_tokens",
"llm.response.model",
]

CUMULATIVE_LLM_TOKEN_NAMES = [
@@ -77,7 +79,7 @@ def assert_otel_trace(self, func, inputs, expected_span_length):
"func, inputs",
[
(render_prompt_template, {"prompt": "Hello {{name}}!", "name": "world"}),
-]
+],
)
def test_otel_trace_with_prompt(self, func, inputs):
execute_function_in_subprocess(self.assert_otel_traces_with_prompt, func, inputs)
@@ -130,7 +132,7 @@ def assert_otel_trace_with_llm(self, dev_connections, func, inputs, expected_spa
(openai_embedding_async, {"input": "Hello"}, 2),
# [9906] is the tokenized version of "Hello"
(openai_embedding_async, {"input": [9906]}, 2),
-]
+],
)
def test_otel_trace_with_embedding(
self,
@@ -156,7 +158,7 @@ def assert_otel_traces_with_embedding(self, dev_connections, func, inputs, expec
self.validate_openai_tokens(span_list)
for span in span_list:
if span.attributes.get("function", "") in EMBEDDING_FUNCTION_NAMES:
assert span.attributes.get("embedding.model", "") == "ada"
assert span.attributes.get("llm.response.model", "") == "ada"
embeddings = span.attributes.get("embedding.embeddings", "")
assert "embedding.vector" in embeddings
assert "embedding.text" in embeddings