Rename LLM and Embedding span attributes #2270

Merged · 11 commits · Mar 11, 2024

Summary: this PR renames the token-count span attributes from llm.token_count.{prompt,completion,total} to llm.usage.{prompt_tokens,completion_tokens,total_tokens} (and, via the shared enrichment helper, embedding.usage.* for Embedding spans), and renames llm.model to llm.response.model. The constants, the enrichment logic in _trace.py, and the executor e2e trace tests are updated to match; the cumulative __computed__.cumulative_token_count.* attributes are unchanged.
6 changes: 3 additions & 3 deletions src/promptflow/promptflow/_constants.py
@@ -111,9 +111,9 @@ class SpanAttributeFieldName:
     INPUTS = "inputs"
     OUTPUT = "output"
     # token metrics
-    COMPLETION_TOKEN_COUNT = "llm.token_count.completion"
-    PROMPT_TOKEN_COUNT = "llm.token_count.prompt"
-    TOTAL_TOKEN_COUNT = "llm.token_count.total"
+    COMPLETION_TOKEN_COUNT = "llm.usage.completion_tokens"
+    PROMPT_TOKEN_COUNT = "llm.usage.prompt_tokens"
+    TOTAL_TOKEN_COUNT = "llm.usage.total_tokens"
     CUMULATIVE_COMPLETION_TOKEN_COUNT = "__computed__.cumulative_token_count.completion"
     CUMULATIVE_PROMPT_TOKEN_COUNT = "__computed__.cumulative_token_count.prompt"
     CUMULATIVE_TOTAL_TOKEN_COUNT = "__computed__.cumulative_token_count.total"
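To make the rename concrete, here is a short reference sketch (not part of the diff) mapping the old attribute names to the new ones:

# Old span attribute name -> new span attribute name
RENAMED_TOKEN_ATTRIBUTES = {
    "llm.token_count.prompt": "llm.usage.prompt_tokens",
    "llm.token_count.completion": "llm.usage.completion_tokens",
    "llm.token_count.total": "llm.usage.total_tokens",
}
# The cumulative "__computed__.cumulative_token_count.*" names are intentionally unchanged.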
15 changes: 8 additions & 7 deletions src/promptflow/promptflow/tracing/_trace.py
@@ -12,18 +12,18 @@
 from typing import Callable, List, Optional

 import opentelemetry.trace as otel_trace
-from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.trace import Link
-from opentelemetry.trace.status import StatusCode
 from opentelemetry.trace.span import NonRecordingSpan
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.trace.status import StatusCode

 from promptflow._core.generator_proxy import GeneratorProxy
 from promptflow._core.operation_context import OperationContext
 from promptflow._utils.dataclass_serializer import serialize
 from promptflow._utils.tool_utils import get_inputs_for_prompt_template, get_prompt_param_name_from_func

 from .._utils.utils import default_json_encoder
-from ._tracer import _create_trace_from_function_call, get_node_name_from_context, Tracer
+from ._tracer import Tracer, _create_trace_from_function_call, get_node_name_from_context
 from .contracts.trace import TraceType

 IS_LEGACY_OPENAI = version("openai").startswith("0.")
@@ -146,6 +146,7 @@ def traced_generator(generator, original_span: ReadableSpan):
     # TODO: Enrich LLM token count for streaming scenario
     if original_span.attributes["span_type"] == "LLM" and not IS_LEGACY_OPENAI:
         from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
         chunks = []
         role = "assistant"
         for item in generator_output:
@@ -181,7 +182,7 @@ def enrich_span_with_openai_tokens(span, trace_type):
         if tokens:
             span_tokens = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
             if trace_type in [TraceType.LLM, TraceType.EMBEDDING]:
-                llm_tokens = {f"{trace_type.value.lower()}.token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
+                llm_tokens = {f"{trace_type.value.lower()}.usage.{k}": v for k, v in tokens.items()}
                 span_tokens.update(llm_tokens)
             span.set_attributes(span_tokens)
     except Exception as e:
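The rename falls out of the key construction above: the old code truncated each OpenAI usage key at the first underscore, while the new code keeps the key verbatim. A minimal sketch (not part of the diff) with a typical usage payload:

tokens = {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46}

# Old derivation: truncate each usage key at the first underscore.
old_keys = {f"llm.token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
# -> {"llm.token_count.prompt": 12, "llm.token_count.completion": 34, "llm.token_count.total": 46}

# New derivation: keep the OpenAI usage key as-is.
new_keys = {f"llm.usage.{k}": v for k, v in tokens.items()}
# -> {"llm.usage.prompt_tokens": 12, "llm.usage.completion_tokens": 34, "llm.usage.total_tokens": 46}

# The cumulative attributes still use the truncated form:
cumulative = {f"__computed__.cumulative_token_count.{k.split('_')[0]}": v for k, v in tokens.items()}
# -> {"__computed__.cumulative_token_count.prompt": 12, ...}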
@@ -212,10 +213,10 @@ def enrich_span_with_embedding(span, inputs, output):
 def _is_single_input(embedding_inputs):
     # OpenAI Embedding API accepts a single string/tokenized string or a list of string/tokenized string as input.
     # For the single string/tokenized string case, we should return true, otherwise return false.
-    if (isinstance(embedding_inputs, str)):
+    if isinstance(embedding_inputs, str):
         # input is a string
         return True
-    elif (isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs)):
+    elif isinstance(embedding_inputs, list) and all(isinstance(i, int) for i in embedding_inputs):
         # input is a token array
         return True
     return False
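As a quick sanity check (not part of the diff), this is how _is_single_input classifies the input shapes the comment describes:

assert _is_single_input("Hello") is True             # a single string
assert _is_single_input([9906]) is True              # a single tokenized string (token array)
assert _is_single_input(["a", "b"]) is False         # a list of strings
assert _is_single_input([[9906], [1234]]) is False   # a list of token arrays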
@@ -228,7 +229,7 @@ def enrich_span_with_llm_model(span, output):
         from openai.types.completion import Completion

         if isinstance(output, (ChatCompletion, Completion)):
-            span.set_attribute("llm.model", output.model)
+            span.set_attribute("llm.response.model", output.model)
     except Exception as e:
         logging.warning(f"Failed to enrich span with llm model: {e}")
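The llm.model -> llm.response.model rename records that this value comes from the response object, which can name a more specific model than the one requested (for example, a deployment alias resolving to a dated snapshot). A minimal sketch (not part of the diff), using a hypothetical stand-in for the OpenAI response type:

# Hypothetical stand-in; the real code checks isinstance against openai's
# ChatCompletion/Completion classes before reading .model.
class StubCompletion:
    model = "gpt-35-turbo"

output = StubCompletion()
span_attributes = {"llm.response.model": output.model}
# -> {"llm.response.model": "gpt-35-turbo"}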

14 changes: 7 additions & 7 deletions src/promptflow/tests/executor/e2etests/test_traces.py
@@ -15,7 +15,7 @@
 from promptflow.tracing.contracts.trace import TraceType

 from ..process_utils import execute_function_in_subprocess
-from ..utils import get_flow_folder, get_flow_sample_inputs, get_yaml_file, prepare_memory_exporter, load_content
+from ..utils import get_flow_folder, get_flow_sample_inputs, get_yaml_file, load_content, prepare_memory_exporter

 LLM_FUNCTION_NAMES = [
     "openai.resources.chat.completions.Completions.create",
@@ -30,9 +30,9 @@
 ]

 LLM_TOKEN_NAMES = [
-    "llm.token_count.prompt",
-    "llm.token_count.completion",
-    "llm.token_count.total",
+    "llm.usage.prompt_tokens",
+    "llm.usage.completion_tokens",
+    "llm.usage.total_tokens",
 ]
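For context, validate_openai_tokens (called in the tests further down) presumably asserts these names against each span; its body is not shown in this diff, so the following is a hedged sketch only:

# Assumed shape, for illustration only (helper name is hypothetical).
def _assert_llm_token_attributes(span):
    for name in LLM_TOKEN_NAMES:
        assert name in span.attributes, f"missing token attribute: {name}"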

 EMBEDDING_TOKEN_NAMES = [
@@ -350,7 +350,7 @@ def assert_otel_traces(self, dev_connections, flow_file, inputs, expected_span_length):
("llm_tool", {"topic": "Hello", "stream": False}, "joke.jinja2"),
# Add back this test case after changing the interface of render_template_jinja2
# ("prompt_tools", {"text": "test"}, "summarize_text_content_prompt.jinja2"),
]
],
)
def test_otel_trace_with_prompt(
self,
@@ -419,15 +419,15 @@ def assert_otel_traces_with_llm(self, dev_connections, flow_file, inputs, expected_span_length):
         self.validate_openai_tokens(span_list)
         for span in span_list:
             if span.attributes.get("function", "") in LLM_FUNCTION_NAMES:
-                assert span.attributes.get("llm.model", "") in ["gpt-35-turbo", "text-ada-001"]
+                assert span.attributes.get("llm.response.model", "") in ["gpt-35-turbo", "text-ada-001"]

     @pytest.mark.parametrize(
         "flow_file, inputs, expected_span_length",
         [
             ("openai_embedding_api_flow", {"input": "Hello"}, 3),
             # [9906] is the tokenized version of "Hello"
             ("openai_embedding_api_flow_with_token", {"input": [9906]}, 3),
-        ]
+        ],
     )
     def test_otel_trace_with_embedding(
         self,