Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ddtrace/llmobs/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,6 @@

# Baggage key checked when building a span event; when present the event is scoped to "experiments".
EXPERIMENT_ID_KEY = "_ml_obs.experiment_id"
# Span context-item key holding the dataset record's expected output for an experiment span.
EXPERIMENT_EXPECTED_OUTPUT = "_ml_obs.meta.input.expected_output"
# Span context-item keys holding the free-form (JSON-serialized) input/output of experiment spans.
EXPERIMENTS_INPUT = "_ml_obs.meta.input"
EXPERIMENTS_OUTPUT = "_ml_obs.meta.output"
DEFAULT_PROJECT_NAME = "default-project"
3 changes: 2 additions & 1 deletion ddtrace/llmobs/_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from ddtrace.llmobs._constants import DD_SITES_NEEDING_APP_SUBDOMAIN
from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT
from ddtrace.llmobs._utils import convert_tags_dict_to_list
from ddtrace.llmobs._utils import safe_json


if TYPE_CHECKING:
Expand Down Expand Up @@ -349,7 +350,7 @@ def _process_record(self, idx_record: Tuple[int, DatasetRecord]) -> Optional[Tas
except Exception:
span.set_exc_info(*sys.exc_info())
self._llmobs_instance.annotate(span, input_data=input_data, output_data=output_data, tags=tags)
span._set_ctx_item(EXPERIMENT_EXPECTED_OUTPUT, record["expected_output"])
span._set_ctx_item(EXPERIMENT_EXPECTED_OUTPUT, safe_json(record["expected_output"]))
return {
"idx": idx,
"span_id": span_id,
Expand Down
31 changes: 28 additions & 3 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
from ddtrace.llmobs._constants import EXPERIMENT_CSV_FIELD_MAX_SIZE
from ddtrace.llmobs._constants import EXPERIMENT_EXPECTED_OUTPUT
from ddtrace.llmobs._constants import EXPERIMENT_ID_KEY
from ddtrace.llmobs._constants import EXPERIMENTS_INPUT
from ddtrace.llmobs._constants import EXPERIMENTS_OUTPUT
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PROMPT
Expand Down Expand Up @@ -278,9 +280,18 @@ def _llmobs_span_event(self, span: Span) -> Optional[LLMObsSpanEvent]:

if span.context.get_baggage_item(EXPERIMENT_ID_KEY):
_dd_attrs["scope"] = "experiments"
expected_output = span._get_ctx_item(EXPERIMENT_EXPECTED_OUTPUT)
if span_kind == "experiment" and expected_output:
meta["expected_output"] = expected_output
if span_kind == "experiment":
expected_output = span._get_ctx_item(EXPERIMENT_EXPECTED_OUTPUT)
if expected_output:
meta["expected_output"] = expected_output

input_data = span._get_ctx_item(EXPERIMENTS_INPUT)
if input_data:
meta["input"] = input_data

output_data = span._get_ctx_item(EXPERIMENTS_OUTPUT)
if output_data:
meta["output"] = output_data

input_messages = span._get_ctx_item(INPUT_MESSAGES)
if span_kind == "llm" and input_messages is not None:
Expand Down Expand Up @@ -1366,6 +1377,8 @@ def annotate(
error = cls._tag_embedding_io(span, input_documents=input_data, output_text=output_data)
elif span_kind == "retrieval":
error = cls._tag_retrieval_io(span, input_text=input_data, output_documents=output_data)
elif span_kind == "experiment":
cls._tag_freeform_io(span, input_value=input_data, output_value=output_data)
else:
cls._tag_text_io(span, input_value=input_data, output_value=output_data)
finally:
Expand Down Expand Up @@ -1447,6 +1460,18 @@ def _tag_text_io(cls, span, input_value=None, output_value=None):
if output_value is not None:
span._set_ctx_item(OUTPUT_VALUE, safe_json(output_value))

@classmethod
def _tag_freeform_io(cls, span, input_value=None, output_value=None):
    """Tag free-form input/output values on an experiment span.

    Each value that is not ``None`` is serialized with ``safe_json`` and
    stored on the span under ``EXPERIMENTS_INPUT`` / ``EXPERIMENTS_OUTPUT``.
    No shape is imposed on the values: experiments accept arbitrary
    structured or unstructured IO data.
    """
    io_items = (
        (EXPERIMENTS_INPUT, input_value),
        (EXPERIMENTS_OUTPUT, output_value),
    )
    for ctx_key, payload in io_items:
        # ``None`` means "not provided"; other falsy values are still recorded.
        if payload is None:
            continue
        span._set_ctx_item(ctx_key, safe_json(payload))

@staticmethod
def _set_dict_attribute(span: Span, key, value: Dict[str, Any]) -> None:
"""Sets a given LLM Obs span attribute with a dictionary key/values.
Expand Down
6 changes: 3 additions & 3 deletions tests/llmobs/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,9 +1078,9 @@ def test_experiment_span_written_to_experiment_scope(llmobs, llmobs_events, test
for key in ("span_id", "trace_id", "parent_id", "start_ns", "duration", "metrics"):
assert event[key] == mock.ANY
assert event["status"] == "ok"
assert event["meta"]["input"] == {"value": '{"prompt": "What is the capital of France?"}'}
assert event["meta"]["output"] == {"value": '{"prompt": "What is the capital of France?"}'}
assert event["meta"]["expected_output"] == {"answer": "Paris"}
assert event["meta"]["input"] == '{"prompt": "What is the capital of France?"}'
assert event["meta"]["output"] == '{"prompt": "What is the capital of France?"}'
assert event["meta"]["expected_output"] == '{"answer": "Paris"}'
assert "dataset_id:{}".format(test_dataset_one_record._id) in event["tags"]
assert "dataset_record_id:{}".format(test_dataset_one_record._records[0]["record_id"]) in event["tags"]
assert "experiment_id:1234567890" in event["tags"]
Expand Down
Loading