Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions ddtrace/llmobs/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import Union
from typing import cast
from urllib.parse import quote
from urllib.parse import urlparse


# TypedDict was added to typing in python 3.8
Expand Down Expand Up @@ -156,6 +157,14 @@ def __init__(
f"{self.AGENTLESS_BASE_URL}.{self._site}" if is_agentless else agent_config.trace_agent_url
)
self._endpoint: str = self.ENDPOINT if is_agentless else f"{EVP_PROXY_AGENT_BASE_PATH}{self.ENDPOINT}"
override_url_parsed = urlparse(self._override_url)
if self._override_url and override_url_parsed.scheme != "unix" and override_url_parsed.path not in ("/", ""):
# handles cases where the override url includes a base path, ie
# http://localhost:8080/foo/bar and endpoint /buz/baz
# we need to strip the base path from the endpoint so the eventual urljoin works properly
# to form http://localhost:8080/foo/bar/buz/baz
self._endpoint = self.ENDPOINT.lstrip("/")

self._headers: Dict[str, str] = {"Content-Type": "application/json"}
if is_agentless:
self._headers["DD-API-KEY"] = self._api_key
Expand Down
8 changes: 7 additions & 1 deletion tests/llmobs/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,11 @@ def llmobs_enable_opts():
yield {"project_name": "test-project"}


@pytest.fixture
def llmobs_api_proxy_url():
return "http://localhost:9126/vcr/datadog"


@pytest.fixture
def llmobs(
ddtrace_global_config,
Expand All @@ -261,6 +266,7 @@ def llmobs(
llmobs_enable_opts,
llmobs_env,
llmobs_span_writer,
llmobs_api_proxy_url,
mock_llmobs_eval_metric_writer,
mock_llmobs_evaluator_runner,
):
Expand All @@ -274,7 +280,7 @@ def llmobs(
llmobs_service.enable(_tracer=tracer, **llmobs_enable_opts)
llmobs_service._instance._llmobs_span_writer = llmobs_span_writer
llmobs_service._instance._llmobs_span_writer.start()
llmobs_service._instance._dne_client._intake = "http://localhost:9126/vcr/datadog"
llmobs_service._instance._dne_client._intake = llmobs_api_proxy_url
yield llmobs_service
tracer.shutdown()
llmobs_service.disable()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
interactions:
- request:
body: '{"data": {"type": "evaluation_metric", "attributes": {"metrics": [{"join_on":
{"span": {"span_id": "12345678901", "trace_id": "98765432101"}}, "metric_type":
"categorical", "categorical_value": "very", "label": "toxicity", "ml_app": "dummy-ml-app",
"timestamp_ms": 1756910127022}]}}}'
headers:
Accept:
- '*/*'
? !!python/object/apply:multidict._multidict.istr
- Accept-Encoding
: - identity
Connection:
- keep-alive
Content-Length:
- '283'
? !!python/object/apply:multidict._multidict.istr
- Content-Type
: - application/json
User-Agent:
- python-requests/2.32.4
method: POST
uri: https://api.datadoghq.com/api/intake/llm-obs/v2/eval-metric
response:
body:
string: '{"data":{"id":"1ef94721-392d-4612-ad63-5f3b289c1cd5","type":"evaluation_metric","attributes":{"metrics":[{"id":"-Xbd-WStY2","join_on":{"span":{"trace_id":"98765432101","span_id":"12345678901"}},"timestamp_ms":1756910127022,"ml_app":"dummy-ml-app","metric_type":"categorical","label":"toxicity","categorical_value":"very"}]}}}'
headers:
content-length:
- '325'
content-security-policy:
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
content-type:
- application/vnd.api+json
date:
- Wed, 03 Sep 2025 14:41:13 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
vary:
- Accept-Encoding
x-content-type-options:
- nosniff
x-frame-options:
- SAMEORIGIN
status:
code: 202
message: Accepted
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
interactions:
- request:
body: '{"data": {"type": "evaluation_metric", "attributes": {"metrics": [{"join_on":
{"span": {"span_id": "123", "trace_id": "1234"}}, "label": "dummy", "metric_type":
"score", "timestamp_ms": 1757074814754, "score_value": 1.0, "ml_app": "unnamed-ml-app",
"tags": ["ddtrace.version:3.13.0.dev56+gf40756451.d20250822", "ml_app:unnamed-ml-app"]}]}}}'
headers:
Accept:
- '*/*'
? !!python/object/apply:multidict._multidict.istr
- Accept-Encoding
: - identity
Connection:
- keep-alive
Content-Length:
- '340'
? !!python/object/apply:multidict._multidict.istr
- Content-Type
: - application/json
User-Agent:
- python-requests/2.32.4
method: POST
uri: https://api.datadoghq.com/api/intake/llm-obs/v2/eval-metric
response:
body:
string: '{"status":"error","code":403,"errors":["Forbidden"],"statuspage":"http://status.datadoghq.com","twitter":"http://twitter.com/datadogops","email":"support@datadoghq.com"}'
headers:
connection:
- close
content-length:
- '169'
content-type:
- application/json
date:
- Fri, 05 Sep 2025 12:20:14 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-content-type-options:
- nosniff
status:
code: 403
message: Forbidden
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
interactions:
- request:
body: '{"data": {"type": "evaluation_metric", "attributes": {"metrics": [{"join_on":
{"span": {"span_id": "12345678902", "trace_id": "98765432102"}}, "metric_type":
"score", "label": "sentiment", "score_value": 0.9, "ml_app": "dummy-ml-app",
"timestamp_ms": 1756910127022}]}}}'
headers:
Accept:
- '*/*'
? !!python/object/apply:multidict._multidict.istr
- Accept-Encoding
: - identity
Connection:
- keep-alive
Content-Length:
- '269'
? !!python/object/apply:multidict._multidict.istr
- Content-Type
: - application/json
User-Agent:
- python-requests/2.32.4
method: POST
uri: https://api.datadoghq.com/api/intake/llm-obs/v2/eval-metric
response:
body:
string: '{"data":{"id":"c7ca5837-c593-4973-aefc-fe9ccbca1e74","type":"evaluation_metric","attributes":{"metrics":[{"id":"BKrS9Vc9nU","join_on":{"span":{"trace_id":"98765432102","span_id":"12345678902"}},"timestamp_ms":1756910127022,"ml_app":"dummy-ml-app","metric_type":"score","label":"sentiment","score_value":0.9}]}}}'
headers:
content-length:
- '311'
content-security-policy:
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
content-type:
- application/vnd.api+json
date:
- Wed, 03 Sep 2025 14:45:25 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
vary:
- Accept-Encoding
x-content-type-options:
- nosniff
x-frame-options:
- SAMEORIGIN
status:
code: 202
message: Accepted
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
interactions:
- request:
body: '{"data": {"type": "evaluation_metric", "attributes": {"metrics": [{"join_on":
{"span": {"span_id": "123", "trace_id": "1234"}}, "label": "dummy", "metric_type":
"score", "timestamp_ms": 1756911917780, "score_value": 1.0, "ml_app": "unnamed-ml-app",
"tags": ["ddtrace.version:3.13.0.dev56+gf40756451.d20250822", "ml_app:unnamed-ml-app"]}]}}}'
headers:
Accept:
- '*/*'
? !!python/object/apply:multidict._multidict.istr
- Accept-Encoding
: - identity
Connection:
- keep-alive
Content-Length:
- '340'
? !!python/object/apply:multidict._multidict.istr
- Content-Type
: - application/json
User-Agent:
- python-requests/2.32.4
method: POST
uri: https://api.datadoghq.com/api/intake/llm-obs/v2/eval-metric
response:
body:
string: '{"status":"error","code":403,"errors":["Forbidden"],"statuspage":"http://status.datadoghq.com","twitter":"http://twitter.com/datadogops","email":"support@datadoghq.com"}'
headers:
connection:
- close
content-length:
- '169'
content-type:
- application/json
date:
- Wed, 03 Sep 2025 15:05:17 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-content-type-options:
- nosniff
status:
code: 403
message: Forbidden
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
interactions:
- request:
body: '{"data": {"type": "evaluation_metric", "attributes": {"metrics": [{"join_on":
{"span": {"span_id": "12345678901", "trace_id": "98765432101"}}, "metric_type":
"categorical", "categorical_value": "wrong-api-key", "label": "api-key", "ml_app":
"dummy-ml-app", "timestamp_ms": 1756910127022}]}}}'
headers:
Accept:
- '*/*'
? !!python/object/apply:multidict._multidict.istr
- Accept-Encoding
: - identity
Connection:
- keep-alive
Content-Length:
- '291'
? !!python/object/apply:multidict._multidict.istr
- Content-Type
: - application/json
User-Agent:
- python-requests/2.32.4
method: POST
uri: https://api.datadoghq.com/api/intake/llm-obs/v2/eval-metric
response:
body:
string: '{"status":"error","code":403,"errors":["Forbidden"],"statuspage":"http://status.datadoghq.com","twitter":"http://twitter.com/datadogops","email":"support@datadoghq.com"}'
headers:
connection:
- close
content-length:
- '169'
content-type:
- application/json
date:
- Wed, 03 Sep 2025 14:39:21 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-content-type-options:
- nosniff
status:
code: 403
message: Forbidden
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
interactions:
- request:
body: '{"data": {"type": "evaluation_metric", "attributes": {"metrics": [{"join_on":
{"span": {"span_id": "123", "trace_id": "1234"}}, "label": "dummy", "metric_type":
"score", "timestamp_ms": 1757074518879, "score_value": 1.0, "ml_app": "unnamed-ml-app",
"tags": ["ddtrace.version:3.13.0.dev56+gf40756451.d20250822", "ml_app:unnamed-ml-app"]}]}}}'
headers:
Accept:
- '*/*'
? !!python/object/apply:multidict._multidict.istr
- Accept-Encoding
: - identity
Connection:
- keep-alive
Content-Length:
- '340'
? !!python/object/apply:multidict._multidict.istr
- Content-Type
: - application/json
User-Agent:
- python-requests/2.32.4
method: POST
uri: https://api.datadoghq.com/api/intake/llm-obs/v2/eval-metric
response:
body:
string: '{"status":"error","code":403,"errors":["Forbidden"],"statuspage":"http://status.datadoghq.com","twitter":"http://twitter.com/datadogops","email":"support@datadoghq.com"}'
headers:
connection:
- close
content-length:
- '169'
content-type:
- application/json
date:
- Fri, 05 Sep 2025 12:15:18 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-content-type-options:
- nosniff
status:
code: 403
message: Forbidden
version: 1
12 changes: 6 additions & 6 deletions tests/llmobs/test_llmobs_eval_metric_agent_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ def test_buffer_limit(mock_writer_logs):
@mock.patch("ddtrace.llmobs._writer.LLMObsEvalMetricWriter._send_payload")
def test_send_categorical_metrics(mock_send_payload, mock_writer_logs):
llmobs_eval_metric_writer = LLMObsEvalMetricWriter(1, 1, is_agentless=False)
llmobs_eval_metric_writer.enqueue(_categorical_metric_event())
llmobs_eval_metric_writer.enqueue(_categorical_metric_event(label="toxicity", value="very"))
llmobs_eval_metric_writer.periodic()
mock_writer_logs.debug.assert_called_with("encoded %d LLMObs %s events to be sent", 1, "evaluation_metric")


@mock.patch("ddtrace.llmobs._writer.LLMObsEvalMetricWriter._send_payload")
def test_send_score_metric(mock_send_payload, mock_writer_logs):
llmobs_eval_metric_writer = LLMObsEvalMetricWriter(1, 1, is_agentless=False)
llmobs_eval_metric_writer.enqueue(_score_metric_event())
llmobs_eval_metric_writer.enqueue(_score_metric_event(label="sentiment", value=0.9))
llmobs_eval_metric_writer.periodic()
mock_writer_logs.debug.assert_called_with("encoded %d LLMObs %s events to be sent", 1, "evaluation_metric")

Expand All @@ -63,11 +63,11 @@ def test_send_timed_events(mock_send_payload, mock_writer_logs):
llmobs_eval_metric_writer.start()
mock_writer_logs.reset_mock()

llmobs_eval_metric_writer.enqueue(_score_metric_event())
llmobs_eval_metric_writer.enqueue(_score_metric_event(label="sentiment", value=0.9))
time.sleep(0.1)
mock_writer_logs.debug.assert_called_with("encoded %d LLMObs %s events to be sent", 1, "evaluation_metric")
mock_writer_logs.reset_mock()
llmobs_eval_metric_writer.enqueue(_categorical_metric_event())
llmobs_eval_metric_writer.enqueue(_categorical_metric_event(label="toxicity", value="very"))
time.sleep(0.1)
mock_writer_logs.debug.assert_called_with("encoded %d LLMObs %s events to be sent", 1, "evaluation_metric")
llmobs_eval_metric_writer.stop()
Expand All @@ -77,7 +77,7 @@ def test_send_timed_events(mock_send_payload, mock_writer_logs):
def test_send_multiple_events(mock_send_payload, mock_writer_logs):
llmobs_eval_metric_writer = LLMObsEvalMetricWriter(1, 1, is_agentless=False)
mock_writer_logs.reset_mock()
llmobs_eval_metric_writer.enqueue(_score_metric_event())
llmobs_eval_metric_writer.enqueue(_categorical_metric_event())
llmobs_eval_metric_writer.enqueue(_score_metric_event(label="sentiment", value=0.9))
llmobs_eval_metric_writer.enqueue(_categorical_metric_event(label="toxicity", value="very"))
llmobs_eval_metric_writer.periodic()
mock_writer_logs.debug.assert_called_with("encoded %d LLMObs %s events to be sent", 2, "evaluation_metric")
Loading
Loading