Skip to content

Commit d36f0a6

Browse files
authored
chore(llmobs): fix issue where non updated fields would lose state locally (#14254)
When updating a record, the previous behavior would lose the local state of the fields that weren't updated. For example, if only expected_output is updated, the local state would lose track of input_data and metadata, even though the remote state still has the old values. This PR fixes that and maintains the state of the fields that weren't updated.

[before example](https://app.datadoghq.com/llm/datasets/421c182d-14e8-4df7-956a-1056633a601f?page=1)

<img width="819" height="765" alt="image" src="https://github.com/user-attachments/assets/7db8115b-7eac-45a4-aa33-02d6b93ad719" />
<img width="1335" height="404" alt="image" src="https://github.com/user-attachments/assets/ea533503-422f-4f49-b79f-49166bb3fdc2" />

[after](https://app.datadoghq.com/llm/datasets/e660b3f3-d57e-4eff-a223-2fa5ac6cdcd6?page=1)

<img width="748" height="766" alt="image" src="https://github.com/user-attachments/assets/23c3bf39-fc82-4bdb-af99-520c01b8a44f" />

## Checklist

- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist

- [x] Reviewer has checked that all the criteria below are met
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
1 parent 88396be commit d36f0a6

15 files changed

+729
-9
lines changed

ddtrace/llmobs/_experiment.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from typing import cast
1616
from typing import overload
1717

18+
from typing_extensions import NotRequired
19+
1820
import ddtrace
1921
from ddtrace import config
2022
from ddtrace.constants import ERROR_MSG
@@ -46,6 +48,13 @@ class DatasetRecordRaw(TypedDict):
4648
metadata: Dict[str, Any]
4749

4850

51+
class UpdatableDatasetRecord(TypedDict):
52+
input_data: NotRequired[DatasetRecordInputType]
53+
expected_output: NotRequired[JSONType]
54+
metadata: NotRequired[Dict[str, Any]]
55+
record_id: str
56+
57+
4958
class DatasetRecord(DatasetRecordRaw):
5059
record_id: str
5160

@@ -87,7 +96,7 @@ class Dataset:
8796
_version: int
8897
_dne_client: "LLMObsExperimentsClient"
8998
_new_records: List[DatasetRecordRaw]
90-
_updated_record_ids: List[str]
99+
_updated_record_ids_to_new_fields: Dict[str, UpdatableDatasetRecord]
91100
_deleted_record_ids: List[str]
92101

93102
def __init__(
@@ -106,7 +115,7 @@ def __init__(
106115
self._dne_client = _dne_client
107116
self._records = records
108117
self._new_records = []
109-
self._updated_record_ids = []
118+
self._updated_record_ids_to_new_fields = {}
110119
self._deleted_record_ids = []
111120

112121
def push(self) -> None:
@@ -125,7 +134,7 @@ def push(self) -> None:
125134
)
126135
)
127136

128-
updated_records = [r for r in self._records if "record_id" in r and r["record_id"] in self._updated_record_ids]
137+
updated_records = list(self._updated_record_ids_to_new_fields.values())
129138
new_version, new_record_ids = self._dne_client.dataset_batch_update(
130139
self._id, self._new_records, updated_records, self._deleted_record_ids
131140
)
@@ -138,7 +147,7 @@ def push(self) -> None:
138147
self._version = new_version if new_version != -1 else self._version + 1
139148
self._new_records = []
140149
self._deleted_record_ids = []
141-
self._updated_record_ids = []
150+
self._updated_record_ids_to_new_fields = {}
142151

143152
def update(self, index: int, record: DatasetRecordRaw) -> None:
144153
if all(k not in record for k in ("input_data", "expected_output", "metadata")):
@@ -147,8 +156,12 @@ def update(self, index: int, record: DatasetRecordRaw) -> None:
147156
"input_data, expected_output, or metadata to update"
148157
)
149158
record_id = self._records[index]["record_id"]
150-
self._updated_record_ids.append(record_id)
151-
self._records[index] = {**record, "record_id": record_id}
159+
self._updated_record_ids_to_new_fields[record_id] = {
160+
**self._updated_record_ids_to_new_fields.get(record_id, {"record_id": record_id}),
161+
**record,
162+
"record_id": record_id,
163+
}
164+
self._records[index] = {**self._records[index], **record, "record_id": record_id}
152165

153166
def append(self, record: DatasetRecordRaw) -> None:
154167
r: DatasetRecord = {**record, "record_id": ""}
@@ -161,6 +174,9 @@ def delete(self, index: int) -> None:
161174
self._deleted_record_ids.append(record_id)
162175
del self._records[index]
163176

177+
if record_id in self._updated_record_ids_to_new_fields:
178+
del self._updated_record_ids_to_new_fields[record_id]
179+
164180
@property
165181
def url(self) -> str:
166182
# FIXME: will not work for subdomain orgs

ddtrace/llmobs/_writer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from ddtrace.llmobs._experiment import DatasetRecord
4646
from ddtrace.llmobs._experiment import DatasetRecordRaw
4747
from ddtrace.llmobs._experiment import JSONType
48+
from ddtrace.llmobs._experiment import UpdatableDatasetRecord
4849
from ddtrace.llmobs._utils import safe_json
4950
from ddtrace.settings._agent import config as agent_config
5051

@@ -355,7 +356,7 @@ def dataset_create(self, name: str, description: str) -> Dataset:
355356
return Dataset(name, dataset_id, [], description, curr_version, _dne_client=self)
356357

357358
@staticmethod
358-
def _get_record_json(record: Union[DatasetRecord, DatasetRecordRaw], is_update: bool) -> JSONType:
359+
def _get_record_json(record: Union[UpdatableDatasetRecord, DatasetRecordRaw], is_update: bool) -> JSONType:
359360
# for now, if a user wants to "erase" the value of expected_output, they are expected to
360361
# set expected_output to None, and we serialize that as empty string to indicate this to BE
361362
expected_output: JSONType = None
@@ -383,7 +384,7 @@ def dataset_batch_update(
383384
self,
384385
dataset_id: str,
385386
insert_records: List[DatasetRecordRaw],
386-
update_records: List[DatasetRecord],
387+
update_records: List[UpdatableDatasetRecord],
387388
delete_record_ids: List[str],
388389
) -> Tuple[int, List[str]]:
389390
irs: JSONType = [self._get_record_json(r, False) for r in insert_records]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "id": "0a3ce469-bbb5-4d8e-8f9d-86024dba131f",
4+
"attributes": {"insert_records": [], "update_records": [{"input": {"prompt":
5+
"What is the capital of Germany?"}, "expected_output": {"answer": "Berlin"},
6+
"metadata": null, "id": "61e3dfe8-ab26-4952-84a0-ef6717a6508c"}, {"input": {"prompt":
7+
"What is the capital of Mexico?"}, "expected_output": "", "metadata": {"difficulty":
8+
"easy"}, "id": "fe4a6696-deab-4410-8667-dc6b32d1c8c2"}], "delete_records": []}}}'
9+
headers:
10+
Accept:
11+
- '*/*'
12+
? !!python/object/apply:multidict._multidict.istr
13+
- Accept-Encoding
14+
: - identity
15+
Connection:
16+
- keep-alive
17+
Content-Length:
18+
- '481'
19+
? !!python/object/apply:multidict._multidict.istr
20+
- Content-Type
21+
: - application/json
22+
User-Agent:
23+
- python-requests/2.32.3
24+
method: POST
25+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/0a3ce469-bbb5-4d8e-8f9d-86024dba131f/batch_update
26+
response:
27+
body:
28+
string: '{"data":[{"id":"3bace74a-68a9-4a5a-ad70-7af6aba25b57","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T05:14:12.868054731Z","dataset_id":"0a3ce469-bbb5-4d8e-8f9d-86024dba131f","expected_output":{"answer":"Berlin"},"input":{"prompt":"What
29+
is the capital of Germany?"},"updated_at":"2025-08-08T05:14:12.868054805Z","version":2}},{"id":"0f4490c1-6a55-4688-b695-e46d9e172671","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T05:14:12.868072454Z","dataset_id":"0a3ce469-bbb5-4d8e-8f9d-86024dba131f","expected_output":"","input":{"prompt":"What
30+
is the capital of Mexico?"},"metadata":{"difficulty":"easy"},"updated_at":"2025-08-08T05:14:12.868072528Z","version":2}}]}'
31+
headers:
32+
content-length:
33+
- '787'
34+
content-security-policy:
35+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
36+
content-type:
37+
- application/vnd.api+json
38+
date:
39+
- Fri, 08 Aug 2025 05:14:12 GMT
40+
strict-transport-security:
41+
- max-age=31536000; includeSubDomains; preload
42+
vary:
43+
- Accept-Encoding
44+
x-content-type-options:
45+
- nosniff
46+
x-frame-options:
47+
- SAMEORIGIN
48+
status:
49+
code: 200
50+
message: OK
51+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "id": "0a3ce469-bbb5-4d8e-8f9d-86024dba131f",
4+
"attributes": {"insert_records": [{"input": {"prompt": "What is the capital
5+
of France?"}, "expected_output": {"answer": "Paris"}, "metadata": null}, {"input":
6+
{"prompt": "What is the capital of China?"}, "expected_output": {"answer": "Beijing"},
7+
"metadata": null}], "update_records": [], "delete_records": []}}}'
8+
headers:
9+
Accept:
10+
- '*/*'
11+
? !!python/object/apply:multidict._multidict.istr
12+
- Accept-Encoding
13+
: - identity
14+
Connection:
15+
- keep-alive
16+
Content-Length:
17+
- '387'
18+
? !!python/object/apply:multidict._multidict.istr
19+
- Content-Type
20+
: - application/json
21+
User-Agent:
22+
- python-requests/2.32.3
23+
method: POST
24+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/0a3ce469-bbb5-4d8e-8f9d-86024dba131f/batch_update
25+
response:
26+
body:
27+
string: '{"data":[{"id":"61e3dfe8-ab26-4952-84a0-ef6717a6508c","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T05:14:09.951986612Z","dataset_id":"0a3ce469-bbb5-4d8e-8f9d-86024dba131f","expected_output":{"answer":"Paris"},"input":{"prompt":"What
28+
is the capital of France?"},"updated_at":"2025-08-08T05:14:09.951986612Z","version":1}},{"id":"fe4a6696-deab-4410-8667-dc6b32d1c8c2","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T05:14:09.951986612Z","dataset_id":"0a3ce469-bbb5-4d8e-8f9d-86024dba131f","expected_output":{"answer":"Beijing"},"input":{"prompt":"What
29+
is the capital of China?"},"updated_at":"2025-08-08T05:14:09.951986612Z","version":1}}]}'
30+
headers:
31+
content-length:
32+
- '769'
33+
content-security-policy:
34+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
35+
content-type:
36+
- application/vnd.api+json
37+
date:
38+
- Fri, 08 Aug 2025 05:14:10 GMT
39+
strict-transport-security:
40+
- max-age=31536000; includeSubDomains; preload
41+
vary:
42+
- Accept-Encoding
43+
x-content-type-options:
44+
- nosniff
45+
x-frame-options:
46+
- SAMEORIGIN
47+
status:
48+
code: 200
49+
message: OK
50+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
Accept:
6+
- '*/*'
7+
? !!python/object/apply:multidict._multidict.istr
8+
- Accept-Encoding
9+
: - identity
10+
Connection:
11+
- keep-alive
12+
? !!python/object/apply:multidict._multidict.istr
13+
- Content-Length
14+
: - '0'
15+
? !!python/object/apply:multidict._multidict.istr
16+
- Content-Type
17+
: - application/json
18+
User-Agent:
19+
- python-requests/2.32.3
20+
method: GET
21+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/0a3ce469-bbb5-4d8e-8f9d-86024dba131f/records
22+
response:
23+
body:
24+
string: '{"data":[{"id":"3bace74a-68a9-4a5a-ad70-7af6aba25b57","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T05:14:12.868054Z","dataset_id":"0a3ce469-bbb5-4d8e-8f9d-86024dba131f","expected_output":{"answer":"Berlin"},"input":{"prompt":"What
25+
is the capital of Germany?"},"updated_at":"2025-08-08T05:14:12.868054Z"}},{"id":"0f4490c1-6a55-4688-b695-e46d9e172671","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T05:14:12.868072Z","dataset_id":"0a3ce469-bbb5-4d8e-8f9d-86024dba131f","expected_output":"","input":{"prompt":"What
26+
is the capital of Mexico?"},"metadata":{"difficulty":"easy"},"updated_at":"2025-08-08T05:14:12.868072Z"}}],"meta":{"after":""}}'
27+
headers:
28+
content-length:
29+
- '771'
30+
content-security-policy:
31+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
32+
content-type:
33+
- application/vnd.api+json
34+
date:
35+
- Fri, 08 Aug 2025 05:14:17 GMT
36+
strict-transport-security:
37+
- max-age=31536000; includeSubDomains; preload
38+
vary:
39+
- Accept-Encoding
40+
x-content-type-options:
41+
- nosniff
42+
x-frame-options:
43+
- SAMEORIGIN
44+
status:
45+
code: 200
46+
message: OK
47+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "id": "1ebc4710-c5a1-4487-b56f-25ecefcc1bf1",
4+
"attributes": {"insert_records": [], "update_records": [], "delete_records":
5+
["f681d2a5-01aa-4369-b760-5eed65ae632d"]}}}'
6+
headers:
7+
Accept:
8+
- '*/*'
9+
? !!python/object/apply:multidict._multidict.istr
10+
- Accept-Encoding
11+
: - identity
12+
Connection:
13+
- keep-alive
14+
Content-Length:
15+
- '196'
16+
? !!python/object/apply:multidict._multidict.istr
17+
- Content-Type
18+
: - application/json
19+
User-Agent:
20+
- python-requests/2.32.3
21+
method: POST
22+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/1ebc4710-c5a1-4487-b56f-25ecefcc1bf1/batch_update
23+
response:
24+
body:
25+
string: '{"data":[]}'
26+
headers:
27+
content-length:
28+
- '11'
29+
content-security-policy:
30+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
31+
content-type:
32+
- application/vnd.api+json
33+
date:
34+
- Fri, 08 Aug 2025 05:56:40 GMT
35+
strict-transport-security:
36+
- max-age=31536000; includeSubDomains; preload
37+
vary:
38+
- Accept-Encoding
39+
x-content-type-options:
40+
- nosniff
41+
x-frame-options:
42+
- SAMEORIGIN
43+
status:
44+
code: 200
45+
message: OK
46+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
interactions:
2+
- request:
3+
body: '{"data": {"type": "datasets", "id": "1ebc4710-c5a1-4487-b56f-25ecefcc1bf1",
4+
"attributes": {"insert_records": [{"input": {"prompt": "What is the capital
5+
of France?"}, "expected_output": {"answer": "Paris"}, "metadata": null}, {"input":
6+
{"prompt": "What is the capital of Italy?"}, "expected_output": {"answer": "Rome"},
7+
"metadata": null}], "update_records": [], "delete_records": []}}}'
8+
headers:
9+
Accept:
10+
- '*/*'
11+
? !!python/object/apply:multidict._multidict.istr
12+
- Accept-Encoding
13+
: - identity
14+
Connection:
15+
- keep-alive
16+
Content-Length:
17+
- '384'
18+
? !!python/object/apply:multidict._multidict.istr
19+
- Content-Type
20+
: - application/json
21+
User-Agent:
22+
- python-requests/2.32.3
23+
method: POST
24+
uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/1ebc4710-c5a1-4487-b56f-25ecefcc1bf1/batch_update
25+
response:
26+
body:
27+
string: '{"data":[{"id":"f681d2a5-01aa-4369-b760-5eed65ae632d","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T05:56:35.363359262Z","dataset_id":"1ebc4710-c5a1-4487-b56f-25ecefcc1bf1","expected_output":{"answer":"Paris"},"input":{"prompt":"What
28+
is the capital of France?"},"updated_at":"2025-08-08T05:56:35.363359262Z","version":1}},{"id":"f28cabd6-e1c6-4b00-9888-4dea090b13d3","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-08-08T05:56:35.363359262Z","dataset_id":"1ebc4710-c5a1-4487-b56f-25ecefcc1bf1","expected_output":{"answer":"Rome"},"input":{"prompt":"What
29+
is the capital of Italy?"},"updated_at":"2025-08-08T05:56:35.363359262Z","version":1}}]}'
30+
headers:
31+
content-length:
32+
- '766'
33+
content-security-policy:
34+
- frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
35+
content-type:
36+
- application/vnd.api+json
37+
date:
38+
- Fri, 08 Aug 2025 05:56:35 GMT
39+
strict-transport-security:
40+
- max-age=31536000; includeSubDomains; preload
41+
vary:
42+
- Accept-Encoding
43+
x-content-type-options:
44+
- nosniff
45+
x-frame-options:
46+
- SAMEORIGIN
47+
status:
48+
code: 200
49+
message: OK
50+
version: 1

0 commit comments

Comments
 (0)