From 6288dbc36a03b42543262de2a5dc26634abac008 Mon Sep 17 00:00:00 2001 From: Zhengfei Wang <38847871+zhengfeiwang@users.noreply.github.com> Date: Fri, 10 May 2024 16:13:27 +0800 Subject: [PATCH] [devkit][trace] Fix "Failed to load trace ... is not a valid JSON" when traces have invalid JSON values (#3190) # Description `-Infinity`, `Infinity` and `NaN` are serializable in Python, but not in standard JSON; so trace UI cannot parse such JSON string. This PR applies a deserialize and serialize to parse them as normal string to make them standard JSON values. # All Promptflow Contribution checklist: - [x] **The pull request does not introduce [breaking changes].** - [x] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [ ] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [x] Title of the pull request is clear and informative. - [x] There are a small number of commits, each of which have an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [x] Pull request includes test coverage for the included changes. --- src/promptflow-devkit/CHANGELOG.md | 1 + .../promptflow/_sdk/entities/_trace.py | 15 +++++++++++++-- .../tests/sdk_cli_test/e2etests/test_trace.py | 13 +++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/promptflow-devkit/CHANGELOG.md b/src/promptflow-devkit/CHANGELOG.md index 0a609856ed7..83acb1d5819 100644 --- a/src/promptflow-devkit/CHANGELOG.md +++ b/src/promptflow-devkit/CHANGELOG.md @@ -9,6 +9,7 @@ ### Bugs Fixed - Fix the issue that import error will be raised after downgrading promptflow from >=1.10.0 to <1.8.0. - Fix the issue that `pf flow serve` is broken with exception `NotADirectoryError`. +- Fix "Failed to load trace ... is not valid JSON" when traces inputs/outputs have invalid JSON values like `-Infinity`, `Infinity` and `NaN`. ## v1.10.0 (2024.04.26) diff --git a/src/promptflow-devkit/promptflow/_sdk/entities/_trace.py b/src/promptflow-devkit/promptflow/_sdk/entities/_trace.py index 5e7072acef7..ca01c220a43 100644 --- a/src/promptflow-devkit/promptflow/_sdk/entities/_trace.py +++ b/src/promptflow-devkit/promptflow/_sdk/entities/_trace.py @@ -29,6 +29,7 @@ from promptflow._sdk._orm.trace import Event as ORMEvent from promptflow._sdk._orm.trace import LineRun as ORMLineRun from promptflow._sdk._orm.trace import Span as ORMSpan +from promptflow._sdk._utilities.general_utils import json_loads_parse_const_as_str class Event: @@ -364,12 +365,22 @@ def _get_outputs_from_span(span: Span) -> typing.Optional[typing.Dict]: @staticmethod def _from_orm_object(obj: ORMLineRun) -> "LineRun": + # handle potential nan, inf and -inf in inputs and outputs + # they are serializable in Python, but not in JSON + # so it will result in trace UI parse error + # here convert them into string type to make them standard JSON value + inputs, outputs = copy.deepcopy(obj.inputs), copy.deepcopy(obj.outputs) + if isinstance(inputs, dict): + inputs = json_loads_parse_const_as_str(json.dumps(inputs)) + if isinstance(outputs, dict): + outputs = json_loads_parse_const_as_str(json.dumps(outputs)) + return LineRun( line_run_id=obj.line_run_id, trace_id=obj.trace_id, root_span_id=obj.root_span_id, - inputs=copy.deepcopy(obj.inputs), - outputs=copy.deepcopy(obj.outputs), + inputs=inputs, + outputs=outputs, start_time=obj.start_time, end_time=obj.end_time, status=obj.status, diff --git a/src/promptflow-devkit/tests/sdk_cli_test/e2etests/test_trace.py b/src/promptflow-devkit/tests/sdk_cli_test/e2etests/test_trace.py index f8d2a633ad3..71f2da6c022 100644 --- a/src/promptflow-devkit/tests/sdk_cli_test/e2etests/test_trace.py +++ b/src/promptflow-devkit/tests/sdk_cli_test/e2etests/test_trace.py @@ -331,6 +331,19 @@ def test_span_non_json_io_in_attrs_persist(self, pf: PFClient) -> None: assert isinstance(line_run.inputs, str) and line_run.inputs == str(inputs) assert isinstance(line_run.outputs, str) and line_run.outputs == str(output) + def test_span_with_nan_as_io(self, pf: PFClient) -> None: + trace_id, span_id, line_run_id = str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4()) + span = mock_span(trace_id=trace_id, span_id=span_id, parent_id=None, line_run_id=line_run_id) + span.events[0]["attributes"]["payload"] = json.dumps(dict(input1=float("nan"), input2=float("inf"))) + span.events[1]["attributes"]["payload"] = json.dumps(dict(output1=float("nan"), output2=float("-inf"))) + span._persist() + line_run = pf.traces.get_line_run(line_run_id=line_run_id) + line_run_inputs, line_run_outputs = line_run.inputs, line_run.outputs + assert isinstance(line_run_inputs["input1"], str) and line_run_inputs["input1"] == "NaN" + assert isinstance(line_run_inputs["input2"], str) and line_run_inputs["input2"] == "Infinity" + assert isinstance(line_run_outputs["output1"], str) and line_run_outputs["output1"] == "NaN" + assert isinstance(line_run_outputs["output2"], str) and line_run_outputs["output2"] == "-Infinity" + def test_delete_traces_three_tables(self, pf: PFClient) -> None: # trace operation does not expose API for events and spans # so directly use ORM class to list and assert events and spans existence and deletion