From 73530207b082e3754db4c0a449d6e03d4cd36002 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Tue, 17 Dec 2024 21:24:30 +0100 Subject: [PATCH 01/19] Added test on basemodel with required params --- tests/models/test_mistral.py | 81 ++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 86532dee..08cf61fc 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -869,10 +869,10 @@ async def test_stream_result_type_primitif_array(allow_model_requests: None): assert result.cost().response_tokens == len(stream) -async def test_stream_result_type_basemodel(allow_model_requests: None): +async def test_stream_result_type_basemodel_with_default_params(allow_model_requests: None): class MyTypedBaseModel(BaseModel): - first: str = '' # Note: Don't forget to set default values - second: str = '' + first: str = '' # Note: Default, set value. + second: str = '' # Note: Default, set value. # Given stream = [ @@ -958,6 +958,81 @@ class MyTypedBaseModel(BaseModel): assert result.cost().response_tokens == len(stream) +async def test_stream_result_type_basemodel_with_required_params(allow_model_requests: None): + class MyTypedBaseModel(BaseModel): + first: str # Note: Required params + second: str # Note: Required params + + # Given + stream = [ + text_chunk('{'), + text_chunk('"'), + text_chunk('f'), + text_chunk('i'), + text_chunk('r'), + text_chunk('s'), + text_chunk('t'), + text_chunk('"'), + text_chunk(':'), + text_chunk(' '), + text_chunk('"'), + text_chunk('O'), + text_chunk('n'), + text_chunk('e'), + text_chunk('"'), + text_chunk(','), + text_chunk(' '), + text_chunk('"'), + text_chunk('s'), + text_chunk('e'), + text_chunk('c'), + text_chunk('o'), + text_chunk('n'), + text_chunk('d'), + text_chunk('"'), + text_chunk(':'), + text_chunk(' '), + text_chunk('"'), + text_chunk('T'), + text_chunk('w'), + text_chunk('o'), + text_chunk('"'), + text_chunk('}'), + chunk([]), + ] + + mock_client = MockMistralAI.create_stream_mock(stream) + model = MistralModel('mistral-large-latest', client=mock_client) + agent = Agent(model=model, result_type=MyTypedBaseModel) + + # When + async with agent.run_stream('User prompt value') as result: + # Then + assert result.is_structured + assert not result.is_complete + v = [c async for c in result.stream(debounce_by=None)] + assert v == snapshot( + [ + MyTypedBaseModel(first='One', second=''), + MyTypedBaseModel(first='One', second=''), + MyTypedBaseModel(first='One', second=''), + MyTypedBaseModel(first='One', second='T'), + MyTypedBaseModel(first='One', second='Tw'), + MyTypedBaseModel(first='One', second='Two'), + MyTypedBaseModel(first='One', second='Two'), + MyTypedBaseModel(first='One', second='Two'), + MyTypedBaseModel(first='One', second='Two'), + ] + ) + assert result.is_complete + assert result.cost().request_tokens == 34 + assert result.cost().response_tokens == 34 + assert result.cost().total_tokens == 34 + + # double check cost matches stream count + assert result.cost().response_tokens == len(stream) + + ##################### ## Completion Function call ##################### From f6a14e252e3ebde99027176361f9166fc379f48f Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Tue, 17 Dec 2024 21:26:18 +0100 Subject: [PATCH 02/19] Updated comments --- pydantic_ai_slim/pydantic_ai/models/mistral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index 639b9375..d271f0e2 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -554,7 +554,7 @@ def get(self, *, final: bool = False) -> ModelResponse: if output_json: for result_tool in self._result_tools.values(): - # NOTE: Additional verification to prevent JSON validation to crash in `result.py` + # NOTE: Additional verification to prevent JSON validation to crash in `_result.py` # Ensures required parameters in the JSON schema are respected, especially for stream-based return types. # For example, `return_type=list[str]` expects a 'response' key with value type array of str. # when `{"response":` then `repair_json` sets `{"response": ""}` (type not found default str) From a95b2d2ee328e16bcda80027a05d650ee40db7de Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Tue, 17 Dec 2024 22:25:01 +0100 Subject: [PATCH 03/19] Removed json repair and simplify --- pydantic_ai_slim/pyproject.toml | 2 +- uv.lock | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml index 50550089..10a4eeb9 100644 --- a/pydantic_ai_slim/pyproject.toml +++ b/pydantic_ai_slim/pyproject.toml @@ -46,7 +46,7 @@ openai = ["openai>=1.54.3"] vertexai = ["google-auth>=2.36.0", "requests>=2.32.3"] anthropic = ["anthropic>=0.40.0"] groq = ["groq>=0.12.0"] -mistral = ["mistralai>=1.2.5", "json-repair>=0.30.3"] +mistral = ["mistralai>=1.2.5"] logfire = ["logfire>=2.3"] [dependency-groups] diff --git a/uv.lock b/uv.lock index 1c8e956c..530d8de8 100644 --- a/uv.lock +++ b/uv.lock @@ -820,15 +820,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/75/fc5a34b0376437eaac80c22886840d8f39ee7f0992c2e3bd4c246b91cab3/jiter-0.7.1-cp39-none-win_amd64.whl", hash = "sha256:6592f4067c74176e5f369228fb2995ed01400c9e8e1225fb73417183a5e635f0", size = 202098 }, ] -[[package]] -name = "json-repair" -version = "0.30.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2f/7a/7745d0d908563a478421c7520649dfd6a5c551858e2233ff7caf20cb8df7/json_repair-0.30.3.tar.gz", hash = "sha256:0ac56e7ae9253ee9c507a7e1a3a26799c9b0bbe5e2bec1b2cc5053e90d5b05e3", size = 27803 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/2d/79a46330c4b97ee90dd403fb0d267da7b25b24d7db604c5294e5c57d5f7c/json_repair-0.30.3-py3-none-any.whl", hash = "sha256:63bb588162b0958ae93d85356ecbe54c06b8c33f8a4834f93fa2719ea669804e", size = 18951 }, -] - [[package]] name = "jsonpath-python" version = "1.0.6" @@ -1677,7 +1668,6 @@ logfire = [ { name = "logfire" }, ] mistral = [ - { name = "json-repair" }, { name = "mistralai" }, ] openai = [ @@ -1709,7 +1699,6 @@ requires-dist = [ { name = "griffe", specifier = ">=1.3.2" }, { name = "groq", marker = "extra == 'groq'", specifier = ">=0.12.0" }, { name = "httpx", specifier = ">=0.27.2" }, - { name = "json-repair", marker = "extra == 'mistral'", specifier = ">=0.30.3" }, { name = "logfire", marker = "extra == 'logfire'", specifier = ">=2.3" }, { name = "logfire-api", specifier = ">=1.2.0" }, { name = "mistralai", marker = "extra == 'mistral'", specifier = ">=1.2.5" }, From b5208bd52c9957e0a12eefb7d5bb585c82c0cb51 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 09:50:31 +0100 Subject: [PATCH 04/19] Improved the json load --- .../pydantic_ai/models/mistral.py | 74 +++++++++++++++++-- tests/models/test_mistral.py | 25 +++---- 2 files changed, 80 insertions(+), 19 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index d271f0e2..c025195b 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -1,5 +1,6 @@ from __future__ import annotations as _annotations +import json import os from collections.abc import AsyncIterator, Iterable from contextlib import asynccontextmanager @@ -39,7 +40,6 @@ ) try: - from json_repair import repair_json from mistralai import ( UNSET, CompletionChunk as MistralCompletionChunk, @@ -547,12 +547,12 @@ def get(self, *, final: bool = False) -> ModelResponse: elif self._delta_content and self._result_tools: # NOTE: Params set for the most efficient and fastest way. - output_json = repair_json(self._delta_content, return_objects=True, skip_json_loads=True) - assert isinstance( - output_json, dict - ), f'Expected repair_json as type dict, invalid type: {type(output_json)}' + output_json = _repair_json(self._delta_content) + # assert isinstance( + # output_json, (dict, type(None)) + # ), f'Expected repair_json as type dict, invalid type: {type(output_json)}' - if output_json: + if isinstance(output_json, dict) and output_json: for result_tool in self._result_tools.values(): # NOTE: Additional verification to prevent JSON validation to crash in `_result.py` # Ensures required parameters in the JSON schema are respected, especially for stream-based return types. @@ -678,3 +678,65 @@ def _map_content(content: MistralOptionalNullable[MistralContent]) -> str | None result = None return result + + +def _repair_json(s: str) -> dict[str, Any] | list[Any] | None: + """Attempt to parse a given string as JSON, repairing common issues.""" + # Attempt to parse the string as-is. + try: + return json.loads(s, strict=False) + except json.JSONDecodeError: + pass + + new_chars: list[str] = [] + stack: list[Any] = [] + is_inside_string = False + escaped = False + + # Process each character in the string. + for char in s: + if is_inside_string: + if char == '"' and not escaped: + is_inside_string = False + elif char == '\n' and not escaped: + char = '\\n' # Replace newline with escape sequence. + elif char == '\\': + escaped = not escaped + else: + escaped = False + else: + if char == '"': + is_inside_string = True + escaped = False + elif char == '{': + stack.append('}') + elif char == '[': + stack.append(']') + elif char == '}' or char == ']': + if stack and stack[-1] == char: + stack.pop() + else: + # Mismatched closing character; the input is malformed. + return None + + # Append the processed character to the new string. + new_chars.append(char) + + # If we're still inside a string at the end of processing, close the string. + if is_inside_string: + new_chars.append('"') + + # Reverse the stack to get the closing characters. + stack.reverse() + + # Try to parse the modified string until we succeed or run out of characters. + while new_chars: + try: + value = ''.join(new_chars + stack) + return json.loads(value, strict=False) + except json.JSONDecodeError: + # If parsing fails, try removing the last character. + new_chars.pop() + + # If we still can't parse the string as JSON, return None. + return None diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 08cf61fc..ac46faae 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -544,7 +544,7 @@ async def test_request_result_type_with_arguments_str_response(allow_model_reque ##################### -async def test_stream_structured_with_all_typd(allow_model_requests: None): +async def test_stream_structured_with_all_type(allow_model_requests: None): class MyTypedDict(TypedDict, total=False): first: str second: int @@ -563,19 +563,19 @@ class MyTypedDict(TypedDict, total=False): '", "second": 2', ), text_chunk( - '", "bool_value": true', + ', "bool_value": true', ), text_chunk( - '", "nullable_value": null', + ', "nullable_value": null', ), text_chunk( - '", "array_value": ["A", "B", "C"]', + ', "array_value": ["A", "B", "C"]', ), text_chunk( - '", "dict_value": {"A": "A", "B":"B"}', + ', "dict_value": {"A": "A", "B":"B"}', ), text_chunk( - '", "dict_int_value": {"A": 1, "B":2}', + ', "dict_int_value": {"A": 1, "B":2}', ), text_chunk('}'), chunk([]), @@ -721,8 +721,8 @@ class MyTypedDict(TypedDict, total=False): {'first': 'One'}, {'first': 'One'}, {'first': 'One'}, - {'first': 'One', 'second': ''}, - {'first': 'One', 'second': ''}, + {'first': 'One'}, + {'first': 'One'}, {'first': 'One', 'second': ''}, {'first': 'One', 'second': 'T'}, {'first': 'One', 'second': 'Tw'}, @@ -828,6 +828,7 @@ async def test_stream_result_type_primitif_array(allow_model_requests: None): v = [c async for c in result.stream(debounce_by=None)] assert v == snapshot( [ + [''], ['f'], ['fi'], ['fir'], @@ -835,13 +836,13 @@ async def test_stream_result_type_primitif_array(allow_model_requests: None): ['first'], ['first'], ['first'], - ['first'], + ['first', ''], ['first', 'O'], ['first', 'On'], ['first', 'One'], ['first', 'One'], ['first', 'One'], - ['first', 'One'], + ['first', 'One', ''], ['first', 'One', 's'], ['first', 'One', 'se'], ['first', 'One', 'sec'], @@ -850,7 +851,7 @@ async def test_stream_result_type_primitif_array(allow_model_requests: None): ['first', 'One', 'second'], ['first', 'One', 'second'], ['first', 'One', 'second'], - ['first', 'One', 'second'], + ['first', 'One', 'second', ''], ['first', 'One', 'second', 'T'], ['first', 'One', 'second', 'Tw'], ['first', 'One', 'second', 'Two'], @@ -1013,8 +1014,6 @@ class MyTypedBaseModel(BaseModel): v = [c async for c in result.stream(debounce_by=None)] assert v == snapshot( [ - MyTypedBaseModel(first='One', second=''), - MyTypedBaseModel(first='One', second=''), MyTypedBaseModel(first='One', second=''), MyTypedBaseModel(first='One', second='T'), MyTypedBaseModel(first='One', second='Tw'), From 2b1bd0bd5cae61136bb14e5ac8a6d548d779c5dd Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 17:53:58 +0100 Subject: [PATCH 05/19] Added JSON repairer --- .../pydantic_ai/models/mistral.py | 156 +++++++++++------- 1 file changed, 93 insertions(+), 63 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index c025195b..10270b15 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -330,6 +330,7 @@ async def _process_streamed_response( content, timestamp, start_cost, + JSONRepairer(), ) elif content: @@ -509,6 +510,96 @@ def timestamp(self) -> datetime: return self._timestamp +class JSONRepairer: + """Initialize the JSONRepairer with an empty buffer and state to maintain across chunks.""" + + def __init__(self) -> None: + self.new_chars: list[str] = [] + self.stack: list[Any] = [] + self.is_inside_string = False + self.escaped = False + + def process_chunk(self, chunk: str) -> dict[str, Any] | list[Any] | None: + """Process a JSON chunk, attempting to parse it into a valid JSON object by repairing issues. + + Args: + chunk (str): The next chunk of the JSON string. + + Returns: + Union[dict[str, Any], list[Any], None]: + Parsed JSON object if successful, None if parsing fails. + """ + try: + # Try parsing the current buffer + result = json.loads(chunk, strict=False) + return result + except json.JSONDecodeError: + pass # Continue to attempt repairing + + # Attempt to repair the JSON incrementally + return self._repair_json(chunk) + + def _repair_json(self, chunk: str) -> dict[str, Any] | list[Any] | None: + """Attempt to repair and parse the accumulated buffer as JSON, handling common issues. + + Returns: + Union[dict[str, Any], list[Any], None]: + Parsed JSON object if successful, None if parsing fails. + """ + start_index = len(self.new_chars) + for char in chunk[start_index:]: + if self.is_inside_string: + if char == '"' and not self.escaped: + self.is_inside_string = False + + elif char == '\n' and not self.escaped: + char = '\\n' # Replace newline with escape sequence + elif char == '\\': + self.escaped = not self.escaped + else: + self.escaped = False + else: + if char == '"': + self.is_inside_string = True + self.escaped = False + # self.stack.append('"') + elif char == '{': + self.stack.append('}') + elif char == '[': + self.stack.append(']') + elif char == '}' or char == ']': + if self.stack and self.stack[-1] == char: + self.stack.pop() + else: + # Mismatched closing character; the input is malformed + return None + + # Append the processed character to the new string + self.new_chars.append(char) + + r_stack = self.stack.copy() + # Reverse the stack to get the closing characters + + # If we're still inside a string at the end of processing, close the string + if self.is_inside_string: + r_stack.append('"') + self.is_inside_string = True + + r_stack.reverse() + c_new_chars = self.new_chars.copy() + # Try to parse the modified string until we succeed or run out of characters + while c_new_chars: + try: + value = ''.join(c_new_chars + r_stack) + return json.loads(value, strict=False) + except json.JSONDecodeError: + # If parsing fails, try removing the last character + c_new_chars.pop() + + # If we still can't parse the string as JSON, return None + return None + + @dataclass class MistralStreamStructuredResponse(StreamStructuredResponse): """Implementation of `StreamStructuredResponse` for Mistral models.""" @@ -519,6 +610,7 @@ class MistralStreamStructuredResponse(StreamStructuredResponse): _delta_content: str | None _timestamp: datetime _cost: Cost + _json: JSONRepairer async def __anext__(self) -> None: chunk = await self._response.__anext__() @@ -547,7 +639,7 @@ def get(self, *, final: bool = False) -> ModelResponse: elif self._delta_content and self._result_tools: # NOTE: Params set for the most efficient and fastest way. - output_json = _repair_json(self._delta_content) + output_json = self._json.process_chunk(self._delta_content) # assert isinstance( # output_json, (dict, type(None)) # ), f'Expected repair_json as type dict, invalid type: {type(output_json)}' @@ -678,65 +770,3 @@ def _map_content(content: MistralOptionalNullable[MistralContent]) -> str | None result = None return result - - -def _repair_json(s: str) -> dict[str, Any] | list[Any] | None: - """Attempt to parse a given string as JSON, repairing common issues.""" - # Attempt to parse the string as-is. - try: - return json.loads(s, strict=False) - except json.JSONDecodeError: - pass - - new_chars: list[str] = [] - stack: list[Any] = [] - is_inside_string = False - escaped = False - - # Process each character in the string. - for char in s: - if is_inside_string: - if char == '"' and not escaped: - is_inside_string = False - elif char == '\n' and not escaped: - char = '\\n' # Replace newline with escape sequence. - elif char == '\\': - escaped = not escaped - else: - escaped = False - else: - if char == '"': - is_inside_string = True - escaped = False - elif char == '{': - stack.append('}') - elif char == '[': - stack.append(']') - elif char == '}' or char == ']': - if stack and stack[-1] == char: - stack.pop() - else: - # Mismatched closing character; the input is malformed. - return None - - # Append the processed character to the new string. - new_chars.append(char) - - # If we're still inside a string at the end of processing, close the string. - if is_inside_string: - new_chars.append('"') - - # Reverse the stack to get the closing characters. - stack.reverse() - - # Try to parse the modified string until we succeed or run out of characters. - while new_chars: - try: - value = ''.join(new_chars + stack) - return json.loads(value, strict=False) - except json.JSONDecodeError: - # If parsing fails, try removing the last character. - new_chars.pop() - - # If we still can't parse the string as JSON, return None. - return None From 569a63b881c702e7b1f9a0dc0f4349481eb1d220 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 18:32:11 +0100 Subject: [PATCH 06/19] Added test on JSONRepairer --- .../pydantic_ai/models/mistral.py | 68 ++++++++----------- tests/models/test_mistral.py | 50 ++++++++++++++ 2 files changed, 80 insertions(+), 38 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index 672c2302..2d7d1282 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -511,41 +511,35 @@ def timestamp(self) -> datetime: class JSONRepairer: - """Initialize the JSONRepairer with an empty buffer and state to maintain across chunks.""" + """Utility class to repair JSON data that is not valid.""" def __init__(self) -> None: + """Initialize the JSONRepairer with an empty buffer and state to maintain across chunks. + + The properties are: + + - `new_chars`: The characters that have been processed so far. + - `stack`: The stack of characters to be processed. + - `is_inside_string`: A boolean indicating whether we are inside a string or not. + - `escaped`: A boolean indicating whether the last character was an escape character or not. + """ self.new_chars: list[str] = [] self.stack: list[Any] = [] self.is_inside_string = False self.escaped = False - def process_chunk(self, chunk: str) -> dict[str, Any] | list[Any] | None: - """Process a JSON chunk, attempting to parse it into a valid JSON object by repairing issues. - - Args: - chunk (str): The next chunk of the JSON string. - - Returns: - Union[dict[str, Any], list[Any], None]: - Parsed JSON object if successful, None if parsing fails. - """ + def process_chunk(self, chunk: str) -> dict[str, Any] | None: + """Process a JSON chunk, attempting to parse it into a valid JSON object by repairing issues.""" try: - # Try parsing the current buffer - result = json.loads(chunk, strict=False) - return result + output_json: dict[str, Any] | None = json.loads(chunk) + return output_json except json.JSONDecodeError: pass # Continue to attempt repairing - # Attempt to repair the JSON incrementally return self._repair_json(chunk) - def _repair_json(self, chunk: str) -> dict[str, Any] | list[Any] | None: - """Attempt to repair and parse the accumulated buffer as JSON, handling common issues. - - Returns: - Union[dict[str, Any], list[Any], None]: - Parsed JSON object if successful, None if parsing fails. - """ + def _repair_json(self, chunk: str) -> dict[str, Any] | None: + """Attempts to repair and parse the accumulated buffer as JSON, handling common issues.""" start_index = len(self.new_chars) for char in chunk[start_index:]: if self.is_inside_string: @@ -562,7 +556,7 @@ def _repair_json(self, chunk: str) -> dict[str, Any] | list[Any] | None: if char == '"': self.is_inside_string = True self.escaped = False - # self.stack.append('"') + elif char == '{': self.stack.append('}') elif char == '[': @@ -577,26 +571,27 @@ def _repair_json(self, chunk: str) -> dict[str, Any] | list[Any] | None: # Append the processed character to the new string self.new_chars.append(char) - r_stack = self.stack.copy() - # Reverse the stack to get the closing characters + closing_chars = self.stack.copy() # 1 - # If we're still inside a string at the end of processing, close the string + # If we're still inside a string, close it if self.is_inside_string: - r_stack.append('"') + closing_chars.append('"') # 2 self.is_inside_string = True - r_stack.reverse() - c_new_chars = self.new_chars.copy() + closing_chars.reverse() # 3 + + repaired_chars = self.new_chars.copy() + # Try to parse the modified string until we succeed or run out of characters - while c_new_chars: + while repaired_chars: try: - value = ''.join(c_new_chars + r_stack) + value = ''.join(repaired_chars + closing_chars) return json.loads(value, strict=False) except json.JSONDecodeError: - # If parsing fails, try removing the last character - c_new_chars.pop() + # Remove the last character and retry + repaired_chars.pop() - # If we still can't parse the string as JSON, return None + # Return None if parsing fails after all attempts return None @@ -639,10 +634,7 @@ def get(self, *, final: bool = False) -> ModelResponse: elif self._delta_content and self._result_tools: # NOTE: Params set for the most efficient and fastest way. - output_json = self._json.process_chunk(self._delta_content) - # assert isinstance( - # output_json, (dict, type(None)) - # ), f'Expected repair_json as type dict, invalid type: {type(output_json)}' + output_json: dict[str, Any] | None = self._json.process_chunk(self._delta_content) if isinstance(output_json, dict) and output_json: for result_tool in self._result_tools.values(): diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 1214f3a7..51da316b 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -26,6 +26,7 @@ ToolReturnPart, UserPromptPart, ) +from pydantic_ai.models.mistral import JSONRepairer from ..conftest import IsNow, try_import @@ -1630,6 +1631,55 @@ async def get_location(loc_name: str) -> str: ) +##################### +## JSONRepairer +##################### +@pytest.fixture +def repairer(): + """Fixture to set up a JSONRepairer instance.""" + return JSONRepairer() + + +def test_valid_json_parsing(repairer: JSONRepairer): + """Test that valid JSON chunks are correctly parsed.""" + valid_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3]}, "boolean": true}' + result = repairer.process_chunk(valid_json) + expected = json.loads(valid_json) + assert result == expected + + +def test_repair_malformed_json(repairer: JSONRepairer): + """Test that the JSONRepairer can repair malformed JSON.""" + + # Case 1: Missing closing brace in the inner key-value structure + malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_' + result = repairer.process_chunk(malformed_json) + expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_"}]}}') + assert result == expected + + # Case 2: Missing closing brace in the outer structure + malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true' # Missing closing brace + result = repairer.process_chunk(malformed_json) + expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') + assert result == expected + + # Case 3: Missing closing brace with additional key + malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":' # Missing closing brace + result = repairer.process_chunk(malformed_json) + expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') + assert result == expected + + # Case 4: Missing closing quote and brace in the "test" key + malformed_json = ( + '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"ok' + ) + result = repairer.process_chunk(malformed_json) + expected = json.loads( + '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"ok"}}' + ) + assert result == expected + + ##################### ## Test methods ##################### From 34f97d9291ed186519c61cda06503511b57e1ed1 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 18:40:05 +0100 Subject: [PATCH 07/19] Fixed import --- tests/models/test_mistral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 51da316b..3b6d7518 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -26,7 +26,6 @@ ToolReturnPart, UserPromptPart, ) -from pydantic_ai.models.mistral import JSONRepairer from ..conftest import IsNow, try_import @@ -51,6 +50,7 @@ from mistralai.types.basemodel import Unset as MistralUnset from pydantic_ai.models.mistral import ( + JSONRepairer, MistralAgentModel, MistralModel, MistralStreamStructuredResponse, From 190dfdc6b1e4fc8cc82d5e5aeee0e74c7a775b78 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 19:21:49 +0100 Subject: [PATCH 08/19] Cleaned code --- .../pydantic_ai/models/mistral.py | 13 ++++++----- tests/models/test_mistral.py | 22 ++++++++++++++----- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index 2d7d1282..4b6c5ce6 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -527,6 +527,7 @@ def __init__(self) -> None: self.stack: list[Any] = [] self.is_inside_string = False self.escaped = False + self.closing_map = {'"': '"', '{': '}', '[': ']'} def process_chunk(self, chunk: str) -> dict[str, Any] | None: """Process a JSON chunk, attempting to parse it into a valid JSON object by repairing issues.""" @@ -571,7 +572,7 @@ def _repair_json(self, chunk: str) -> dict[str, Any] | None: # Append the processed character to the new string self.new_chars.append(char) - closing_chars = self.stack.copy() # 1 + closing_chars = self.stack[::] # 1 # If we're still inside a string, close it if self.is_inside_string: @@ -580,16 +581,18 @@ def _repair_json(self, chunk: str) -> dict[str, Any] | None: closing_chars.reverse() # 3 - repaired_chars = self.new_chars.copy() + repaired_chars = self.new_chars[::] # Try to parse the modified string until we succeed or run out of characters while repaired_chars: try: value = ''.join(repaired_chars + closing_chars) - return json.loads(value, strict=False) + return json.loads(value) except json.JSONDecodeError: # Remove the last character and retry - repaired_chars.pop() + value = repaired_chars.pop() + if closing_chars and closing_chars[0] == self.closing_map.get(value): + closing_chars.pop(0) # Return None if parsing fails after all attempts return None @@ -636,7 +639,7 @@ def get(self, *, final: bool = False) -> ModelResponse: # NOTE: Params set for the most efficient and fastest way. output_json: dict[str, Any] | None = self._json.process_chunk(self._delta_content) - if isinstance(output_json, dict) and output_json: + if output_json: for result_tool in self._result_tools.values(): # NOTE: Additional verification to prevent JSON validation to crash in `_result.py` # Ensures required parameters in the JSON schema are respected, especially for stream-based return types. diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 3b6d7518..239ea62a 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -1649,27 +1649,39 @@ def test_valid_json_parsing(repairer: JSONRepairer): def test_repair_malformed_json(repairer: JSONRepairer): - """Test that the JSONRepairer can repair malformed JSON.""" + """Test that the JSONRepairer can repair various malformed JSON structures.""" - # Case 1: Missing closing brace in the inner key-value structure + # Case 0: Missing closing array brace in the inner key-value structure + malformed_json = '{"key": "value", "nested": {"list": [1' + result = repairer.process_chunk(malformed_json) + expected = json.loads('{"key": "value", "nested": {"list": [1]}}') + assert result == expected + + # Case 1: Missing closing and closing string in inter key-value structure + malformed_json = '{"key": "value", "nested": {"list": [1, {"i' + result = repairer.process_chunk(malformed_json) + expected = json.loads('{"key": "value", "nested": {"list": [1, {}]}}') + assert result == expected + + # Case 2: Missing closing brace in the inner key-value structure malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_' result = repairer.process_chunk(malformed_json) expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_"}]}}') assert result == expected - # Case 2: Missing closing brace in the outer structure + # Case 3: Missing closing brace in the outer structure malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true' # Missing closing brace result = repairer.process_chunk(malformed_json) expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') assert result == expected - # Case 3: Missing closing brace with additional key + # Case 4: Missing closing brace with additional key malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":' # Missing closing brace result = repairer.process_chunk(malformed_json) expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') assert result == expected - # Case 4: Missing closing quote and brace in the "test" key + # Case 5: Missing closing quote and brace in the "test" key malformed_json = ( '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"ok' ) From 2d3346664811ea1dc51787589fab831ed57c8169 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 19:24:19 +0100 Subject: [PATCH 09/19] Cleaned code --- pydantic_ai_slim/pydantic_ai/models/mistral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index 4b6c5ce6..e2eb23ac 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -527,7 +527,6 @@ def __init__(self) -> None: self.stack: list[Any] = [] self.is_inside_string = False self.escaped = False - self.closing_map = {'"': '"', '{': '}', '[': ']'} def process_chunk(self, chunk: str) -> dict[str, Any] | None: """Process a JSON chunk, attempting to parse it into a valid JSON object by repairing issues.""" @@ -591,7 +590,8 @@ def _repair_json(self, chunk: str) -> dict[str, Any] | None: except json.JSONDecodeError: # Remove the last character and retry value = repaired_chars.pop() - if closing_chars and closing_chars[0] == self.closing_map.get(value): + # Check if the last character removed was a opening character + if closing_chars and closing_chars[0] == {'"': '"', '{': '}', '[': ']'}.get(value): closing_chars.pop(0) # Return None if parsing fails after all attempts From 8106b15145664c4584157f9b5e66c2e922bc837b Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 21:53:52 +0100 Subject: [PATCH 10/19] Fixed Strip whitespace, newlines, backtick from the start and end --- .../pydantic_ai/models/mistral.py | 2 + tests/models/test_mistral.py | 153 ++++++++++++++++++ 2 files changed, 155 insertions(+) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index e2eb23ac..26530166 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -530,6 +530,8 @@ def __init__(self) -> None: def process_chunk(self, chunk: str) -> dict[str, Any] | None: """Process a JSON chunk, attempting to parse it into a valid JSON object by repairing issues.""" + # Strip whitespace, newlines, backtick from the start and end + chunk = chunk.strip(' \n\r\t`') try: output_json: dict[str, Any] | None = json.loads(chunk) return output_json diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 239ea62a..fbc3bd1d 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -1634,6 +1634,8 @@ async def get_location(loc_name: str) -> str: ##################### ## JSONRepairer ##################### + + @pytest.fixture def repairer(): """Fixture to set up a JSONRepairer instance.""" @@ -1642,8 +1644,11 @@ def repairer(): def test_valid_json_parsing(repairer: JSONRepairer): """Test that valid JSON chunks are correctly parsed.""" + # Given valid_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3]}, "boolean": true}' + # When result = repairer.process_chunk(valid_json) + # Then expected = json.loads(valid_json) assert result == expected @@ -1652,46 +1657,194 @@ def test_repair_malformed_json(repairer: JSONRepairer): """Test that the JSONRepairer can repair various malformed JSON structures.""" # Case 0: Missing closing array brace in the inner key-value structure + # Given malformed_json = '{"key": "value", "nested": {"list": [1' + # When result = repairer.process_chunk(malformed_json) + # Then expected = json.loads('{"key": "value", "nested": {"list": [1]}}') assert result == expected # Case 1: Missing closing and closing string in inter key-value structure + # Given malformed_json = '{"key": "value", "nested": {"list": [1, {"i' + # When result = repairer.process_chunk(malformed_json) + # Then expected = json.loads('{"key": "value", "nested": {"list": [1, {}]}}') assert result == expected # Case 2: Missing closing brace in the inner key-value structure + # Given malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_' + # When result = repairer.process_chunk(malformed_json) + # Then expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_"}]}}') assert result == expected # Case 3: Missing closing brace in the outer structure + # Given malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true' # Missing closing brace + # When result = repairer.process_chunk(malformed_json) + # Then expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') assert result == expected # Case 4: Missing closing brace with additional key + # Given malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":' # Missing closing brace + # When result = repairer.process_chunk(malformed_json) + # Then expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') assert result == expected # Case 5: Missing closing quote and brace in the "test" key + # Given malformed_json = ( '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"ok' ) + # When result = repairer.process_chunk(malformed_json) + # Then expected = json.loads( '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"ok"}}' ) assert result == expected +def test_repair_malformed_indended_json(repairer: JSONRepairer): + """Test that the JSONRepairer can repair various malformed JSON structures.""" + # Case 0: Missing closing array brace in the inner key-value structure + # Given + malformed_json = """{ + "key": "val\\nue", + "nested": { + "list": [1""" + # When + result = repairer.process_chunk(malformed_json) + # Then + expected = json.loads('{"key": "val\\nue", "nested": {"list": [1]}}') + assert result == expected + + # Case 1: Missing closing and closing string in inter key-value structure + # Given + malformed_json = """{ + "key": "val\\nue", + "nested": { + "list": [1, {"inner_key": "inner_""" + # When + result = repairer.process_chunk(malformed_json) + # Then + expected = json.loads('{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_"}]}}') + assert result == expected + + # Case 2: Missing closing brace in the inner key-value structure + # Given + malformed_json = """{ + "key": "val\\nue", + "nested": { + "list": [1, {"inner_key": "inner_""" + # When + result = repairer.process_chunk(malformed_json) + # Then + expected = json.loads("""{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_"}]}}""") + assert result == expected + + # Case 3: Missing closing brace in the outer structure + # Given + malformed_json = """{ + "key": "val\\nue", + "nested": { + "list": [1, {"inner_key": "inner_value"}, 3], + "boolean": true + + + """ # Added extra space for test strip + # When + result = repairer.process_chunk(malformed_json) + # Then + expected = json.loads( + """{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}""" + ) + assert result == expected + + # Case 4: Missing closing brace with additional key + # Given + malformed_json = ( + """{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":""" + ) + # When + result = repairer.process_chunk(malformed_json) + # Then + expected = json.loads( + """{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}""" + ) + assert result == expected + + # Case 5: Missing closing quote and brace in the "test" key + # Given + malformed_json = """{ + "key": "val\\nue", + "nested": { + "list": [1, {"inner_key": "inner_value"}, 3], + "boolean": true, + "test":"okay + + """ # Added extra space for test strip + # When + result = repairer.process_chunk(malformed_json) + # Then + expected = json.loads( + '{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"okay"}}' + ) + assert result == expected + + # Case 5: Missing closing quote and brace in the "test" key + # Given + malformed_json = """{ + "key": "val\\nue", + "nested": { + "list": [1, {"inner_key": "inner_value"}, 3], + "boolean": true, + "test":"okay" + } + }""" + # When + result = repairer.process_chunk(malformed_json) + # Then + expected = json.loads( + '{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"okay"}}' + ) + assert result == expected + + +def test_repair_malformed_json_with_whitespace(repairer: JSONRepairer): + """Test that the JSONRepairer can handle JSON with newlines, carriage returns, and tabs.""" + + # Malformed JSON with newlines, carriage returns, and tabs + # Given + malformed_json = """\n\r\t{ + "key": "val\\nue", + "nested": { + "list": [1, {"inner_key": "inner_value"}, 3], + "boolean": true + }\n\r\t""" + + # When + result = repairer.process_chunk(malformed_json) + + # Them + expected = json.loads( + """{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}""" + ) + + # Assert that the result matches the expected JSON + assert result == expected + + ##################### ## Test methods ##################### From 6badb748438cee50dd3ef75d933ab92242df924c Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 22:12:43 +0100 Subject: [PATCH 11/19] Cleaned code --- .../pydantic_ai/models/mistral.py | 57 +++++++++++-------- tests/models/test_mistral.py | 12 ++-- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index 26530166..65c11011 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -198,11 +198,10 @@ async def _stream_completions_create( """Create a streaming completion request to the Mistral model.""" response: MistralEventStreamAsync[MistralCompletionEvent] | None mistral_messages = list(chain(*(self._map_message(m) for m in messages))) - model_settings = model_settings or {} if self.result_tools and self.function_tools or self.function_tools: - # Function Calling Mode + # Function Calling response = await self.client.chat.stream_async( model=str(self.model_name), messages=mistral_messages, @@ -218,9 +217,9 @@ async def _stream_completions_create( elif self.result_tools: # Json Mode parameters_json_schemas = [tool.parameters_json_schema for tool in self.result_tools] - user_output_format_message = self._generate_user_output_format(parameters_json_schemas) mistral_messages.append(user_output_format_message) + response = await self.client.chat.stream_async( model=str(self.model_name), messages=mistral_messages, @@ -270,12 +269,13 @@ def _map_function_and_result_tools_definition(self) -> list[MistralTool] | None: @staticmethod def _process_response(response: MistralChatCompletionResponse) -> ModelResponse: """Process a non-streamed response, and prepare a message to return.""" + assert response.choices, 'Unexpected empty response choice.' + if response.created: timestamp = datetime.fromtimestamp(response.created, tz=timezone.utc) else: timestamp = _now_utc() - assert response.choices, 'Unexpected empty response choice.' choice = response.choices[0] content = choice.message.content tool_calls = choice.message.tool_calls @@ -330,7 +330,7 @@ async def _process_streamed_response( content, timestamp, start_usage, - JSONRepairer(), + _JSONChunkParser(), ) elif content: @@ -510,8 +510,8 @@ def timestamp(self) -> datetime: return self._timestamp -class JSONRepairer: - """Utility class to repair JSON data that is not valid.""" +class _JSONChunkParser: + """A class to repair JSON chunks that might be corrupted (e.g. missing closing quotes).""" def __init__(self) -> None: """Initialize the JSONRepairer with an empty buffer and state to maintain across chunks. @@ -545,58 +545,71 @@ def _repair_json(self, chunk: str) -> dict[str, Any] | None: start_index = len(self.new_chars) for char in chunk[start_index:]: if self.is_inside_string: + # End of string detected if char == '"' and not self.escaped: self.is_inside_string = False + # Replace newline with escape sequence within a string elif char == '\n' and not self.escaped: - char = '\\n' # Replace newline with escape sequence + char = '\\n' + + # Toggle escaped status on encountering backslash elif char == '\\': self.escaped = not self.escaped + + # Reset escaped status for other characters else: self.escaped = False else: + # Start of string detected if char == '"': self.is_inside_string = True self.escaped = False + # Track expected closing brace elif char == '{': self.stack.append('}') + + # Track expected closing bracket elif char == '[': self.stack.append(']') + + # Handle closing characters and check for mismatches elif char == '}' or char == ']': if self.stack and self.stack[-1] == char: self.stack.pop() else: - # Mismatched closing character; the input is malformed + # Mismatched closing character means malformed input return None - # Append the processed character to the new string self.new_chars.append(char) - closing_chars = self.stack[::] # 1 + # Prepare closing characters to balance the structure (Copy) + closing_chars = self.stack[::] - # If we're still inside a string, close it + # If inside a string, ensure it is closed if self.is_inside_string: - closing_chars.append('"') # 2 + closing_chars.append('"') self.is_inside_string = True - closing_chars.reverse() # 3 + # Reverse to maintain correct order of closing characters + closing_chars.reverse() + # (Copy) repaired_chars = self.new_chars[::] - # Try to parse the modified string until we succeed or run out of characters + # Attempt to parse the repaired JSON string while repaired_chars: try: value = ''.join(repaired_chars + closing_chars) return json.loads(value) except json.JSONDecodeError: - # Remove the last character and retry + # Remove the last character and retry parsing value = repaired_chars.pop() - # Check if the last character removed was a opening character + # Check if the last character removed was an opening character if closing_chars and closing_chars[0] == {'"': '"', '{': '}', '[': ']'}.get(value): closing_chars.pop(0) - # Return None if parsing fails after all attempts return None @@ -610,7 +623,7 @@ class MistralStreamStructuredResponse(StreamStructuredResponse): _delta_content: str | None _timestamp: datetime _usage: Usage - _json: JSONRepairer + _json: _JSONChunkParser async def __anext__(self) -> None: chunk = await self._response.__anext__() @@ -638,17 +651,13 @@ def get(self, *, final: bool = False) -> ModelResponse: calls.append(tool) elif self._delta_content and self._result_tools: - # NOTE: Params set for the most efficient and fastest way. output_json: dict[str, Any] | None = self._json.process_chunk(self._delta_content) if output_json: for result_tool in self._result_tools.values(): # NOTE: Additional verification to prevent JSON validation to crash in `_result.py` # Ensures required parameters in the JSON schema are respected, especially for stream-based return types. - # For example, `return_type=list[str]` expects a 'response' key with value type array of str. - # when `{"response":` then `repair_json` sets `{"response": ""}` (type not found default str) - # when `{"response": {` then `repair_json` sets `{"response": {}}` (type found) - # This ensures it's corrected to `{"response": {}}` and other required parameters and type. + # Example with BaseModel and required fields. if not self._validate_required_json_shema(output_json, result_tool.parameters_json_schema): continue diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index fbc3bd1d..fc47b3c9 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -50,10 +50,10 @@ from mistralai.types.basemodel import Unset as MistralUnset from pydantic_ai.models.mistral import ( - JSONRepairer, MistralAgentModel, MistralModel, MistralStreamStructuredResponse, + _JSONChunkParser, # type: ignore ) pytestmark = [ @@ -1639,10 +1639,10 @@ async def get_location(loc_name: str) -> str: @pytest.fixture def repairer(): """Fixture to set up a JSONRepairer instance.""" - return JSONRepairer() + return _JSONChunkParser() -def test_valid_json_parsing(repairer: JSONRepairer): +def test_valid_json_parsing(repairer: _JSONChunkParser): """Test that valid JSON chunks are correctly parsed.""" # Given valid_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3]}, "boolean": true}' @@ -1653,7 +1653,7 @@ def test_valid_json_parsing(repairer: JSONRepairer): assert result == expected -def test_repair_malformed_json(repairer: JSONRepairer): +def test_repair_malformed_json(repairer: _JSONChunkParser): """Test that the JSONRepairer can repair various malformed JSON structures.""" # Case 0: Missing closing array brace in the inner key-value structure @@ -1715,7 +1715,7 @@ def test_repair_malformed_json(repairer: JSONRepairer): assert result == expected -def test_repair_malformed_indended_json(repairer: JSONRepairer): +def test_repair_malformed_indended_json(repairer: _JSONChunkParser): """Test that the JSONRepairer can repair various malformed JSON structures.""" # Case 0: Missing closing array brace in the inner key-value structure # Given @@ -1821,7 +1821,7 @@ def test_repair_malformed_indended_json(repairer: JSONRepairer): assert result == expected -def test_repair_malformed_json_with_whitespace(repairer: JSONRepairer): +def test_repair_malformed_json_with_whitespace(repairer: _JSONChunkParser): """Test that the JSONRepairer can handle JSON with newlines, carriage returns, and tabs.""" # Malformed JSON with newlines, carriage returns, and tabs From 2ab7b78f44b2e3c7bb268565bd3660e1a4807d52 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 22:25:49 +0100 Subject: [PATCH 12/19] Cleaned tests --- tests/models/test_mistral.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index fc47b3c9..75b3ee75 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -1683,18 +1683,26 @@ def test_repair_malformed_json(repairer: _JSONChunkParser): expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_"}]}}') assert result == expected - # Case 3: Missing closing brace in the outer structure + # Case 3: Missing closing brace in the outer structure with extra space for test strip # Given - malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true' # Missing closing brace + malformed_json = """{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true + + + + """ # Extra space for test strip # When result = repairer.process_chunk(malformed_json) # Then expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') assert result == expected - # Case 4: Missing closing brace with additional key + # Case 4: Missing closing brace with additional key with extra space for test strip # Given - malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":' # Missing closing brace + malformed_json = """{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test": + + + + """ # Extra space for test strip # When result = repairer.process_chunk(malformed_json) # Then @@ -1753,7 +1761,7 @@ def test_repair_malformed_indended_json(repairer: _JSONChunkParser): expected = json.loads("""{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_"}]}}""") assert result == expected - # Case 3: Missing closing brace in the outer structure + # Case 3: Missing closing brace in the outer structure with extra space for test strip # Given malformed_json = """{ "key": "val\\nue", @@ -1762,7 +1770,7 @@ def test_repair_malformed_indended_json(repairer: _JSONChunkParser): "boolean": true - """ # Added extra space for test strip + """ # Extra space for test strip # When result = repairer.process_chunk(malformed_json) # Then @@ -1784,7 +1792,7 @@ def test_repair_malformed_indended_json(repairer: _JSONChunkParser): ) assert result == expected - # Case 5: Missing closing quote and brace in the "test" key + # Case 5: Missing closing quote and brace in the "test" key with extra space for test strip # Given malformed_json = """{ "key": "val\\nue", @@ -1793,7 +1801,7 @@ def test_repair_malformed_indended_json(repairer: _JSONChunkParser): "boolean": true, "test":"okay - """ # Added extra space for test strip + """ # Extra space for test strip # When result = repairer.process_chunk(malformed_json) # Then From 87b488efb1107743572f097b30c3835c929b0ae4 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 22:30:45 +0100 Subject: [PATCH 13/19] Added final comments --- tests/models/test_mistral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 75b3ee75..b8d302d9 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -1730,7 +1730,7 @@ def test_repair_malformed_indended_json(repairer: _JSONChunkParser): malformed_json = """{ "key": "val\\nue", "nested": { - "list": [1""" + "list": [1""" # Extra \\n to test indent in string # When result = repairer.process_chunk(malformed_json) # Then @@ -1839,7 +1839,7 @@ def test_repair_malformed_json_with_whitespace(repairer: _JSONChunkParser): "nested": { "list": [1, {"inner_key": "inner_value"}, 3], "boolean": true - }\n\r\t""" + }\n\r\t""" # Extra \\n to test indent in string # When result = repairer.process_chunk(malformed_json) From e3695e9b9045f7fd5091abbca81f7c52f9713642 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 22:32:59 +0100 Subject: [PATCH 14/19] Fixed missing type --- pydantic_ai_slim/pydantic_ai/models/mistral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index 65c11011..c651d1cd 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -524,7 +524,7 @@ def __init__(self) -> None: - `escaped`: A boolean indicating whether the last character was an escape character or not. """ self.new_chars: list[str] = [] - self.stack: list[Any] = [] + self.stack: list[str] = [] self.is_inside_string = False self.escaped = False From 3eb4aa9d6f42c0f008b8bcfc6bff87a51b5966e0 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 22:35:50 +0100 Subject: [PATCH 15/19] Added documentations --- pydantic_ai_slim/pydantic_ai/models/mistral.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index c651d1cd..ede7e6ca 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -542,6 +542,7 @@ def process_chunk(self, chunk: str) -> dict[str, Any] | None: def _repair_json(self, chunk: str) -> dict[str, Any] | None: """Attempts to repair and parse the accumulated buffer as JSON, handling common issues.""" + # Next string to continue processing from the previous iteration. start_index = len(self.new_chars) for char in chunk[start_index:]: if self.is_inside_string: From 9deb6fc0f17caba77c43414817fb59e64ea1b2af Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 22:38:23 +0100 Subject: [PATCH 16/19] Cleaning documentation --- pydantic_ai_slim/pydantic_ai/models/mistral.py | 9 --------- tests/models/test_mistral.py | 12 ++++++------ 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index ede7e6ca..3a73a676 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -514,15 +514,6 @@ class _JSONChunkParser: """A class to repair JSON chunks that might be corrupted (e.g. missing closing quotes).""" def __init__(self) -> None: - """Initialize the JSONRepairer with an empty buffer and state to maintain across chunks. - - The properties are: - - - `new_chars`: The characters that have been processed so far. - - `stack`: The stack of characters to be processed. - - `is_inside_string`: A boolean indicating whether we are inside a string or not. - - `escaped`: A boolean indicating whether the last character was an escape character or not. - """ self.new_chars: list[str] = [] self.stack: list[str] = [] self.is_inside_string = False diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index b8d302d9..8aa06c02 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -1632,18 +1632,18 @@ async def get_location(loc_name: str) -> str: ##################### -## JSONRepairer +## SONChunkParser ##################### @pytest.fixture def repairer(): - """Fixture to set up a JSONRepairer instance.""" + """Fixture to set up a JSONChunkParser instance.""" return _JSONChunkParser() def test_valid_json_parsing(repairer: _JSONChunkParser): - """Test that valid JSON chunks are correctly parsed.""" + """Test that the JSONChunkParser valid JSON chunks are correctly parsed.""" # Given valid_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3]}, "boolean": true}' # When @@ -1654,7 +1654,7 @@ def test_valid_json_parsing(repairer: _JSONChunkParser): def test_repair_malformed_json(repairer: _JSONChunkParser): - """Test that the JSONRepairer can repair various malformed JSON structures.""" + """Test that the JSONChunkParser can repair various malformed JSON structures.""" # Case 0: Missing closing array brace in the inner key-value structure # Given @@ -1724,7 +1724,7 @@ def test_repair_malformed_json(repairer: _JSONChunkParser): def test_repair_malformed_indended_json(repairer: _JSONChunkParser): - """Test that the JSONRepairer can repair various malformed JSON structures.""" + """Test that the JSONChunkParser can repair various malformed JSON structures.""" # Case 0: Missing closing array brace in the inner key-value structure # Given malformed_json = """{ @@ -1830,7 +1830,7 @@ def test_repair_malformed_indended_json(repairer: _JSONChunkParser): def test_repair_malformed_json_with_whitespace(repairer: _JSONChunkParser): - """Test that the JSONRepairer can handle JSON with newlines, carriage returns, and tabs.""" + """Test that the JSONChunkParser can handle JSON with newlines, carriage returns, and tabs.""" # Malformed JSON with newlines, carriage returns, and tabs # Given From 51c9c4235f20b14c8ddd20f6e0004ee4c828ce0c Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 22:42:58 +0100 Subject: [PATCH 17/19] Added documentations --- .../pydantic_ai/models/mistral.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index 3a73a676..793887ed 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -514,8 +514,16 @@ class _JSONChunkParser: """A class to repair JSON chunks that might be corrupted (e.g. missing closing quotes).""" def __init__(self) -> None: + """Initialize the state of the JSON chunk parser. + + State variables: + - `new_chars`: A list of characters that have been processed. + - `closing_stack`: A stack of characters to add closing quotes (e.g., `", }, ]`). + - `is_inside_string`: A boolean indicating whether the parser is currently inside a string. + - `escaped`: A boolean indicating whether the last character seen was an escape character. + """ self.new_chars: list[str] = [] - self.stack: list[str] = [] + self.closing_stack: list[str] = [] self.is_inside_string = False self.escaped = False @@ -560,16 +568,16 @@ def _repair_json(self, chunk: str) -> dict[str, Any] | None: # Track expected closing brace elif char == '{': - self.stack.append('}') + self.closing_stack.append('}') # Track expected closing bracket elif char == '[': - self.stack.append(']') + self.closing_stack.append(']') # Handle closing characters and check for mismatches elif char == '}' or char == ']': - if self.stack and self.stack[-1] == char: - self.stack.pop() + if self.closing_stack and self.closing_stack[-1] == char: + self.closing_stack.pop() else: # Mismatched closing character means malformed input return None @@ -577,7 +585,7 @@ def _repair_json(self, chunk: str) -> dict[str, Any] | None: self.new_chars.append(char) # Prepare closing characters to balance the structure (Copy) - closing_chars = self.stack[::] + closing_chars = self.closing_stack[::] # If inside a string, ensure it is closed if self.is_inside_string: From 703754b3c5d34a35530e503e8c75d595fb255517 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 22:57:51 +0100 Subject: [PATCH 18/19] Removed JSONChunkParser to use native pydantic core from json --- .../pydantic_ai/models/mistral.py | 115 +-------- tests/models/test_mistral.py | 223 ------------------ 2 files changed, 8 insertions(+), 330 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index 793887ed..f826ec67 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -1,6 +1,5 @@ from __future__ import annotations as _annotations -import json import os from collections.abc import AsyncIterator, Iterable from contextlib import asynccontextmanager @@ -9,6 +8,7 @@ from itertools import chain from typing import Any, Callable, Literal, Union +import pydantic_core from httpx import AsyncClient as AsyncHTTPClient, Timeout from typing_extensions import assert_never @@ -330,7 +330,6 @@ async def _process_streamed_response( content, timestamp, start_usage, - _JSONChunkParser(), ) elif content: @@ -510,109 +509,6 @@ def timestamp(self) -> datetime: return self._timestamp -class _JSONChunkParser: - """A class to repair JSON chunks that might be corrupted (e.g. missing closing quotes).""" - - def __init__(self) -> None: - """Initialize the state of the JSON chunk parser. - - State variables: - - `new_chars`: A list of characters that have been processed. - - `closing_stack`: A stack of characters to add closing quotes (e.g., `", }, ]`). - - `is_inside_string`: A boolean indicating whether the parser is currently inside a string. - - `escaped`: A boolean indicating whether the last character seen was an escape character. - """ - self.new_chars: list[str] = [] - self.closing_stack: list[str] = [] - self.is_inside_string = False - self.escaped = False - - def process_chunk(self, chunk: str) -> dict[str, Any] | None: - """Process a JSON chunk, attempting to parse it into a valid JSON object by repairing issues.""" - # Strip whitespace, newlines, backtick from the start and end - chunk = chunk.strip(' \n\r\t`') - try: - output_json: dict[str, Any] | None = json.loads(chunk) - return output_json - except json.JSONDecodeError: - pass # Continue to attempt repairing - - return self._repair_json(chunk) - - def _repair_json(self, chunk: str) -> dict[str, Any] | None: - """Attempts to repair and parse the accumulated buffer as JSON, handling common issues.""" - # Next string to continue processing from the previous iteration. - start_index = len(self.new_chars) - for char in chunk[start_index:]: - if self.is_inside_string: - # End of string detected - if char == '"' and not self.escaped: - self.is_inside_string = False - - # Replace newline with escape sequence within a string - elif char == '\n' and not self.escaped: - char = '\\n' - - # Toggle escaped status on encountering backslash - elif char == '\\': - self.escaped = not self.escaped - - # Reset escaped status for other characters - else: - self.escaped = False - else: - # Start of string detected - if char == '"': - self.is_inside_string = True - self.escaped = False - - # Track expected closing brace - elif char == '{': - self.closing_stack.append('}') - - # Track expected closing bracket - elif char == '[': - self.closing_stack.append(']') - - # Handle closing characters and check for mismatches - elif char == '}' or char == ']': - if self.closing_stack and self.closing_stack[-1] == char: - self.closing_stack.pop() - else: - # Mismatched closing character means malformed input - return None - - self.new_chars.append(char) - - # Prepare closing characters to balance the structure (Copy) - closing_chars = self.closing_stack[::] - - # If inside a string, ensure it is closed - if self.is_inside_string: - closing_chars.append('"') - self.is_inside_string = True - - # Reverse to maintain correct order of closing characters - closing_chars.reverse() - - # (Copy) - repaired_chars = self.new_chars[::] - - # Attempt to parse the repaired JSON string - while repaired_chars: - try: - value = ''.join(repaired_chars + closing_chars) - return json.loads(value) - except json.JSONDecodeError: - # Remove the last character and retry parsing - value = repaired_chars.pop() - # Check if the last character removed was an opening character - if closing_chars and closing_chars[0] == {'"': '"', '{': '}', '[': ']'}.get(value): - closing_chars.pop(0) - - return None - - @dataclass class MistralStreamStructuredResponse(StreamStructuredResponse): """Implementation of `StreamStructuredResponse` for Mistral models.""" @@ -623,7 +519,6 @@ class MistralStreamStructuredResponse(StreamStructuredResponse): _delta_content: str | None _timestamp: datetime _usage: Usage - _json: _JSONChunkParser async def __anext__(self) -> None: chunk = await self._response.__anext__() @@ -651,7 +546,13 @@ def get(self, *, final: bool = False) -> ModelResponse: calls.append(tool) elif self._delta_content and self._result_tools: - output_json: dict[str, Any] | None = self._json.process_chunk(self._delta_content) + output_json: dict[str, Any] | None = pydantic_core.from_json( + self._delta_content, allow_partial='trailing-strings' + ) + + assert isinstance( + output_json, (dict, type(None)) + ), f'Expected `pydantic_core.from_json` as type `dict` or `None`, invalid type: {type(output_json)}' if output_json: for result_tool in self._result_tools.values(): diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index 8aa06c02..1214f3a7 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -53,7 +53,6 @@ MistralAgentModel, MistralModel, MistralStreamStructuredResponse, - _JSONChunkParser, # type: ignore ) pytestmark = [ @@ -1631,228 +1630,6 @@ async def get_location(loc_name: str) -> str: ) -##################### -## SONChunkParser -##################### - - -@pytest.fixture -def repairer(): - """Fixture to set up a JSONChunkParser instance.""" - return _JSONChunkParser() - - -def test_valid_json_parsing(repairer: _JSONChunkParser): - """Test that the JSONChunkParser valid JSON chunks are correctly parsed.""" - # Given - valid_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3]}, "boolean": true}' - # When - result = repairer.process_chunk(valid_json) - # Then - expected = json.loads(valid_json) - assert result == expected - - -def test_repair_malformed_json(repairer: _JSONChunkParser): - """Test that the JSONChunkParser can repair various malformed JSON structures.""" - - # Case 0: Missing closing array brace in the inner key-value structure - # Given - malformed_json = '{"key": "value", "nested": {"list": [1' - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads('{"key": "value", "nested": {"list": [1]}}') - assert result == expected - - # Case 1: Missing closing and closing string in inter key-value structure - # Given - malformed_json = '{"key": "value", "nested": {"list": [1, {"i' - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads('{"key": "value", "nested": {"list": [1, {}]}}') - assert result == expected - - # Case 2: Missing closing brace in the inner key-value structure - # Given - malformed_json = '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_' - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_"}]}}') - assert result == expected - - # Case 3: Missing closing brace in the outer structure with extra space for test strip - # Given - malformed_json = """{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true - - - - """ # Extra space for test strip - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') - assert result == expected - - # Case 4: Missing closing brace with additional key with extra space for test strip - # Given - malformed_json = """{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test": - - - - """ # Extra space for test strip - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads('{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}') - assert result == expected - - # Case 5: Missing closing quote and brace in the "test" key - # Given - malformed_json = ( - '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"ok' - ) - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads( - '{"key": "value", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"ok"}}' - ) - assert result == expected - - -def test_repair_malformed_indended_json(repairer: _JSONChunkParser): - """Test that the JSONChunkParser can repair various malformed JSON structures.""" - # Case 0: Missing closing array brace in the inner key-value structure - # Given - malformed_json = """{ - "key": "val\\nue", - "nested": { - "list": [1""" # Extra \\n to test indent in string - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads('{"key": "val\\nue", "nested": {"list": [1]}}') - assert result == expected - - # Case 1: Missing closing and closing string in inter key-value structure - # Given - malformed_json = """{ - "key": "val\\nue", - "nested": { - "list": [1, {"inner_key": "inner_""" - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads('{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_"}]}}') - assert result == expected - - # Case 2: Missing closing brace in the inner key-value structure - # Given - malformed_json = """{ - "key": "val\\nue", - "nested": { - "list": [1, {"inner_key": "inner_""" - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads("""{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_"}]}}""") - assert result == expected - - # Case 3: Missing closing brace in the outer structure with extra space for test strip - # Given - malformed_json = """{ - "key": "val\\nue", - "nested": { - "list": [1, {"inner_key": "inner_value"}, 3], - "boolean": true - - - """ # Extra space for test strip - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads( - """{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}""" - ) - assert result == expected - - # Case 4: Missing closing brace with additional key - # Given - malformed_json = ( - """{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":""" - ) - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads( - """{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}""" - ) - assert result == expected - - # Case 5: Missing closing quote and brace in the "test" key with extra space for test strip - # Given - malformed_json = """{ - "key": "val\\nue", - "nested": { - "list": [1, {"inner_key": "inner_value"}, 3], - "boolean": true, - "test":"okay - - """ # Extra space for test strip - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads( - '{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"okay"}}' - ) - assert result == expected - - # Case 5: Missing closing quote and brace in the "test" key - # Given - malformed_json = """{ - "key": "val\\nue", - "nested": { - "list": [1, {"inner_key": "inner_value"}, 3], - "boolean": true, - "test":"okay" - } - }""" - # When - result = repairer.process_chunk(malformed_json) - # Then - expected = json.loads( - '{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true, "test":"okay"}}' - ) - assert result == expected - - -def test_repair_malformed_json_with_whitespace(repairer: _JSONChunkParser): - """Test that the JSONChunkParser can handle JSON with newlines, carriage returns, and tabs.""" - - # Malformed JSON with newlines, carriage returns, and tabs - # Given - malformed_json = """\n\r\t{ - "key": "val\\nue", - "nested": { - "list": [1, {"inner_key": "inner_value"}, 3], - "boolean": true - }\n\r\t""" # Extra \\n to test indent in string - - # When - result = repairer.process_chunk(malformed_json) - - # Them - expected = json.loads( - """{"key": "val\\nue", "nested": {"list": [1, {"inner_key": "inner_value"}, 3], "boolean": true}}""" - ) - - # Assert that the result matches the expected JSON - assert result == expected - - ##################### ## Test methods ##################### From d7818ac57073dc4ad3f0dcb736ba193dfbf167dc Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 18 Dec 2024 23:03:52 +0100 Subject: [PATCH 19/19] Cleaned code --- pydantic_ai_slim/pydantic_ai/models/mistral.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/mistral.py b/pydantic_ai_slim/pydantic_ai/models/mistral.py index f826ec67..a6c8b648 100644 --- a/pydantic_ai_slim/pydantic_ai/models/mistral.py +++ b/pydantic_ai_slim/pydantic_ai/models/mistral.py @@ -550,10 +550,6 @@ def get(self, *, final: bool = False) -> ModelResponse: self._delta_content, allow_partial='trailing-strings' ) - assert isinstance( - output_json, (dict, type(None)) - ), f'Expected `pydantic_core.from_json` as type `dict` or `None`, invalid type: {type(output_json)}' - if output_json: for result_tool in self._result_tools.values(): # NOTE: Additional verification to prevent JSON validation to crash in `_result.py`