From 22713fd02a31c1dfe332a691190db5ecae1e39f4 Mon Sep 17 00:00:00 2001 From: Stainless Bot <107565488+stainless-bot@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:47:48 +0100 Subject: [PATCH] chore(internal): support multipart data with overlapping keys (#1104) --- src/openai/_base_client.py | 32 +++++++++++++++++---- tests/test_client.py | 58 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index 7a1562461f..d7e5127dd8 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -61,7 +61,7 @@ RequestOptions, ModelBuilderProtocol, ) -from ._utils import is_dict, is_given, is_mapping +from ._utils import is_dict, is_list, is_given, is_mapping from ._compat import model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( @@ -451,14 +451,18 @@ def _build_request( headers = self._build_headers(options) params = _merge_mappings(self._custom_query, options.params) + content_type = headers.get("Content-Type") # If the given Content-Type header is multipart/form-data then it # has to be removed so that httpx can generate the header with # additional information for us as it has to be in this form # for the server to be able to correctly parse the request: # multipart/form-data; boundary=---abc-- - if headers.get("Content-Type") == "multipart/form-data": - headers.pop("Content-Type") + if content_type is not None and content_type.startswith("multipart/form-data"): + if "boundary" not in content_type: + # only remove the header if the boundary hasn't been explicitly set + # as the caller doesn't want httpx to come up with their own boundary + headers.pop("Content-Type") # As we are now sending multipart/form-data instead of application/json # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding @@ -494,9 +498,25 @@ def _serialize_multipartform(self, data: Mapping[object, object]) -> dict[str, o ) serialized: dict[str, object] = {} for key, value in items: - if key in serialized: - raise ValueError(f"Duplicate key encountered: {key}; This behaviour is not supported") - serialized[key] = value + existing = serialized.get(key) + + if not existing: + serialized[key] = value + continue + + # If a value has already been set for this key then that + # means we're sending data like `array[]=[1, 2, 3]` and we + # need to tell httpx that we want to send multiple values with + # the same key which is done by using a list or a tuple. + # + # Note: 2d arrays should never result in the same key at both + # levels so it's safe to assume that if the value is a list, + # it was because we changed it to be a list. + if is_list(existing): + existing.append(value) + else: + serialized[key] = [existing, value] + return serialized def _maybe_override_cast_to(self, cast_to: type[ResponseT], options: FinalRequestOptions) -> type[ResponseT]: diff --git a/tests/test_client.py b/tests/test_client.py index 3d2dd35821..24933456bd 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -437,6 +437,35 @@ def test_request_extra_query(self) -> None: params = dict(request.url.params) assert params == {"foo": "2"} + def test_multipart_repeating_array(self, client: OpenAI) -> None: + request = client._build_request( + FinalRequestOptions.construct( + method="get", + url="/foo", + headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, + json_data={"array": ["foo", "bar"]}, + files=[("foo.txt", b"hello world")], + ) + ) + + assert request.read().split(b"\r\n") == [ + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"foo", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"bar", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="foo.txt"; filename="upload"', + b"Content-Type: application/octet-stream", + b"", + b"hello world", + b"--6b7ba517decee4a450543ea6ae821c82--", + b"", + ] + @pytest.mark.respx(base_url=base_url) def test_basic_union_response(self, respx_mock: MockRouter) -> None: class Model1(BaseModel): @@ -1104,6 +1133,35 @@ def test_request_extra_query(self) -> None: params = dict(request.url.params) assert params == {"foo": "2"} + def test_multipart_repeating_array(self, async_client: AsyncOpenAI) -> None: + request = async_client._build_request( + FinalRequestOptions.construct( + method="get", + url="/foo", + headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, + json_data={"array": ["foo", "bar"]}, + files=[("foo.txt", b"hello world")], + ) + ) + + assert request.read().split(b"\r\n") == [ + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"foo", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="array[]"', + b"", + b"bar", + b"--6b7ba517decee4a450543ea6ae821c82", + b'Content-Disposition: form-data; name="foo.txt"; filename="upload"', + b"Content-Type: application/octet-stream", + b"", + b"hello world", + b"--6b7ba517decee4a450543ea6ae821c82--", + b"", + ] + @pytest.mark.respx(base_url=base_url) async def test_basic_union_response(self, respx_mock: MockRouter) -> None: class Model1(BaseModel):