Skip to content

Commit

Permalink
Streaming multipart support (#2382)
Browse files Browse the repository at this point in the history
* Streaming multipart support

* Update tests for streaming multipart
  • Loading branch information
tomchristie authored Dec 12, 2022
1 parent af56476 commit b97c059
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 25 deletions.
51 changes: 29 additions & 22 deletions httpx/_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,19 +135,18 @@ def __init__(self, name: str, value: FileTypes) -> None:
self.file = fileobj
self.headers = headers

def get_length(self) -> int:
def get_length(self) -> typing.Optional[int]:
headers = self.render_headers()

if isinstance(self.file, (str, bytes)):
return len(headers) + len(to_bytes(self.file))

# Let's do our best not to read `file` into memory.
file_length = peek_filelike_length(self.file)

# If we can't determine the filesize without reading it into memory,
# then return `None` here, to indicate an unknown file length.
if file_length is None:
# As a last resort, read file and cache contents for later.
assert not hasattr(self, "_data")
self._data = to_bytes(self.file.read())
file_length = len(self._data)
return None

return len(headers) + file_length

Expand All @@ -173,13 +172,11 @@ def render_data(self) -> typing.Iterator[bytes]:
yield to_bytes(self.file)
return

if hasattr(self, "_data"):
# Already rendered.
yield self._data
return

if hasattr(self.file, "seek"):
self.file.seek(0)
try:
self.file.seek(0)
except io.UnsupportedOperation:
pass

chunk = self.file.read(self.CHUNK_SIZE)
while chunk:
Expand Down Expand Up @@ -232,24 +229,34 @@ def iter_chunks(self) -> typing.Iterator[bytes]:
yield b"\r\n"
yield b"--%s--\r\n" % self.boundary

def iter_chunks_lengths(self) -> typing.Iterator[int]:
def get_content_length(self) -> typing.Optional[int]:
"""
Return the length of the multipart encoded content, or `None` if
any of the files have a length that cannot be determined upfront.
"""
boundary_length = len(self.boundary)
# Follow closely what `.iter_chunks()` does.
length = 0

for field in self.fields:
yield 2 + boundary_length + 2
yield field.get_length()
yield 2
yield 2 + boundary_length + 4
field_length = field.get_length()
if field_length is None:
return None

length += 2 + boundary_length + 2 # b"--{boundary}\r\n"
length += field_length
length += 2 # b"\r\n"

def get_content_length(self) -> int:
return sum(self.iter_chunks_lengths())
length += 2 + boundary_length + 4 # b"--{boundary}--\r\n"
return length

# Content stream interface.

def get_headers(self) -> typing.Dict[str, str]:
content_length = str(self.get_content_length())
content_length = self.get_content_length()
content_type = self.content_type
return {"Content-Length": content_length, "Content-Type": content_type}
if content_length is None:
return {"Transfer-Encoding": "chunked", "Content-Type": content_type}
return {"Content-Length": str(content_length), "Content-Type": content_type}

def __iter__(self) -> typing.Iterator[bytes]:
for chunk in self.iter_chunks():
Expand Down
7 changes: 4 additions & 3 deletions tests/test_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,9 @@ def test_multipart_encode_files_raises_exception_with_text_mode_file() -> None:

def test_multipart_encode_non_seekable_filelike() -> None:
"""
Test that special readable but non-seekable filelike objects are supported,
at the cost of reading them into memory at most once.
Test that special readable but non-seekable filelike objects are supported.
In this case uploads will use 'Transfer-Encoding: chunked', instead of
a 'Content-Length' header.
"""

class IteratorIO(io.IOBase):
Expand Down Expand Up @@ -410,7 +411,7 @@ def data() -> typing.Iterator[bytes]:
)
assert headers == {
"Content-Type": "multipart/form-data; boundary=+++",
"Content-Length": str(len(content)),
"Transfer-Encoding": "chunked",
}
assert content == b"".join(stream)

Expand Down

0 comments on commit b97c059

Please sign in to comment.