Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[s3] Fix newline handling for text-mode files #1381

Merged
merged 1 commit into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions storages/backends/s3.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import io
import mimetypes
import os
import posixpath
Expand Down Expand Up @@ -123,7 +124,6 @@ def __init__(self, name, mode, storage, buffer_size=None):
self._storage = storage
self.name = name[len(self._storage.location) :].lstrip("/")
self._mode = mode
self._force_mode = (lambda b: b) if "b" in mode else (lambda b: b.decode())
self.obj = storage.bucket.Object(name)
if "w" not in mode:
# Force early RAII-style exception if object does not exist
Expand Down Expand Up @@ -184,6 +184,19 @@ def _get_file(self):
self._file.seek(0)
if self._storage.gzip and self.obj.content_encoding == "gzip":
self._file = self._decompress_file(mode=self._mode, file=self._file)
elif "b" not in self._mode:
if hasattr(self._file, "readable"):
# For versions > Python 3.10 compatibility
# See SpooledTemporaryFile changes in 3.11 (https://docs.python.org/3/library/tempfile.html) # noqa: E501
# Now fully implements the io.BufferedIOBase and io.TextIOBase abstract base classes allowing the file # noqa: E501
# to be readable in the mode that it was specified (without accessing the underlying _file object). # noqa: E501
# In this case, we need to wrap the file in a TextIOWrapper to ensure that the file is read as a text file. # noqa: E501
self._file = io.TextIOWrapper(self._file, encoding="utf-8")
else:
# For versions <= Python 3.10 compatibility
self._file = io.TextIOWrapper(
self._file._file, encoding="utf-8"
)
self._closed = False
return self._file

Expand All @@ -195,12 +208,12 @@ def _set_file(self, value):
def read(self, *args, **kwargs):
if "r" not in self._mode:
raise AttributeError("File was not opened in read mode.")
return self._force_mode(super().read(*args, **kwargs))
return super().read(*args, **kwargs)

def readline(self, *args, **kwargs):
if "r" not in self._mode:
raise AttributeError("File was not opened in read mode.")
return self._force_mode(super().readline(*args, **kwargs))
return super().readline(*args, **kwargs)

def readlines(self):
return list(self)
Expand Down
60 changes: 39 additions & 21 deletions tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,23 +317,6 @@ def test_storage_open_read_string(self):
content_str = file.read()
self.assertEqual(content_str, "")

def test_storage_open_readlines(self):
"""
Test readlines with file opened in "r" and "rb" modes
"""
name = "test_open_readlines_string.txt"
with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2")
with self.storage.open(name, "r") as f1:
f1._file = temp_file
content_lines = f1.readlines()
self.assertEqual(content_lines, ["line1\n", "line2"])
temp_file.seek(0)
with self.storage.open(name, "rb") as f2:
f2._file = temp_file
content_lines = f2.readlines()
self.assertEqual(content_lines, [b"line1\n", b"line2"])

def test_storage_open_write(self):
"""
Test opening a file in write mode
Expand All @@ -348,7 +331,7 @@ def test_storage_open_write(self):
"ACL": "public-read",
}

with self.storage.open(name, "w") as file:
with self.storage.open(name, "wb") as file:
self.storage.bucket.Object.assert_called_with(name)
obj = self.storage.bucket.Object.return_value
# Set the name of the mock object
Expand Down Expand Up @@ -398,7 +381,7 @@ def test_storage_open_no_write(self):
"StorageClass": "REDUCED_REDUNDANCY",
}

with self.storage.open(name, "w"):
with self.storage.open(name, "wb"):
self.storage.bucket.Object.assert_called_with(name)
obj = self.storage.bucket.Object.return_value
obj.load.side_effect = ClientError(
Expand Down Expand Up @@ -429,7 +412,7 @@ def test_storage_open_no_overwrite_existing(self):
"StorageClass": "REDUCED_REDUNDANCY",
}

with self.storage.open(name, "w"):
with self.storage.open(name, "wb"):
self.storage.bucket.Object.assert_called_with(name)
obj = self.storage.bucket.Object.return_value

Expand All @@ -451,7 +434,7 @@ def test_storage_write_beyond_buffer_size(self):
"StorageClass": "REDUCED_REDUNDANCY",
}

with self.storage.open(name, "w") as file:
with self.storage.open(name, "wb") as file:
self.storage.bucket.Object.assert_called_with(name)
obj = self.storage.bucket.Object.return_value
# Set the name of the mock object
Expand Down Expand Up @@ -1167,6 +1150,41 @@ def test_content_type_not_detectable(self):
s3.S3Storage.default_content_type,
)

def test_storage_open_reading_with_newlines(self):
"""Test file reading with "r" and "rb" and various newline characters."""
name = "test_storage_open_read_with_newlines.txt"
with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2\r\nmore\rtext\n")
self.storage.save(name, temp_file)
file = self.storage.open(name, "r")
content_str = file.read()
file.close()
self.assertEqual(content_str, "line1\nline2\nmore\ntext\n")

with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2\r\nmore\rtext\n")
self.storage.save(name, temp_file)
file = self.storage.open(name, "rb")
content_str = file.read()
file.close()
self.assertEqual(content_str, b"line1\nline2\r\nmore\rtext\n")

with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2\r\nmore\rtext")
self.storage.save(name, temp_file)
file = self.storage.open(name, "r")
content_lines = file.readlines()
file.close()
self.assertEqual(content_lines, ["line1\n", "line2\n", "more\n", "text"])

with io.BytesIO() as temp_file:
temp_file.write(b"line1\nline2\r\nmore\rtext")
self.storage.save(name, temp_file)
file = self.storage.open(name, "rb")
content_lines = file.readlines()
file.close()
self.assertEqual(content_lines, [b"line1\n", b"line2\r\n", b"more\r", b"text"])


class TestBackwardsNames(TestCase):
def test_importing(self):
Expand Down
Loading