Skip to content

Commit ecbdfd3

Browse files
authored
ENH: support SpooledTemporaryFile (#44761)
1 parent 1113779 commit ecbdfd3

File tree

4 files changed

+58
-39
lines changed

4 files changed

+58
-39
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,7 @@ I/O
752752
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
753753
- Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
754754
- Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
755+
- Bug in :func:`read_csv` when passing a ``tempfile.SpooledTemporaryFile`` opened in binary mode (:issue:`44748`)
755756
-
756757

757758
Period

pandas/io/common.py

+47-29
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from collections import abc
77
import dataclasses
88
import gzip
9-
import io
109
from io import (
1110
BufferedIOBase,
1211
BytesIO,
@@ -18,7 +17,6 @@
1817
import mmap
1918
import os
2019
from pathlib import Path
21-
import tempfile
2220
from typing import (
2321
IO,
2422
Any,
@@ -104,7 +102,7 @@ def close(self) -> None:
104102
avoid closing the potentially user-created buffer.
105103
"""
106104
if self.is_wrapped:
107-
assert isinstance(self.handle, (TextIOWrapper, BytesIOWrapper))
105+
assert isinstance(self.handle, TextIOWrapper)
108106
self.handle.flush()
109107
self.handle.detach()
110108
self.created_handles.remove(self.handle)
@@ -779,20 +777,17 @@ def get_handle(
779777
# Convert BytesIO or file objects passed with an encoding
780778
is_wrapped = False
781779
if not is_text and ioargs.mode == "rb" and isinstance(handle, TextIOBase):
782-
handle = BytesIOWrapper(
780+
# not added to handles as it does not open/buffer resources
781+
handle = _BytesIOWrapper(
783782
handle,
784783
encoding=ioargs.encoding,
785784
)
786-
handles.append(handle)
787-
# the (text) handle is always provided by the caller
788-
# since get_handle would have opened it in binary mode
789-
is_wrapped = True
790785
elif is_text and (compression or _is_binary_mode(handle, ioargs.mode)):
791786
handle = TextIOWrapper(
792787
# error: Argument 1 to "TextIOWrapper" has incompatible type
793788
# "Union[IO[bytes], IO[Any], RawIOBase, BufferedIOBase, TextIOBase, mmap]";
794789
# expected "IO[bytes]"
795-
handle, # type: ignore[arg-type]
790+
_IOWrapper(handle), # type: ignore[arg-type]
796791
encoding=ioargs.encoding,
797792
errors=errors,
798793
newline="",
@@ -935,7 +930,7 @@ def __init__(
935930
self.decode = decode
936931

937932
self.attributes = {}
938-
for attribute in ("seekable", "readable", "writeable"):
933+
for attribute in ("seekable", "readable"):
939934
if not hasattr(f, attribute):
940935
continue
941936
self.attributes[attribute] = getattr(f, attribute)()
@@ -976,11 +971,40 @@ def __next__(self) -> str:
976971
return newline.lstrip("\n")
977972

978973

979-
# Wrapper that wraps a StringIO buffer and reads bytes from it
980-
# Created for compat with pyarrow read_csv
981-
class BytesIOWrapper(io.BytesIO):
982-
buffer: StringIO | TextIOBase | None
974+
class _IOWrapper:
975+
# TextIOWrapper is overly strict: it requires that the buffer has seekable, readable,
976+
# and writable. If we have a read-only buffer, we shouldn't need writable and vice
977+
# versa. Some buffers are seek/read/writ-able but they do not have the "-able"
978+
# methods, e.g., tempfile.SpooledTemporaryFile.
979+
# If a buffer does not have the above "-able" methods, we simply assume they are
980+
# seek/read/writ-able.
981+
def __init__(self, buffer: BaseBuffer):
982+
self.buffer = buffer
983+
984+
def __getattr__(self, name: str):
985+
return getattr(self.buffer, name)
986+
987+
def readable(self) -> bool:
988+
if hasattr(self.buffer, "readable"):
989+
# error: "BaseBuffer" has no attribute "readable"
990+
return self.buffer.readable() # type: ignore[attr-defined]
991+
return True
992+
993+
def seekable(self) -> bool:
994+
if hasattr(self.buffer, "seekable"):
995+
return self.buffer.seekable()
996+
return True
997+
998+
def writable(self) -> bool:
999+
if hasattr(self.buffer, "writable"):
1000+
# error: "BaseBuffer" has no attribute "writable"
1001+
return self.buffer.writable() # type: ignore[attr-defined]
1002+
return True
9831003

1004+
1005+
class _BytesIOWrapper:
1006+
# Wrapper that wraps a StringIO buffer and reads bytes from it
1007+
# Created for compat with pyarrow read_csv
9841008
def __init__(self, buffer: StringIO | TextIOBase, encoding: str = "utf-8"):
9851009
self.buffer = buffer
9861010
self.encoding = encoding
@@ -1006,15 +1030,6 @@ def read(self, n: int | None = -1) -> bytes:
10061030
self.overflow = combined_bytestring[n:]
10071031
return to_return
10081032

1009-
def detach(self):
1010-
# Slightly modified from Python's TextIOWrapper detach method
1011-
if self.buffer is None:
1012-
raise ValueError("buffer is already detached")
1013-
self.flush()
1014-
buffer = self.buffer
1015-
self.buffer = None
1016-
return buffer
1017-
10181033

10191034
def _maybe_memory_map(
10201035
handle: str | BaseBuffer,
@@ -1042,10 +1057,15 @@ def _maybe_memory_map(
10421057

10431058
# error: Argument 1 to "_MMapWrapper" has incompatible type "Union[IO[Any],
10441059
# RawIOBase, BufferedIOBase, TextIOBase, mmap]"; expected "IO[Any]"
1045-
wrapped = cast(
1046-
BaseBuffer,
1047-
_MMapWrapper(handle, encoding, errors, decode), # type: ignore[arg-type]
1048-
)
1060+
try:
1061+
wrapped = cast(
1062+
BaseBuffer,
1063+
_MMapWrapper(handle, encoding, errors, decode), # type: ignore[arg-type]
1064+
)
1065+
finally:
1066+
for handle in reversed(handles):
1067+
# error: "BaseBuffer" has no attribute "close"
1068+
handle.close() # type: ignore[attr-defined]
10491069
handles.append(wrapped)
10501070

10511071
return wrapped, memory_map, handles
@@ -1077,8 +1097,6 @@ def _is_binary_mode(handle: FilePath | BaseBuffer, mode: str) -> bool:
10771097
codecs.StreamWriter,
10781098
codecs.StreamReader,
10791099
codecs.StreamReaderWriter,
1080-
# cannot be wrapped in TextIOWrapper GH43439
1081-
tempfile.SpooledTemporaryFile,
10821100
)
10831101
if issubclass(type(handle), text_classes):
10841102
return False

pandas/tests/io/parser/test_encoding.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -299,17 +299,16 @@ def test_readcsv_memmap_utf8(all_parsers):
299299
tm.assert_frame_equal(df, dfr)
300300

301301

302-
def test_not_readable(all_parsers, request):
302+
@pytest.mark.usefixtures("pyarrow_xfail")
303+
@pytest.mark.parametrize("mode", ["w+b", "w+t"])
304+
def test_not_readable(all_parsers, mode):
303305
# GH43439
304306
parser = all_parsers
305-
if parser.engine in ("python", "pyarrow"):
306-
mark = pytest.mark.xfail(
307-
reason="SpooledTemporaryFile does only work with the c-engine"
308-
)
309-
request.node.add_marker(mark)
310-
311-
with tempfile.SpooledTemporaryFile() as handle:
312-
handle.write(b"abcd")
307+
content = b"abcd"
308+
if "t" in mode:
309+
content = "abcd"
310+
with tempfile.SpooledTemporaryFile(mode=mode) as handle:
311+
handle.write(content)
313312
handle.seek(0)
314313
df = parser.read_csv(handle)
315314
expected = DataFrame([], columns=["abcd"])

pandas/tests/io/test_common.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -607,4 +607,5 @@ def test_errno_attribute():
607607

608608
def test_fail_mmap():
609609
with pytest.raises(UnsupportedOperation, match="fileno"):
610-
icom.get_handle(BytesIO(), "rb", memory_map=True)
610+
with BytesIO() as buffer:
611+
icom.get_handle(buffer, "rb", memory_map=True)

0 commit comments

Comments
 (0)