Skip to content

Commit 2cc1227

Browse files
authored
TYP: Use Protocols for file-like objects in read/to_* (#43951)
1 parent b3f33a1 commit 2cc1227

32 files changed

+479
-341
lines changed

pandas/_testing/_io.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
)
1111
import zipfile
1212

13-
from pandas._typing import FilePathOrBuffer
13+
from pandas._typing import (
14+
FilePath,
15+
ReadPickleBuffer,
16+
)
1417
from pandas.compat import (
1518
get_lzma_file,
1619
import_lzma,
@@ -277,7 +280,7 @@ def can_connect(url, error_classes=None):
277280

278281

279282
def round_trip_pickle(
280-
obj: Any, path: FilePathOrBuffer | None = None
283+
obj: Any, path: FilePath | ReadPickleBuffer | None = None
281284
) -> DataFrame | Series:
282285
"""
283286
Pickle an object and then read it again.

pandas/_typing.py

+74-11
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,24 @@
1+
from __future__ import annotations
2+
13
from datetime import (
24
datetime,
35
timedelta,
46
tzinfo,
57
)
6-
from io import (
7-
BufferedIOBase,
8-
RawIOBase,
9-
TextIOBase,
10-
)
11-
from mmap import mmap
128
from os import PathLike
139
from typing import (
14-
IO,
1510
TYPE_CHECKING,
1611
Any,
17-
AnyStr,
1812
Callable,
1913
Collection,
2014
Dict,
2115
Hashable,
16+
Iterator,
2217
List,
2318
Literal,
2419
Mapping,
2520
Optional,
21+
Protocol,
2622
Sequence,
2723
Tuple,
2824
Type as type_t,
@@ -169,9 +165,76 @@
169165
PythonFuncType = Callable[[Any], Any]
170166

171167
# filenames and file-like-objects
172-
Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, mmap]
173-
FileOrBuffer = Union[str, Buffer[AnyStr]]
174-
FilePathOrBuffer = Union["PathLike[str]", FileOrBuffer[AnyStr]]
168+
# Covariant str/bytes type variable: used for the type a buffer produces
# (e.g. the return type of ReadBuffer.read).
AnyStr_cov = TypeVar("AnyStr_cov", str, bytes, covariant=True)
169+
# Contravariant str/bytes type variable: used for the type a buffer consumes
# (e.g. the argument type of WriteBuffer.write).
AnyStr_con = TypeVar("AnyStr_con", str, bytes, contravariant=True)
170+
171+
172+
class BaseBuffer(Protocol):
    """
    Structural type with the members common to the read and write buffer
    protocols defined in this module.

    Each member's docstring names the consumer that requires it.
    """

    @property
    def mode(self) -> str:
        """Needed by ``_get_filepath_or_buffer``."""
        ...

    def fileno(self) -> int:
        """Needed by ``_MMapWrapper``."""
        ...

    def seek(self, __offset: int, __whence: int = ...) -> int:
        """
        One-argument form: ``gzip.GzipFile``, ``bz2.BZ2File``.
        Two-argument form: ``zipfile.ZipFile``, ``read_sas``.
        """
        ...

    def seekable(self) -> bool:
        """Needed by ``bz2.BZ2File``."""
        ...

    def tell(self) -> int:
        """Needed by ``zipfile.ZipFile``, ``read_stata`` and ``to_stata``."""
        ...
195+
196+
class ReadBuffer(BaseBuffer, Protocol[AnyStr_cov]):
    """
    Readable buffer protocol, generic over the ``str``/``bytes`` type that
    ``read`` returns.
    """

    def read(self, __n: int | None = ...) -> AnyStr_cov:
        """Needed by ``BytesIOWrapper``, ``gzip.GzipFile`` and ``bz2.BZ2File``."""
        ...
201+
202+
class WriteBuffer(BaseBuffer, Protocol[AnyStr_con]):
    """
    Writable buffer protocol, generic over the ``str``/``bytes`` type that
    ``write`` accepts.
    """

    def write(self, __b: AnyStr_con) -> Any:
        """Needed by ``gzip.GzipFile`` and ``bz2.BZ2File``."""
        ...

    def flush(self) -> Any:
        """Needed by ``gzip.GzipFile`` and ``bz2.BZ2File``."""
        ...
211+
212+
class ReadPickleBuffer(ReadBuffer[bytes], Protocol):
    """
    Binary readable buffer that additionally provides ``readline``.

    This protocol is not generic: it pins ``ReadBuffer`` to ``bytes``. The
    return type of ``readline`` is therefore spelled ``bytes`` directly,
    because no type variable is bound in this class.
    """

    def readline(self) -> bytes:
        ...
215+
216+
217+
class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
    """
    Binary writable buffer that additionally provides ``truncate``.
    """

    # NOTE(review): presumably required by the Excel writers, going by the
    # class name; the consumer is not visible here -- confirm.
    def truncate(self, size: int | None = ...) -> int:
        ...
220+
221+
222+
class ReadCsvBuffer(ReadBuffer[AnyStr_cov], Protocol):
    """
    Readable buffer with the extra members that specific CSV parser engines
    require, generic over the ``str``/``bytes`` type it yields.
    """

    def __iter__(self) -> Iterator[AnyStr_cov]:
        """Needed for ``engine=python``."""
        ...

    def readline(self) -> AnyStr_cov:
        """Needed for ``engine=python``."""
        ...

    @property
    def closed(self) -> bool:
        """Needed for ``engine=pyarrow``."""
        ...
235+
236+
237+
# A filesystem location: a plain string or an object implementing
# os.PathLike[str].
FilePath = Union[str, "PathLike[str]"]
175238

176239
# for arbitrary kwargs passed during reading/writing files
177240
StorageOptions = Optional[Dict[str, Any]]

pandas/core/frame.py

+27-29
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import functools
1717
from io import StringIO
1818
import itertools
19-
import mmap
2019
from textwrap import dedent
2120
from typing import (
2221
IO,
@@ -55,7 +54,7 @@
5554
CompressionOptions,
5655
Dtype,
5756
DtypeObj,
58-
FilePathOrBuffer,
57+
FilePath,
5958
FillnaOptions,
6059
FloatFormatType,
6160
FormattersType,
@@ -71,6 +70,7 @@
7170
TimedeltaConvertibleTypes,
7271
TimestampConvertibleTypes,
7372
ValueKeyFunc,
73+
WriteBuffer,
7474
npt,
7575
)
7676
from pandas.compat._optional import import_optional_dependency
@@ -1056,7 +1056,7 @@ def _repr_html_(self) -> str | None:
10561056
@Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
10571057
def to_string(
10581058
self,
1059-
buf: FilePathOrBuffer[str] | None = None,
1059+
buf: FilePath | WriteBuffer[str] | None = None,
10601060
columns: Sequence[str] | None = None,
10611061
col_space: int | None = None,
10621062
header: bool | Sequence[str] = True,
@@ -2432,7 +2432,7 @@ def _from_arrays(
24322432
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
24332433
def to_stata(
24342434
self,
2435-
path: FilePathOrBuffer,
2435+
path: FilePath | WriteBuffer[bytes],
24362436
convert_dates: dict[Hashable, str] | None = None,
24372437
write_index: bool = True,
24382438
byteorder: str | None = None,
@@ -2454,11 +2454,9 @@ def to_stata(
24542454
24552455
Parameters
24562456
----------
2457-
path : str, buffer or path object
2458-
String, path object (pathlib.Path or py._path.local.LocalPath) or
2459-
object implementing a binary write() function. If using a buffer
2460-
then the buffer will not be automatically closed after the file
2461-
data has been written.
2457+
path : str, path object, or buffer
2458+
String, path object (implementing ``os.PathLike[str]``), or file-like
2459+
object implementing a binary ``write()`` function.
24622460
24632461
.. versionchanged:: 1.0.0
24642462
@@ -2600,14 +2598,16 @@ def to_stata(
26002598
writer.write_file()
26012599

26022600
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
2603-
def to_feather(self, path: FilePathOrBuffer[bytes], **kwargs) -> None:
2601+
def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None:
26042602
"""
26052603
Write a DataFrame to the binary Feather format.
26062604
26072605
Parameters
26082606
----------
2609-
path : str or file-like object
2610-
If a string, it will be used as Root Directory path.
2607+
path : str, path object, or file-like object
2608+
String, path object (implementing ``os.PathLike[str]``), or file-like
2609+
object implementing a binary ``write()`` function. If a string or a path,
2610+
it will be used as Root Directory path when writing a partitioned dataset.
26112611
**kwargs :
26122612
Additional keywords passed to :func:`pyarrow.feather.write_feather`.
26132613
Starting with pyarrow 0.17, this includes the `compression`,
@@ -2677,15 +2677,14 @@ def to_markdown(
26772677
return result
26782678

26792679
with get_handle(buf, mode, storage_options=storage_options) as handles:
2680-
assert not isinstance(handles.handle, (str, mmap.mmap))
2681-
handles.handle.writelines(result)
2680+
handles.handle.write(result)
26822681
return None
26832682

26842683
@doc(storage_options=generic._shared_docs["storage_options"])
26852684
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
26862685
def to_parquet(
26872686
self,
2688-
path: FilePathOrBuffer | None = None,
2687+
path: FilePath | WriteBuffer[bytes] | None = None,
26892688
engine: str = "auto",
26902689
compression: str | None = "snappy",
26912690
index: bool | None = None,
@@ -2703,13 +2702,11 @@ def to_parquet(
27032702
27042703
Parameters
27052704
----------
2706-
path : str or file-like object, default None
2707-
If a string, it will be used as Root Directory path
2708-
when writing a partitioned dataset. By file-like object,
2709-
we refer to objects with a write() method, such as a file handle
2710-
(e.g. via builtin open function) or io.BytesIO. The engine
2711-
fastparquet does not accept file-like objects. If path is None,
2712-
a bytes object is returned.
2705+
path : str, path object, file-like object, or None, default None
2706+
String, path object (implementing ``os.PathLike[str]``), or file-like
2707+
object implementing a binary ``write()`` function. If None, the result is
2708+
returned as bytes. If a string or path, it will be used as Root Directory
2709+
path when writing a partitioned dataset.
27132710
27142711
.. versionchanged:: 1.2.0
27152712
@@ -2804,7 +2801,7 @@ def to_parquet(
28042801
@Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
28052802
def to_html(
28062803
self,
2807-
buf: FilePathOrBuffer[str] | None = None,
2804+
buf: FilePath | WriteBuffer[str] | None = None,
28082805
columns: Sequence[str] | None = None,
28092806
col_space: ColspaceArgType | None = None,
28102807
header: bool | Sequence[str] = True,
@@ -2891,7 +2888,7 @@ def to_html(
28912888
@doc(storage_options=generic._shared_docs["storage_options"])
28922889
def to_xml(
28932890
self,
2894-
path_or_buffer: FilePathOrBuffer | None = None,
2891+
path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
28952892
index: bool = True,
28962893
root_name: str | None = "data",
28972894
row_name: str | None = "row",
@@ -2904,7 +2901,7 @@ def to_xml(
29042901
xml_declaration: bool | None = True,
29052902
pretty_print: bool | None = True,
29062903
parser: str | None = "lxml",
2907-
stylesheet: FilePathOrBuffer | None = None,
2904+
stylesheet: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
29082905
compression: CompressionOptions = "infer",
29092906
storage_options: StorageOptions = None,
29102907
) -> str | None:
@@ -2915,9 +2912,10 @@ def to_xml(
29152912
29162913
Parameters
29172914
----------
2918-
path_or_buffer : str, path object or file-like object, optional
2919-
File to write output to. If None, the output is returned as a
2920-
string.
2915+
path_or_buffer : str, path object, file-like object, or None, default None
2916+
String, path object (implementing ``os.PathLike[str]``), or file-like
2917+
object implementing a ``write()`` function. If None, the result is returned
2918+
as a string.
29212919
index : bool, default True
29222920
Whether to include index in XML document.
29232921
root_name : str, default 'data'
@@ -3211,7 +3209,7 @@ def to_xml(
32113209
def info(
32123210
self,
32133211
verbose: bool | None = None,
3214-
buf: IO[str] | None = None,
3212+
buf: WriteBuffer[str] | None = None,
32153213
max_cols: int | None = None,
32163214
memory_usage: bool | str | None = None,
32173215
show_counts: bool | None = None,

pandas/core/generic.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from typing import (
1313
TYPE_CHECKING,
1414
Any,
15-
AnyStr,
1615
Callable,
1716
Hashable,
1817
Literal,
@@ -44,7 +43,7 @@
4443
Dtype,
4544
DtypeArg,
4645
DtypeObj,
47-
FilePathOrBuffer,
46+
FilePath,
4847
IndexKeyFunc,
4948
IndexLabel,
5049
JSONSerializable,
@@ -58,6 +57,7 @@
5857
TimedeltaConvertibleTypes,
5958
TimestampConvertibleTypes,
6059
ValueKeyFunc,
60+
WriteBuffer,
6161
npt,
6262
)
6363
from pandas.compat._optional import import_optional_dependency
@@ -2332,7 +2332,7 @@ def to_excel(
23322332
@doc(storage_options=_shared_docs["storage_options"])
23332333
def to_json(
23342334
self,
2335-
path_or_buf: FilePathOrBuffer | None = None,
2335+
path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
23362336
orient: str | None = None,
23372337
date_format: str | None = None,
23382338
double_precision: int = 10,
@@ -2353,9 +2353,10 @@ def to_json(
23532353
23542354
Parameters
23552355
----------
2356-
path_or_buf : str or file handle, optional
2357-
File path or object. If not specified, the result is returned as
2358-
a string.
2356+
path_or_buf : str, path object, file-like object, or None, default None
2357+
String, path object (implementing ``os.PathLike[str]``), or file-like
2358+
object implementing a ``write()`` function. If None, the result is
2359+
returned as a string.
23592360
orient : str
23602361
Indication of expected JSON string format.
23612362
@@ -3337,7 +3338,7 @@ def to_latex(
33373338
@doc(storage_options=_shared_docs["storage_options"])
33383339
def to_csv(
33393340
self,
3340-
path_or_buf: FilePathOrBuffer[AnyStr] | None = None,
3341+
path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
33413342
sep: str = ",",
33423343
na_rep: str = "",
33433344
float_format: str | None = None,
@@ -3364,10 +3365,11 @@ def to_csv(
33643365
33653366
Parameters
33663367
----------
3367-
path_or_buf : str or file handle, default None
3368-
File path or object, if None is provided the result is returned as
3369-
a string. If a non-binary file object is passed, it should be opened
3370-
with `newline=''`, disabling universal newlines. If a binary
3368+
path_or_buf : str, path object, file-like object, or None, default None
3369+
String, path object (implementing ``os.PathLike[str]``), or file-like
3370+
object implementing a ``write()`` function. If None, the result is
3371+
returned as a string. If a non-binary file object is passed, it should
3372+
be opened with `newline=''`, disabling universal newlines. If a binary
33713373
file object is passed, `mode` might need to contain a `'b'`.
33723374
33733375
.. versionchanged:: 1.2.0

0 commit comments

Comments
 (0)