Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TYP: Use Protocols for file-like objects in read/to_* #43951

Merged
merged 5 commits into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pandas/_testing/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
)
import zipfile

from pandas._typing import FilePathOrBuffer
from pandas._typing import (
FilePath,
ReadPickleBuffer,
)
from pandas.compat import (
get_lzma_file,
import_lzma,
Expand Down Expand Up @@ -277,7 +280,7 @@ def can_connect(url, error_classes=None):


def round_trip_pickle(
obj: Any, path: FilePathOrBuffer | None = None
obj: Any, path: FilePath | ReadPickleBuffer | None = None
) -> DataFrame | Series:
"""
Pickle an object and then read it again.
Expand Down
85 changes: 74 additions & 11 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,24 @@
from __future__ import annotations

from datetime import (
datetime,
timedelta,
tzinfo,
)
from io import (
BufferedIOBase,
RawIOBase,
TextIOBase,
)
from mmap import mmap
from os import PathLike
from typing import (
IO,
TYPE_CHECKING,
Any,
AnyStr,
Callable,
Collection,
Dict,
Hashable,
Iterator,
List,
Literal,
Mapping,
Optional,
Protocol,
Sequence,
Tuple,
Type as type_t,
Expand Down Expand Up @@ -169,9 +165,76 @@
PythonFuncType = Callable[[Any], Any]

# filenames and file-like-objects
Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, mmap]
FileOrBuffer = Union[str, Buffer[AnyStr]]
FilePathOrBuffer = Union["PathLike[str]", FileOrBuffer[AnyStr]]
AnyStr_cov = TypeVar("AnyStr_cov", str, bytes, covariant=True)
AnyStr_con = TypeVar("AnyStr_con", str, bytes, contravariant=True)


# Structural (Protocol) type listing the members shared by the read and write
# buffer protocols defined below. The per-member comments name the consumer
# that requires each member.
class BaseBuffer(Protocol):
@property
def mode(self) -> str:
# for _get_filepath_or_buffer
...

def fileno(self) -> int:
# for _MMapWrapper
...

# The double-underscore parameter names mark the arguments as positional-only
# for type checkers, so implementations may name them freely.
def seek(self, __offset: int, __whence: int = ...) -> int:
# with one argument: gzip.GzipFile, bz2.BZ2File
# with two arguments: zip.ZipFile, read_sas
...

def seekable(self) -> bool:
# for bz2.BZ2File
...

def tell(self) -> int:
# for zip.ZipFile, read_stata, to_stata
...


# Protocol for readable buffers: BaseBuffer plus read(). Generic over the
# covariant AnyStr_cov (str or bytes), which is the type read() returns.
class ReadBuffer(BaseBuffer, Protocol[AnyStr_cov]):
def read(self, __n: int | None = ...) -> AnyStr_cov:
# for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
...


# Protocol for writable buffers: BaseBuffer plus write() and flush(). Generic
# over the contravariant AnyStr_con (str or bytes), which is the type write()
# accepts. Return types are Any, so no particular return value is required.
class WriteBuffer(BaseBuffer, Protocol[AnyStr_con]):
def write(self, __b: AnyStr_con) -> Any:
# for gzip.GzipFile, bz2.BZ2File
...

def flush(self) -> Any:
# for gzip.GzipFile, bz2.BZ2File
...


class ReadPickleBuffer(ReadBuffer[bytes], Protocol):
    """
    Binary read buffer that additionally supports ``readline``.

    The buffer is bound to ``bytes`` through ``ReadBuffer[bytes]`` and the
    class itself is not generic, so ``readline`` is annotated as returning
    ``bytes`` rather than an unbound type variable.
    """

    def readline(self) -> bytes:
        ...


# Binary write buffer (WriteBuffer[bytes]) that additionally supports
# truncate(). NOTE(review): presumably required by the Excel writers, going by
# the name -- confirm against the callers.
class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
def truncate(self, size: int | None = ...) -> int:
...


class ReadCsvBuffer(ReadBuffer[AnyStr_cov], Protocol):
def __iter__(self) -> Iterator[AnyStr_cov]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not for the other engines?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The c-engine is fine with ReadBuffer. I'll test whether ReadBuffer is also sufficient for the pyarrow engine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pyarrow needs closed

# for engine=python
...

def readline(self) -> AnyStr_cov:
# for engine=python
...

@property
def closed(self) -> bool:
# for engine=pyarrow
...


FilePath = Union[str, "PathLike[str]"]

# for arbitrary kwargs passed during reading/writing files
StorageOptions = Optional[Dict[str, Any]]
Expand Down
56 changes: 27 additions & 29 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import functools
from io import StringIO
import itertools
import mmap
from textwrap import dedent
from typing import (
IO,
Expand Down Expand Up @@ -55,7 +54,7 @@
CompressionOptions,
Dtype,
DtypeObj,
FilePathOrBuffer,
FilePath,
FillnaOptions,
FloatFormatType,
FormattersType,
Expand All @@ -71,6 +70,7 @@
TimedeltaConvertibleTypes,
TimestampConvertibleTypes,
ValueKeyFunc,
WriteBuffer,
npt,
)
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -1056,7 +1056,7 @@ def _repr_html_(self) -> str | None:
@Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
def to_string(
self,
buf: FilePathOrBuffer[str] | None = None,
buf: FilePath | WriteBuffer[str] | None = None,
columns: Sequence[str] | None = None,
col_space: int | None = None,
header: bool | Sequence[str] = True,
Expand Down Expand Up @@ -2432,7 +2432,7 @@ def _from_arrays(
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
def to_stata(
self,
path: FilePathOrBuffer,
path: FilePath | WriteBuffer[bytes],
convert_dates: dict[Hashable, str] | None = None,
write_index: bool = True,
byteorder: str | None = None,
Expand All @@ -2454,11 +2454,9 @@ def to_stata(

Parameters
----------
path : str, buffer or path object
String, path object (pathlib.Path or py._path.local.LocalPath) or
object implementing a binary write() function. If using a buffer
then the buffer will not be automatically closed after the file
data has been written.
path : str, path object, or buffer
String, path object (implementing ``os.PathLike[str]``), or file-like
object implementing a binary ``write()`` function.

.. versionchanged:: 1.0.0

Expand Down Expand Up @@ -2600,14 +2598,16 @@ def to_stata(
writer.write_file()

@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
def to_feather(self, path: FilePathOrBuffer[bytes], **kwargs) -> None:
def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None:
"""
Write a DataFrame to the binary Feather format.

Parameters
----------
path : str or file-like object
If a string, it will be used as Root Directory path.
path : str, path object, file-like object
String, path object (implementing ``os.PathLike[str]``), or file-like
object implementing a binary ``write()`` function. If a string or a path,
it will be used as Root Directory path when writing a partitioned dataset.
**kwargs :
Additional keywords passed to :func:`pyarrow.feather.write_feather`.
Starting with pyarrow 0.17, this includes the `compression`,
Expand Down Expand Up @@ -2677,15 +2677,14 @@ def to_markdown(
return result

with get_handle(buf, mode, storage_options=storage_options) as handles:
assert not isinstance(handles.handle, (str, mmap.mmap))
handles.handle.writelines(result)
handles.handle.write(result)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

result is a str; writelines worked only because str is also a Sequence.

return None

@doc(storage_options=generic._shared_docs["storage_options"])
@deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
def to_parquet(
self,
path: FilePathOrBuffer | None = None,
path: FilePath | WriteBuffer[bytes] | None = None,
engine: str = "auto",
compression: str | None = "snappy",
index: bool | None = None,
Expand All @@ -2703,13 +2702,11 @@ def to_parquet(

Parameters
----------
path : str or file-like object, default None
If a string, it will be used as Root Directory path
when writing a partitioned dataset. By file-like object,
we refer to objects with a write() method, such as a file handle
(e.g. via builtin open function) or io.BytesIO. The engine
fastparquet does not accept file-like objects. If path is None,
a bytes object is returned.
path : str, path object, file-like object, or None, default None
String, path object (implementing ``os.PathLike[str]``), or file-like
object implementing a binary ``write()`` function. If None, the result is
returned as bytes. If a string or path, it will be used as Root Directory
path when writing a partitioned dataset.

.. versionchanged:: 1.2.0

Expand Down Expand Up @@ -2804,7 +2801,7 @@ def to_parquet(
@Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
def to_html(
self,
buf: FilePathOrBuffer[str] | None = None,
buf: FilePath | WriteBuffer[str] | None = None,
columns: Sequence[str] | None = None,
col_space: ColspaceArgType | None = None,
header: bool | Sequence[str] = True,
Expand Down Expand Up @@ -2891,7 +2888,7 @@ def to_html(
@doc(storage_options=generic._shared_docs["storage_options"])
def to_xml(
self,
path_or_buffer: FilePathOrBuffer | None = None,
path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
index: bool = True,
root_name: str | None = "data",
row_name: str | None = "row",
Expand All @@ -2904,7 +2901,7 @@ def to_xml(
xml_declaration: bool | None = True,
pretty_print: bool | None = True,
parser: str | None = "lxml",
stylesheet: FilePathOrBuffer | None = None,
stylesheet: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
compression: CompressionOptions = "infer",
storage_options: StorageOptions = None,
) -> str | None:
Expand All @@ -2915,9 +2912,10 @@ def to_xml(

Parameters
----------
path_or_buffer : str, path object or file-like object, optional
File to write output to. If None, the output is returned as a
string.
path_or_buffer : str, path object, file-like object, or None, default None
String, path object (implementing ``os.PathLike[str]``), or file-like
object implementing a ``write()`` function. If None, the result is returned
as a string.
index : bool, default True
Whether to include index in XML document.
root_name : str, default 'data'
Expand Down Expand Up @@ -3211,7 +3209,7 @@ def to_xml(
def info(
self,
verbose: bool | None = None,
buf: IO[str] | None = None,
buf: WriteBuffer[str] | None = None,
max_cols: int | None = None,
memory_usage: bool | str | None = None,
show_counts: bool | None = None,
Expand Down
24 changes: 13 additions & 11 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from typing import (
TYPE_CHECKING,
Any,
AnyStr,
Callable,
Hashable,
Literal,
Expand Down Expand Up @@ -44,7 +43,7 @@
Dtype,
DtypeArg,
DtypeObj,
FilePathOrBuffer,
FilePath,
IndexKeyFunc,
IndexLabel,
JSONSerializable,
Expand All @@ -58,6 +57,7 @@
TimedeltaConvertibleTypes,
TimestampConvertibleTypes,
ValueKeyFunc,
WriteBuffer,
npt,
)
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -2332,7 +2332,7 @@ def to_excel(
@doc(storage_options=_shared_docs["storage_options"])
def to_json(
self,
path_or_buf: FilePathOrBuffer | None = None,
path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
orient: str | None = None,
date_format: str | None = None,
double_precision: int = 10,
Expand All @@ -2353,9 +2353,10 @@ def to_json(

Parameters
----------
path_or_buf : str or file handle, optional
File path or object. If not specified, the result is returned as
a string.
path_or_buf : str, path object, file-like object, or None, default None
String, path object (implementing os.PathLike[str]), or file-like
object implementing a write() function. If None, the result is
returned as a string.
orient : str
Indication of expected JSON string format.

Expand Down Expand Up @@ -3337,7 +3338,7 @@ def to_latex(
@doc(storage_options=_shared_docs["storage_options"])
def to_csv(
self,
path_or_buf: FilePathOrBuffer[AnyStr] | None = None,
path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
sep: str = ",",
na_rep: str = "",
float_format: str | None = None,
Expand All @@ -3364,10 +3365,11 @@ def to_csv(

Parameters
----------
path_or_buf : str or file handle, default None
File path or object, if None is provided the result is returned as
a string. If a non-binary file object is passed, it should be opened
with `newline=''`, disabling universal newlines. If a binary
path_or_buf : str, path object, file-like object, or None, default None
String, path object (implementing os.PathLike[str]), or file-like
object implementing a write() function. If None, the result is
returned as a string. If a non-binary file object is passed, it should
be opened with `newline=''`, disabling universal newlines. If a binary
file object is passed, `mode` might need to contain a `'b'`.

.. versionchanged:: 1.2.0
Expand Down
Loading