From e2305001c8d3e62ee1c5d95af8dd85c6a8f78228 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 26 Mar 2019 12:15:56 -0700 Subject: [PATCH 01/15] Initial typing module --- pandas/typing.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 pandas/typing.py diff --git a/pandas/typing.py b/pandas/typing.py new file mode 100644 index 0000000000000..cd23cbe38ecca --- /dev/null +++ b/pandas/typing.py @@ -0,0 +1,14 @@ +from datetime import datetime, timedelta +from pathlib import Path +from typing import AnyStr, IO, Union + +from pandas._libs.tslibs import Period, NaT, Timedelta, Timestamp + + +DateTimeLike = Union[datetime, timedelta, Period, Timedelta, Timestamp] + + +NullableDateTimeLike = Union[NaT, DateTimeLike] + + +FilePathOrBuffer = Union[str, Path, IO[AnyStr]] From 49c0c69b24434555375a9dc7493526ed01d63451 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 26 Mar 2019 20:55:26 -0700 Subject: [PATCH 02/15] Added FilePathOrBuffer type to io.parsers --- pandas/io/parsers.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5400d9bc60218..eebc3d43c4dd7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -46,6 +46,8 @@ is_file_like) from pandas.io.date_converters import generic_parser +from pandas.typing import FilePathOrBuffer + # BOM character (byte order mark) # This exists at the beginning of a file to indicate endianness # of a file (stream). Unfortunately, this marker screws up parsing, @@ -402,7 +404,7 @@ def _validate_names(names): return names -def _read(filepath_or_buffer, kwds): +def _read(filepath_or_buffer: FilePathOrBuffer, kwds): """Generic reader of line files.""" encoding = kwds.get('encoding', None) if encoding is not None: @@ -535,7 +537,7 @@ def _make_parser_function(name, default_sep=','): else: sep = default_sep - def parser_f(filepath_or_buffer, + def parser_f(filepath_or_buffer: FilePathOrBuffer, sep=sep, delimiter=None, @@ -727,8 +729,11 @@ def parser_f(filepath_or_buffer, )(read_table) -def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, - infer_nrows=100, **kwds): +def read_fwf(filepath_or_buffer: FilePathOrBuffer, + colspecs='infer', + widths=None, + infer_nrows=100, + **kwds): r""" Read a table of fixed-width formatted lines into DataFrame. From 766ad86a965f132d36499296dd541b57ce19066e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 26 Mar 2019 21:13:58 -0700 Subject: [PATCH 03/15] Added FilePathOrBuffer to io.common --- pandas/io/common.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index c1cacf39c5b08..074e734fd14fa 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -17,6 +17,8 @@ from pandas.io.formats.printing import pprint_thing +from pandas.typing import FilePathOrBuffer + # gh-12665: Alias for now and remove later. CParserError = ParserError @@ -91,7 +93,7 @@ def _is_url(url): return False -def _expand_user(filepath_or_buffer): +def _expand_user(filepath_or_buffer: FilePathOrBuffer): """Return the argument with an initial component of ~ or ~user replaced by that user's home directory. @@ -117,7 +119,7 @@ def _validate_header_arg(header): "the row(s) making up the column names") -def _stringify_path(filepath_or_buffer): +def _stringify_path(filepath_or_buffer: FilePathOrBuffer): """Attempt to convert a path-like object to a string. Parameters @@ -176,8 +178,10 @@ def is_gcs_url(url): return False -def get_filepath_or_buffer(filepath_or_buffer, encoding=None, - compression=None, mode=None): +def get_filepath_or_buffer(filepath_or_buffer: FilePathOrBuffer, + encoding=None, + compression=None, + mode=None): """ If the filepath_or_buffer is a url, translate and return the buffer. Otherwise passthrough. @@ -257,7 +261,8 @@ def file_path_to_url(path): } -def _infer_compression(filepath_or_buffer, compression): +def _infer_compression(filepath_or_buffer: FilePathOrBuffer, + compression): """ Get the compression method for filepath_or_buffer. If compression='infer', the inferred compression method is returned. Otherwise, the input @@ -311,7 +316,8 @@ def _infer_compression(filepath_or_buffer, compression): raise ValueError(msg) -def _get_handle(path_or_buf, mode, encoding=None, compression=None, +def _get_handle(path_or_buf: FilePathOrBuffer, + mode, encoding=None, compression=None, memory_map=False, is_text=True): """ Get file handle for given path/buffer and mode. From 8f4973cf75153692cac3c2493e18abbc38efdcea Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 26 Mar 2019 21:15:39 -0700 Subject: [PATCH 04/15] Added cast to suppress mypy errors --- pandas/io/parsers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index eebc3d43c4dd7..3af4e5efca52c 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -11,6 +11,7 @@ import sys from textwrap import fill import warnings +from typing import cast, IO import numpy as np @@ -442,7 +443,8 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): if should_close: try: - filepath_or_buffer.close() + # See MyPy GH issue 1424 + cast(IO, filepath_or_buffer).close() except ValueError: pass From b621f325d6297a6104534379e6d8d440942dca09 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Mar 2019 09:57:43 -0700 Subject: [PATCH 05/15] Removed DateTimeLike types --- pandas/typing.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/typing.py b/pandas/typing.py index cd23cbe38ecca..6a2056981e733 100644 --- a/pandas/typing.py +++ b/pandas/typing.py @@ -1,14 +1,5 @@ -from datetime import datetime, timedelta from pathlib import Path from typing import AnyStr, IO, Union -from pandas._libs.tslibs import Period, NaT, Timedelta, Timestamp - - -DateTimeLike = Union[datetime, timedelta, Period, Timedelta, Timestamp] - - -NullableDateTimeLike = Union[NaT, DateTimeLike] - FilePathOrBuffer = Union[str, Path, IO[AnyStr]] From 6b9f0eb9c6de0a1ef3d2ea742040f99e41a7044b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Mar 2019 09:59:09 -0700 Subject: [PATCH 06/15] Removed type hints from pandas.io.common --- pandas/io/common.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 074e734fd14fa..c1cacf39c5b08 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -17,8 +17,6 @@ from pandas.io.formats.printing import pprint_thing -from pandas.typing import FilePathOrBuffer - # gh-12665: Alias for now and remove later. CParserError = ParserError @@ -93,7 +91,7 @@ def _is_url(url): return False -def _expand_user(filepath_or_buffer: FilePathOrBuffer): +def _expand_user(filepath_or_buffer): """Return the argument with an initial component of ~ or ~user replaced by that user's home directory. @@ -119,7 +117,7 @@ def _validate_header_arg(header): "the row(s) making up the column names") -def _stringify_path(filepath_or_buffer: FilePathOrBuffer): +def _stringify_path(filepath_or_buffer): """Attempt to convert a path-like object to a string. Parameters @@ -178,10 +176,8 @@ def is_gcs_url(url): return False -def get_filepath_or_buffer(filepath_or_buffer: FilePathOrBuffer, - encoding=None, - compression=None, - mode=None): +def get_filepath_or_buffer(filepath_or_buffer, encoding=None, + compression=None, mode=None): """ If the filepath_or_buffer is a url, translate and return the buffer. Otherwise passthrough. @@ -261,8 +257,7 @@ def file_path_to_url(path): } -def _infer_compression(filepath_or_buffer: FilePathOrBuffer, - compression): +def _infer_compression(filepath_or_buffer, compression): """ Get the compression method for filepath_or_buffer. If compression='infer', the inferred compression method is returned. Otherwise, the input @@ -316,8 +311,7 @@ def _infer_compression(filepath_or_buffer: FilePathOrBuffer, raise ValueError(msg) -def _get_handle(path_or_buf: FilePathOrBuffer, - mode, encoding=None, compression=None, +def _get_handle(path_or_buf, mode, encoding=None, compression=None, memory_map=False, is_text=True): """ Get file handle for given path/buffer and mode. From b17ad4630ff552ad229fbabbf9eade4ef0bcc172 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Mar 2019 10:34:46 -0700 Subject: [PATCH 07/15] Removed local variable shadowing, refactored for new types --- pandas/io/gcs.py | 3 ++- pandas/io/parsers.py | 13 ++++++++----- pandas/io/s3.py | 3 ++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/io/gcs.py b/pandas/io/gcs.py index aa1cb648f05d1..89dade27ad543 100644 --- a/pandas/io/gcs.py +++ b/pandas/io/gcs.py @@ -12,5 +12,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, mode = 'rb' fs = gcsfs.GCSFileSystem() - filepath_or_buffer = fs.open(filepath_or_buffer, mode) + filepath_or_buffer = fs.open( + filepath_or_buffer, mode) # type: gcsfs.GCSFile return filepath_or_buffer, None, compression, True diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4b3fd44b0be6f..fc4e62c2fde2e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -9,7 +9,6 @@ import sys from textwrap import fill import warnings -from typing import cast, IO import numpy as np @@ -412,7 +411,12 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): compression = kwds.get('compression', 'infer') compression = _infer_compression(filepath_or_buffer, compression) - filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( + + # TODO: get_filepath_or_buffer could return + # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] + # though mypy handling of conditional imports is difficult. + # See https://github.com/python/mypy/issues/1297 + fp_or_buf, _, compression, should_close = get_filepath_or_buffer( filepath_or_buffer, encoding, compression) kwds['compression'] = compression @@ -429,7 +433,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): _validate_names(kwds.get("names", None)) # Create the parser. - parser = TextFileReader(filepath_or_buffer, **kwds) + parser = TextFileReader(fp_or_buf, **kwds) if chunksize or iterator: return parser @@ -441,8 +445,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): if should_close: try: - # See MyPy GH issue 1424 - cast(IO, filepath_or_buffer).close() + fp_or_buf.close() except ValueError: pass diff --git a/pandas/io/s3.py b/pandas/io/s3.py index bf86db279df5a..e7742946d3dd9 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -36,5 +36,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, # A NoCredentialsError is raised if you don't have creds # for that bucket. fs = s3fs.S3FileSystem(anon=True) - filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) + filepath_or_buffer = fs.open( + _strip_schema(filepath_or_buffer), mode) # type: s3fs.S3File return filepath_or_buffer, None, compression, True From ed3fc589478e77f7c2f7754c9862de0b7152b5b5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Mar 2019 10:47:46 -0700 Subject: [PATCH 08/15] Doc note for pandas.typing --- doc/source/whatsnew/v0.25.0.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 49b2349851479..541d477ea9523 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -18,6 +18,18 @@ What's New in 0.25.0 (April XX, 2019) These are the changes in pandas 0.25.0. See :ref:`release` for a full changelog including other versions of pandas. +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_0250.enhancements.typing: + +Type Hints and ``pandas.typing`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In accordance with :pep:`484` pandas has introduced Type Hints and a new ``pandas.typing`` module containing aliases for idiomatic pandas types into the code base. We will be continually adding annotations to the code base to improve readability, reduce code maintenance and proactively identify bugs. + +`MyPy `__ has been configured as part of our CI to perform compile-time type checking. + .. _whatsnew_0250.enhancements.other: From 34c00bf219f2bf8fe415069de79e65d4f5234755 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Mar 2019 13:55:25 -0700 Subject: [PATCH 09/15] Added typing to exposed API test --- pandas/tests/api/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 8a0a27a71784c..edb2c01d32987 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -27,7 +27,7 @@ class TestPDApi(Base): # top-level sub-packages lib = ['api', 'arrays', 'compat', 'core', 'errors', 'pandas', - 'plotting', 'test', 'testing', 'tseries', + 'plotting', 'test', 'testing', 'tseries', 'typing', 'util', 'options', 'io'] # these are already deprecated; awaiting removal From 3daf02fff867a7fdebc5c13271c1aa036d31276f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 27 Mar 2019 15:30:04 -0700 Subject: [PATCH 10/15] isort fixup --- pandas/io/parsers.py | 3 +-- pandas/typing.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index fc4e62c2fde2e..5311887c61df1 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -37,6 +37,7 @@ Index, MultiIndex, RangeIndex, ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.tools import datetimes as tools +from pandas.typing import FilePathOrBuffer from pandas.io.common import ( _NA_VALUES, BaseIterator, UnicodeReader, UTF8Recoder, _get_handle, @@ -44,8 +45,6 @@ is_file_like) from pandas.io.date_converters import generic_parser -from pandas.typing import FilePathOrBuffer - # BOM character (byte order mark) # This exists at the beginning of a file to indicate endianness # of a file (stream). Unfortunately, this marker screws up parsing, diff --git a/pandas/typing.py b/pandas/typing.py index 6a2056981e733..5225be33d7604 100644 --- a/pandas/typing.py +++ b/pandas/typing.py @@ -1,5 +1,4 @@ from pathlib import Path -from typing import AnyStr, IO, Union - +from typing import IO, AnyStr, Union FilePathOrBuffer = Union[str, Path, IO[AnyStr]] From 3ff41d322ab9efc7cf012698b6f700a3af44a0eb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 28 Mar 2019 15:56:22 -0700 Subject: [PATCH 11/15] Privated typing module --- pandas/{typing.py => _typing.py} | 0 pandas/io/parsers.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename pandas/{typing.py => _typing.py} (100%) diff --git a/pandas/typing.py b/pandas/_typing.py similarity index 100% rename from pandas/typing.py rename to pandas/_typing.py diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 87252e8b0dea8..316d1326eea83 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -30,6 +30,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.missing import isna +from pandas._typing import FilePathOrBuffer from pandas.core import algorithms from pandas.core.arrays import Categorical from pandas.core.frame import DataFrame @@ -37,7 +38,6 @@ Index, MultiIndex, RangeIndex, ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.tools import datetimes as tools -from pandas.typing import FilePathOrBuffer from pandas.io.common import ( _NA_VALUES, BaseIterator, UnicodeReader, UTF8Recoder, _get_handle, From 133e045fde56d01291c339a25cbcefde8a7046d3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 28 Mar 2019 15:58:21 -0700 Subject: [PATCH 12/15] Updated docs --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a6c146e0dd0b4..97291fa9c9ff6 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -26,7 +26,7 @@ Enhancements Type Hints and ``pandas.typing`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In accordance with :pep:`484` pandas has introduced Type Hints and a new ``pandas.typing`` module containing aliases for idiomatic pandas types into the code base. We will be continually adding annotations to the code base to improve readability, reduce code maintenance and proactively identify bugs. +In accordance with :pep:`484` pandas has introduced Type Hints and a new ``pandas._typing`` module containing aliases for idiomatic pandas types into the code base. We will be continually adding annotations to the code base to improve readability, reduce code maintenance and proactively identify bugs. Note that ``pandas._typing`` is currently private as it is developmental and subject to change, though this will eventually be exposed as ``pandas.typing`` for third party libraries to leverage when type checking their own code which interfaces with pandas. `MyPy `__ has been configured as part of our CI to perform compile-time type checking. From 688f705bbaf5c99682dd2366c800ffabbf1f13b0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 28 Mar 2019 16:40:22 -0700 Subject: [PATCH 13/15] Privatized typing in test_api --- pandas/tests/api/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index edb2c01d32987..62c13698662f7 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -27,7 +27,7 @@ class TestPDApi(Base): # top-level sub-packages lib = ['api', 'arrays', 'compat', 'core', 'errors', 'pandas', - 'plotting', 'test', 'testing', 'tseries', 'typing', + 'plotting', 'test', 'testing', 'tseries', '_typing', 'util', 'options', 'io'] # these are already deprecated; awaiting removal From 7bd5a624f9d7095a88d648ff195a710b743ac831 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 28 Mar 2019 19:41:47 -0700 Subject: [PATCH 14/15] Reset changes to test_api --- pandas/tests/api/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 62c13698662f7..8a0a27a71784c 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -27,7 +27,7 @@ class TestPDApi(Base): # top-level sub-packages lib = ['api', 'arrays', 'compat', 'core', 'errors', 'pandas', - 'plotting', 'test', 'testing', 'tseries', '_typing', + 'plotting', 'test', 'testing', 'tseries', 'util', 'options', 'io'] # these are already deprecated; awaiting removal From 7e31345eb573f51718047d131e820370b7a54355 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 29 Mar 2019 09:44:11 -0700 Subject: [PATCH 15/15] Reverted docstring changes --- doc/source/whatsnew/v0.25.0.rst | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 97291fa9c9ff6..99b57e2427509 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -18,18 +18,6 @@ What's New in 0.25.0 (April XX, 2019) These are the changes in pandas 0.25.0. See :ref:`release` for a full changelog including other versions of pandas. -Enhancements -~~~~~~~~~~~~ - -.. _whatsnew_0250.enhancements.typing: - -Type Hints and ``pandas.typing`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In accordance with :pep:`484` pandas has introduced Type Hints and a new ``pandas._typing`` module containing aliases for idiomatic pandas types into the code base. We will be continually adding annotations to the code base to improve readability, reduce code maintenance and proactively identify bugs. Note that ``pandas._typing`` is currently private as it is developmental and subject to change, though this will eventually be exposed as ``pandas.typing`` for third party libraries to leverage when type checking their own code which interfaces with pandas. - -`MyPy `__ has been configured as part of our CI to perform compile-time type checking. - .. _whatsnew_0250.enhancements.other: