From 493b0d2fd40ce8a394e8239f918433b4ccbd7828 Mon Sep 17 00:00:00 2001 From: Richard Date: Tue, 28 Jul 2020 17:11:08 -0400 Subject: [PATCH 01/10] BUG: Attributes are lost when subsetting columns --- asv_bench/benchmarks/frame_ctor.py | 2 +- asv_bench/benchmarks/gil.py | 8 +- asv_bench/benchmarks/io/parsers.py | 2 +- asv_bench/benchmarks/tslibs/normalize.py | 2 +- ci/code_checks.sh | 2 +- ci/deps/azure-36-32bit.yaml | 2 +- ci/deps/azure-36-locale.yaml | 2 +- ci/deps/azure-36-locale_slow.yaml | 2 +- ci/deps/azure-36-slow.yaml | 2 +- ci/deps/azure-37-locale.yaml | 2 +- ci/deps/azure-37-numpydev.yaml | 2 +- ci/deps/azure-macos-36.yaml | 2 +- ci/deps/azure-windows-36.yaml | 2 +- ci/deps/azure-windows-37.yaml | 2 +- ci/deps/travis-36-cov.yaml | 2 +- ci/deps/travis-36-locale.yaml | 2 +- ci/deps/travis-37-arm64.yaml | 2 +- ci/deps/travis-37.yaml | 2 +- ci/deps/travis-38.yaml | 2 +- doc/source/development/contributing.rst | 36 +++- doc/source/development/policies.rst | 2 +- doc/source/user_guide/style.ipynb | 2 +- doc/source/whatsnew/index.rst | 9 + doc/source/whatsnew/v1.1.1.rst | 54 ++++++ doc/source/whatsnew/v1.2.0.rst | 168 ++++++++++++++++++ environment.yml | 4 +- pandas/_config/config.py | 4 +- pandas/_libs/algos.pyx | 10 +- pandas/_libs/groupby.pyx | 48 +++-- pandas/_libs/hashing.pyx | 7 +- pandas/_libs/hashtable.pyx | 75 ++++---- pandas/_libs/index.pyx | 8 +- pandas/_libs/internals.pyx | 3 + pandas/_libs/interval.pyx | 16 +- pandas/_libs/join.pyx | 9 +- pandas/_libs/lib.pyx | 48 +++-- pandas/_libs/missing.pyx | 16 +- pandas/_libs/ops.pyx | 9 +- pandas/_libs/parsers.pyx | 65 ++++--- pandas/_libs/reduction.pyx | 9 +- pandas/_libs/reshape.pyx | 2 + pandas/_libs/sparse.pyx | 15 +- pandas/_libs/testing.pyx | 7 +- pandas/_libs/tslib.pyx | 19 +- pandas/_libs/tslibs/ccalendar.pyx | 2 +- pandas/_libs/tslibs/conversion.pyx | 58 ++++-- pandas/_libs/tslibs/fields.pyx | 29 ++- pandas/_libs/tslibs/nattype.pyx | 22 ++- pandas/_libs/tslibs/np_datetime.pyx | 6 +- pandas/_libs/tslibs/offsets.pyx | 43 +++-- pandas/_libs/tslibs/parsing.pyx | 41 +++-- pandas/_libs/tslibs/period.pyx | 62 +++---- pandas/_libs/tslibs/strptime.pyx | 17 +- pandas/_libs/tslibs/timedeltas.pyx | 38 ++-- pandas/_libs/tslibs/timestamps.pyx | 62 ++++--- pandas/_libs/tslibs/timezones.pyx | 12 +- pandas/_libs/tslibs/tzconversion.pyx | 18 +- pandas/_libs/tslibs/vectorized.pyx | 9 +- pandas/_libs/window/aggregations.pyx | 9 +- pandas/_libs/window/indexers.pyx | 3 +- pandas/_libs/writers.pyx | 2 +- pandas/_testing.py | 8 +- pandas/_typing.py | 10 +- pandas/compat/pickle_compat.py | 2 +- pandas/core/apply.py | 2 +- pandas/core/arrays/categorical.py | 4 +- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/datetimes.py | 4 +- pandas/core/arrays/integer.py | 1 + pandas/core/arrays/interval.py | 1 + pandas/core/arrays/period.py | 1 + pandas/core/arrays/sparse/accessor.py | 7 +- pandas/core/config_init.py | 6 +- pandas/core/construction.py | 10 +- pandas/core/dtypes/cast.py | 3 +- pandas/core/dtypes/dtypes.py | 12 +- pandas/core/frame.py | 9 +- pandas/core/groupby/generic.py | 11 +- pandas/core/groupby/grouper.py | 1 - pandas/core/indexes/base.py | 2 +- pandas/core/internals/ops.py | 2 +- pandas/core/strings.py | 6 +- pandas/core/tools/datetimes.py | 5 +- pandas/core/util/hashing.py | 2 +- pandas/io/clipboard/__init__.py | 16 +- pandas/io/common.py | 29 ++- pandas/io/excel/_base.py | 2 +- pandas/io/excel/_odfreader.py | 4 +- pandas/io/excel/_openpyxl.py | 2 +- pandas/io/excel/_xlrd.py | 4 +- pandas/io/formats/format.py | 2 +- pandas/io/formats/style.py | 2 +- pandas/io/html.py | 2 +- pandas/io/pytables.py | 2 +- pandas/io/sas/sas.pyx | 2 +- pandas/io/sql.py | 19 +- pandas/plotting/_matplotlib/core.py | 2 +- pandas/plotting/_matplotlib/timeseries.py | 2 +- pandas/tests/api/test_api.py | 3 +- pandas/tests/arrays/interval/test_interval.py | 4 + pandas/tests/arrays/test_period.py | 3 + pandas/tests/frame/test_analytics.py | 4 +- pandas/tests/groupby/test_categorical.py | 10 +- pandas/tests/groupby/test_groupby.py | 28 +++ .../tests/groupby/transform/test_transform.py | 12 +- pandas/tests/indexes/common.py | 12 +- .../indexes/datetimes/test_constructors.py | 59 ++++++ .../tests/indexes/datetimes/test_datetime.py | 1 + .../indexing/multiindex/test_indexing_slow.py | 2 +- pandas/tests/indexing/test_loc.py | 3 + pandas/tests/io/test_fsspec.py | 2 +- pandas/tests/io/test_gcs.py | 9 +- pandas/tests/io/test_sql.py | 4 +- pandas/tests/plotting/common.py | 4 +- pandas/tests/plotting/test_frame.py | 8 +- pandas/tests/plotting/test_hist_method.py | 3 +- pandas/tests/plotting/test_misc.py | 10 +- pandas/tests/plotting/test_series.py | 3 +- pandas/tests/reshape/merge/test_join.py | 8 +- pandas/tests/series/indexing/test_datetime.py | 2 +- pandas/tests/series/methods/test_asof.py | 2 +- pandas/tests/series/test_arithmetic.py | 2 +- pandas/tests/test_algos.py | 7 +- pandas/tests/test_downstream.py | 2 +- pandas/tests/window/test_base_indexer.py | 4 +- pandas/tests/window/test_ewm.py | 2 +- pandas/util/_doctools.py | 2 +- pandas/util/_test_decorators.py | 18 +- requirements-dev.txt | 4 +- setup.cfg | 2 +- 130 files changed, 1049 insertions(+), 513 deletions(-) create mode 100644 doc/source/whatsnew/v1.1.1.rst create mode 100644 doc/source/whatsnew/v1.2.0.rst diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index dc6f45f810f3d..e0a2257b0ca1f 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -6,7 +6,7 @@ from .pandas_vb_common import tm try: - from pandas.tseries.offsets import Nano, Hour + from pandas.tseries.offsets import Hour, Nano except ImportError: # For compatibility with older versions from pandas.core.datetools import * # noqa diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index e266d871f5bc6..5d9070de92ec7 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -7,14 +7,14 @@ try: from pandas import ( - rolling_median, + rolling_kurt, + rolling_max, rolling_mean, + rolling_median, rolling_min, - rolling_max, - rolling_var, rolling_skew, - rolling_kurt, rolling_std, + rolling_var, ) have_rolling_methods = True diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py index ec3eddfff7184..5390056ba36f2 100644 --- a/asv_bench/benchmarks/io/parsers.py +++ b/asv_bench/benchmarks/io/parsers.py @@ -2,8 +2,8 @@ try: from pandas._libs.tslibs.parsing import ( - concat_date_cols, _does_string_look_like_datetime, + concat_date_cols, ) except ImportError: # Avoid whole benchmark suite import failure on asv (currently 0.4) diff --git a/asv_bench/benchmarks/tslibs/normalize.py b/asv_bench/benchmarks/tslibs/normalize.py index 7d4e0556f4d96..9a206410d8775 100644 --- a/asv_bench/benchmarks/tslibs/normalize.py +++ b/asv_bench/benchmarks/tslibs/normalize.py @@ -1,5 +1,5 @@ try: - from pandas._libs.tslibs import normalize_i8_timestamps, is_date_array_normalized + from pandas._libs.tslibs import is_date_array_normalized, normalize_i8_timestamps except ImportError: from pandas._libs.tslibs.conversion import ( normalize_i8_timestamps, diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7b12de387d648..69ce0f1adce22 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -121,7 +121,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then # Imports - Check formatting using isort see setup.cfg for settings MSG='Check import format using isort' ; echo $MSG - ISORT_CMD="isort --quiet --recursive --check-only pandas asv_bench scripts" + ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts" if [[ "$GITHUB_ACTIONS" == "true" ]]; then eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]})) else diff --git a/ci/deps/azure-36-32bit.yaml b/ci/deps/azure-36-32bit.yaml index 2dc53f8181ac4..15704cf0d5427 100644 --- a/ci/deps/azure-36-32bit.yaml +++ b/ci/deps/azure-36-32bit.yaml @@ -23,4 +23,4 @@ dependencies: - pip - pip: - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 diff --git a/ci/deps/azure-36-locale.yaml b/ci/deps/azure-36-locale.yaml index d31015fde4741..a9b9a5a47ccf5 100644 --- a/ci/deps/azure-36-locale.yaml +++ b/ci/deps/azure-36-locale.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - pytest-asyncio - hypothesis>=3.58.0 diff --git a/ci/deps/azure-36-locale_slow.yaml b/ci/deps/azure-36-locale_slow.yaml index 23121b985492e..c086b3651afc3 100644 --- a/ci/deps/azure-36-locale_slow.yaml +++ b/ci/deps/azure-36-locale_slow.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 - pytest-azurepipelines diff --git a/ci/deps/azure-36-slow.yaml b/ci/deps/azure-36-slow.yaml index 0a6d1d13c8549..87bad59fa4873 100644 --- a/ci/deps/azure-36-slow.yaml +++ b/ci/deps/azure-36-slow.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml index 4dbb6a5344976..6f64c81f299d1 100644 --- a/ci/deps/azure-37-locale.yaml +++ b/ci/deps/azure-37-locale.yaml @@ -6,7 +6,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - pytest-asyncio - hypothesis>=3.58.0 diff --git a/ci/deps/azure-37-numpydev.yaml b/ci/deps/azure-37-numpydev.yaml index 451fb5884a4af..5cb58756a6ac1 100644 --- a/ci/deps/azure-37-numpydev.yaml +++ b/ci/deps/azure-37-numpydev.yaml @@ -5,7 +5,7 @@ dependencies: - python=3.7.* # tools - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 - pytest-azurepipelines diff --git a/ci/deps/azure-macos-36.yaml b/ci/deps/azure-macos-36.yaml index 81a27465f9e61..eeea249a19ca1 100644 --- a/ci/deps/azure-macos-36.yaml +++ b/ci/deps/azure-macos-36.yaml @@ -5,7 +5,7 @@ dependencies: - python=3.6.* # tools - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 - pytest-azurepipelines diff --git a/ci/deps/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml index 4d7e1d821037b..548660cabaa67 100644 --- a/ci/deps/azure-windows-36.yaml +++ b/ci/deps/azure-windows-36.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 - pytest-azurepipelines diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml index 34fca631df6c1..5bbd0e2795d7e 100644 --- a/ci/deps/azure-windows-37.yaml +++ b/ci/deps/azure-windows-37.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 - pytest-azurepipelines diff --git a/ci/deps/travis-36-cov.yaml b/ci/deps/travis-36-cov.yaml index 5f5ea8034cddf..177e0d3f4c0af 100644 --- a/ci/deps/travis-36-cov.yaml +++ b/ci/deps/travis-36-cov.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 - pytest-cov # this is only needed in the coverage build diff --git a/ci/deps/travis-36-locale.yaml b/ci/deps/travis-36-locale.yaml index 6bc4aba733ee5..03a1e751b6a86 100644 --- a/ci/deps/travis-36-locale.yaml +++ b/ci/deps/travis-36-locale.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/travis-37-arm64.yaml b/ci/deps/travis-37-arm64.yaml index f434a03609b26..5cb53489be225 100644 --- a/ci/deps/travis-37-arm64.yaml +++ b/ci/deps/travis-37-arm64.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.13 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/travis-37.yaml b/ci/deps/travis-37.yaml index aaf706d61fe5c..e896233aac63c 100644 --- a/ci/deps/travis-37.yaml +++ b/ci/deps/travis-37.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml index ac39a223cd086..b879c0f81dab2 100644 --- a/ci/deps/travis-38.yaml +++ b/ci/deps/travis-38.yaml @@ -7,7 +7,7 @@ dependencies: # tools - cython>=0.29.16 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index b85e9403038ab..4ffd1d586a99a 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -153,14 +153,38 @@ to build the documentation locally before pushing your changes. Using a Docker container ~~~~~~~~~~~~~~~~~~~~~~~~ -Instead of manually setting up a development environment, you can use Docker to -automatically create the environment with just several commands. Pandas provides a `DockerFile` -in the root directory to build a Docker image with a full pandas development environment. +Instead of manually setting up a development environment, you can use `Docker +`_ to automatically create the environment with just several +commands. Pandas provides a `DockerFile` in the root directory to build a Docker image +with a full pandas development environment. -Even easier, you can use the DockerFile to launch a remote session with Visual Studio Code, +**Docker Commands** + +Pass your GitHub username in the `DockerFile` to use your own fork:: + + # Build the image pandas-yourname-env + docker build --tag pandas-yourname-env . + # Run a container and bind your local forked repo, pandas-yourname, to the container + docker run -it --rm -v path-to-pandas-yourname:/home/pandas-yourname pandas-yourname-env + +Even easier, you can integrate Docker with the following IDEs: + +**Visual Studio Code** + +You can use the DockerFile to launch a remote session with Visual Studio Code, a popular free IDE, using the `.devcontainer.json` file. See https://code.visualstudio.com/docs/remote/containers for details. +**PyCharm (Professional)** + +Enable Docker support and use the Services tool window to build and manage images as well as +run and interact with containers. +See https://www.jetbrains.com/help/pycharm/docker.html for details. + +Note that you might need to rebuild the C extensions if/when you merge with upstream/master using:: + + python setup.py build_ext --inplace -j 4 + .. _contributing.dev_c: Installing a C compiler @@ -751,7 +775,7 @@ Imports are alphabetically sorted within these sections. As part of :ref:`Continuous Integration ` checks we run:: - isort --recursive --check-only pandas + isort --check-only pandas to check that imports are correctly formatted as per the `setup.cfg`. @@ -770,8 +794,6 @@ You should run:: to automatically format imports correctly. This will modify your local copy of the files. -The `--recursive` flag can be passed to sort all files in a directory. - Alternatively, you can run a command similar to what was suggested for ``black`` and ``flake8`` :ref:`right above `:: git diff upstream/master --name-only -- "*.py" | xargs -r isort diff --git a/doc/source/development/policies.rst b/doc/source/development/policies.rst index 1031bbfc46457..a564afc408df9 100644 --- a/doc/source/development/policies.rst +++ b/doc/source/development/policies.rst @@ -52,6 +52,6 @@ Python support ~~~~~~~~~~~~~~ pandas will only drop support for specific Python versions (e.g. 3.6.x, 3.7.x) in -pandas **major** releases. +pandas **major** or **minor** releases. .. _SemVer: https://semver.org diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index fd8dda4fe365e..77a1fef28f373 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -141,7 +141,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this case, the cell's style depends only on it's own value.\n", + "In this case, the cell's style depends only on its own value.\n", "That means we should use the `Styler.applymap` method which works elementwise." ] }, diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index ad5bb5a5b2d72..a280a981c789b 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -10,12 +10,21 @@ This is the list of changes to pandas between each release. For full details, see the `commit logs `_. For install and upgrade instructions, see :ref:`install`. +Version 1.2 +----------- + +.. toctree:: + :maxdepth: 2 + + v1.2.0 + Version 1.1 ----------- .. toctree:: :maxdepth: 2 + v1.1.1 v1.1.0 Version 1.0 diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst new file mode 100644 index 0000000000000..443589308ad4c --- /dev/null +++ b/doc/source/whatsnew/v1.1.1.rst @@ -0,0 +1,54 @@ +.. _whatsnew_111: + +What's new in 1.1.1 (?) +----------------------- + +These are the changes in pandas 1.1.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_111.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- +- +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_111.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Datetimelike** + +- +- + +**Numeric** + +- +- + +**Plotting** + +- + +**Indexing** + +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_111.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.1.0..v1.1.1|HEAD diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst new file mode 100644 index 0000000000000..b16ca0a80c5b4 --- /dev/null +++ b/doc/source/whatsnew/v1.2.0.rst @@ -0,0 +1,168 @@ +.. _whatsnew_120: + +What's new in 1.2.0 (??) +------------------------ + +These are the changes in pandas 1.2.0. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_120.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- +- + + +.. --------------------------------------------------------------------------- + +.. _whatsnew_120.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- +- + +.. --------------------------------------------------------------------------- + + +.. _whatsnew_120.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_120.bug_fixes: + +Bug fixes +~~~~~~~~~ + + +Categorical +^^^^^^^^^^^ + +- +- + +Datetimelike +^^^^^^^^^^^^ +- +- + +Timedelta +^^^^^^^^^ + +- +- + +Timezones +^^^^^^^^^ + +- Bug in :func:`date_range` was raising AmbiguousTimeError for valid input with `ambiguous=False` (:issue:`35297`) +- + + +Numeric +^^^^^^^ +- +- + +Conversion +^^^^^^^^^^ + +- +- + +Strings +^^^^^^^ + +- +- + + +Interval +^^^^^^^^ + +- +- + +Indexing +^^^^^^^^ + +- +- + +Missing +^^^^^^^ + +- +- + +MultiIndex +^^^^^^^^^^ + +- +- + +I/O +^^^ + +- +- + +Plotting +^^^^^^^^ + +- +- + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- +- + + +Reshaping +^^^^^^^^^ + +- +- + +Sparse +^^^^^^ + +- +- + +ExtensionArray +^^^^^^^^^^^^^^ + +- +- + + +Other +^^^^^ +- +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_120.contributors: + +Contributors +~~~~~~~~~~~~ \ No newline at end of file diff --git a/environment.yml b/environment.yml index 53222624619de..9efb995e29497 100644 --- a/environment.yml +++ b/environment.yml @@ -21,7 +21,7 @@ dependencies: - flake8<3.8.0 # temporary pin, GH#34150 - flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions - flake8-rst>=0.6.0,<=0.7.0 # linting of code blocks in rst files - - isort=4.3.21 # check that imports are in the right order + - isort>=5.2.1 # check that imports are in the right order - mypy=0.730 - pycodestyle # used by flake8 @@ -52,7 +52,7 @@ dependencies: - botocore>=1.11 - hypothesis>=3.82 - moto # mock S3 - - pytest>=5.0.1,<6.0.0rc0 + - pytest>=5.0.1 - pytest-cov - pytest-xdist>=1.21 - pytest-asyncio diff --git a/pandas/_config/config.py b/pandas/_config/config.py index f5e16cddeb04c..d7b73a0a685d3 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -442,8 +442,8 @@ def register_option( ValueError if `validator` is specified and `defval` is not a valid value. """ - import tokenize import keyword + import tokenize key = key.lower() @@ -660,8 +660,8 @@ def _build_option_description(k: str) -> str: def pp_options_list(keys: Iterable[str], width=80, _print: bool = False): """ Builds a concise listing of available options, grouped by prefix """ - from textwrap import wrap from itertools import groupby + from textwrap import wrap def pp(name: str, ks: Iterable[str]) -> List[str]: pfx = "- " + name + ".[" if name else "" diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 6b6ead795584f..7e90a8cc681ef 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1,11 +1,12 @@ import cython from cython import Py_ssize_t -from libc.stdlib cimport malloc, free -from libc.string cimport memmove from libc.math cimport fabs, sqrt +from libc.stdlib cimport free, malloc +from libc.string cimport memmove import numpy as np + cimport numpy as cnp from numpy cimport ( NPY_FLOAT32, @@ -31,12 +32,11 @@ from numpy cimport ( uint32_t, uint64_t, ) + cnp.import_array() cimport pandas._libs.util as util -from pandas._libs.util cimport numeric, get_nat - from pandas._libs.khash cimport ( kh_destroy_int64, kh_get_int64, @@ -46,7 +46,7 @@ from pandas._libs.khash cimport ( kh_resize_int64, khiter_t, ) - +from pandas._libs.util cimport get_nat, numeric import pandas._libs.missing as missing diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 7c57e6ee9dbfd..38cb973d6dde9 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1,27 +1,51 @@ import cython from cython import Py_ssize_t -from cython cimport floating -from libc.stdlib cimport malloc, free +from cython cimport floating +from libc.stdlib cimport free, malloc import numpy as np + cimport numpy as cnp -from numpy cimport (ndarray, - int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float32_t, float64_t, complex64_t, complex128_t) +from numpy cimport ( + complex64_t, + complex128_t, + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + ndarray, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) from numpy.math cimport NAN -cnp.import_array() -from pandas._libs.util cimport numeric, get_nat +cnp.import_array() -from pandas._libs.algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, - TIEBREAK_MIN, TIEBREAK_MAX, TIEBREAK_FIRST, - TIEBREAK_DENSE) -from pandas._libs.algos import (take_2d_axis1_float64_float64, - groupsort_indexer, tiebreakers) +from pandas._libs.algos cimport ( + TIEBREAK_AVERAGE, + TIEBREAK_DENSE, + TIEBREAK_FIRST, + TIEBREAK_MAX, + TIEBREAK_MIN, + TiebreakEnumType, + swap, +) +from pandas._libs.util cimport get_nat, numeric + +from pandas._libs.algos import ( + groupsort_indexer, + take_2d_axis1_float64_float64, + tiebreakers, +) from pandas._libs.missing cimport checknull + cdef int64_t NPY_NAT = get_nat() _int64_max = np.iinfo(np.int64).max diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index a98820ca57895..f2af04d91a3e3 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -2,10 +2,13 @@ # at https://github.com/veorq/SipHash import cython -from libc.stdlib cimport malloc, free + +from libc.stdlib cimport free, malloc import numpy as np -from numpy cimport ndarray, uint8_t, uint32_t, uint64_t, import_array + +from numpy cimport import_array, ndarray, uint8_t, uint32_t, uint64_t + import_array() from pandas._libs.util cimport is_nan diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index c3dcbb942d7fe..ffaf6d6505955 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -1,60 +1,57 @@ cimport cython - -from cpython.ref cimport PyObject, Py_INCREF -from cpython.mem cimport PyMem_Malloc, PyMem_Free - -from libc.stdlib cimport malloc, free +from cpython.mem cimport PyMem_Free, PyMem_Malloc +from cpython.ref cimport Py_INCREF, PyObject +from libc.stdlib cimport free, malloc import numpy as np + cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint32_t, float64_t +from numpy cimport float64_t, ndarray, uint8_t, uint32_t from numpy.math cimport NAN + cnp.import_array() +from pandas._libs cimport util from pandas._libs.khash cimport ( - khiter_t, - kh_str_t, - kh_init_str, - kh_put_str, - kh_exist_str, - kh_get_str, - kh_destroy_str, - kh_resize_str, - kh_put_strbox, - kh_get_strbox, - kh_init_strbox, - kh_int64_t, - kh_init_int64, - kh_resize_int64, + kh_destroy_float64, kh_destroy_int64, - kh_get_int64, + kh_destroy_pymap, + kh_destroy_str, + kh_destroy_uint64, + kh_exist_float64, kh_exist_int64, - kh_put_int64, + kh_exist_pymap, + kh_exist_str, + kh_exist_uint64, kh_float64_t, - kh_exist_float64, - kh_put_float64, - kh_init_float64, kh_get_float64, - kh_destroy_float64, - kh_resize_float64, - kh_resize_uint64, - kh_exist_uint64, - kh_destroy_uint64, - kh_put_uint64, + kh_get_int64, + kh_get_pymap, + kh_get_str, + kh_get_strbox, kh_get_uint64, - kh_init_uint64, - kh_destroy_pymap, - kh_exist_pymap, + kh_init_float64, + kh_init_int64, kh_init_pymap, - kh_get_pymap, + kh_init_str, + kh_init_strbox, + kh_init_uint64, + kh_int64_t, + kh_put_float64, + kh_put_int64, kh_put_pymap, + kh_put_str, + kh_put_strbox, + kh_put_uint64, + kh_resize_float64, + kh_resize_int64, kh_resize_pymap, + kh_resize_str, + kh_resize_uint64, + kh_str_t, + khiter_t, ) - - -from pandas._libs cimport util - from pandas._libs.missing cimport checknull diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 35c4b73b47695..d6659cc1895b1 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1,6 +1,7 @@ import warnings import numpy as np + cimport numpy as cnp from numpy cimport ( float32_t, @@ -16,17 +17,16 @@ from numpy cimport ( uint32_t, uint64_t, ) + cnp.import_array() from pandas._libs cimport util - +from pandas._libs.hashtable cimport HashTable from pandas._libs.tslibs.nattype cimport c_NaT as NaT from pandas._libs.tslibs.period cimport is_period_object -from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.timedeltas cimport _Timedelta - -from pandas._libs.hashtable cimport HashTable +from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs import algos, hashtable as _hash from pandas._libs.missing import checknull diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 8b4b490f49b12..4f27fde52414a 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -5,12 +5,15 @@ from cython import Py_ssize_t from cpython.slice cimport PySlice_GetIndicesEx + cdef extern from "Python.h": Py_ssize_t PY_SSIZE_T_MAX import numpy as np + cimport numpy as cnp from numpy cimport NPY_INT64, int64_t + cnp.import_array() from pandas._libs.algos import ensure_int64 diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 95881ebf1385c..6867e8aba7411 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -1,7 +1,8 @@ import numbers from operator import le, lt -from cpython.datetime cimport PyDelta_Check, PyDateTime_IMPORT +from cpython.datetime cimport PyDateTime_IMPORT, PyDelta_Check + PyDateTime_IMPORT from cpython.object cimport ( @@ -16,8 +17,8 @@ from cpython.object cimport ( import cython from cython import Py_ssize_t - import numpy as np + cimport numpy as cnp from numpy cimport ( NPY_QUICKSORT, @@ -30,22 +31,21 @@ from numpy cimport ( ndarray, uint64_t, ) + cnp.import_array() from pandas._libs cimport util - from pandas._libs.hashtable cimport Int64Vector +from pandas._libs.tslibs.timedeltas cimport _Timedelta +from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport tz_compare from pandas._libs.tslibs.util cimport ( - is_integer_object, is_float_object, + is_integer_object, is_timedelta64_object, ) -from pandas._libs.tslibs.timezones cimport tz_compare -from pandas._libs.tslibs.timestamps cimport _Timestamp -from pandas._libs.tslibs.timedeltas cimport _Timedelta - _VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 54892a7e4bc77..13c7187923473 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -1,7 +1,7 @@ import cython from cython import Py_ssize_t - import numpy as np + cimport numpy as cnp from numpy cimport ( float32_t, @@ -16,6 +16,7 @@ from numpy cimport ( uint32_t, uint64_t, ) + cnp.import_array() from pandas._libs.algos import ( @@ -640,7 +641,11 @@ def outer_join_indexer(ndarray[join_t] left, ndarray[join_t] right): # ---------------------------------------------------------------------- from pandas._libs.hashtable cimport ( - HashTable, PyObjectHashTable, UInt64HashTable, Int64HashTable) + HashTable, + Int64HashTable, + PyObjectHashTable, + UInt64HashTable, +) ctypedef fused asof_t: uint8_t diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5ecbb2c3ffd35..5fa91ffee8ea8 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -5,23 +5,24 @@ import warnings import cython from cython import Py_ssize_t -from cpython.object cimport PyObject_RichCompareBool, Py_EQ -from cpython.ref cimport Py_INCREF -from cpython.tuple cimport PyTuple_SET_ITEM, PyTuple_New -from cpython.iterator cimport PyIter_Check -from cpython.sequence cimport PySequence_Check -from cpython.number cimport PyNumber_Check - from cpython.datetime cimport ( - PyDateTime_Check, PyDate_Check, - PyTime_Check, - PyDelta_Check, + PyDateTime_Check, PyDateTime_IMPORT, + PyDelta_Check, + PyTime_Check, ) +from cpython.iterator cimport PyIter_Check +from cpython.number cimport PyNumber_Check +from cpython.object cimport Py_EQ, PyObject_RichCompareBool +from cpython.ref cimport Py_INCREF +from cpython.sequence cimport PySequence_Check +from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM + PyDateTime_IMPORT import numpy as np + cimport numpy as cnp from numpy cimport ( NPY_OBJECT, @@ -39,6 +40,7 @@ from numpy cimport ( uint8_t, uint64_t, ) + cnp.import_array() cdef extern from "numpy/arrayobject.h": @@ -63,28 +65,23 @@ cdef extern from "src/parse_helper.h": int floatify(object, float64_t *result, int *maybe_int) except -1 from pandas._libs cimport util -from pandas._libs.util cimport is_nan, UINT64_MAX, INT64_MAX, INT64_MIN +from pandas._libs.util cimport INT64_MAX, INT64_MIN, UINT64_MAX, is_nan from pandas._libs.tslib import array_to_datetime -from pandas._libs.tslibs.nattype cimport ( - NPY_NAT, - c_NaT as NaT, - checknull_with_nat, -) -from pandas._libs.tslibs.conversion cimport convert_to_tsobject -from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 -from pandas._libs.tslibs.timezones cimport tz_compare -from pandas._libs.tslibs.period cimport is_period_object -from pandas._libs.tslibs.offsets cimport is_offset_object from pandas._libs.missing cimport ( + C_NA, checknull, - isnaobj, is_null_datetime64, is_null_timedelta64, - C_NA, + isnaobj, ) - +from pandas._libs.tslibs.conversion cimport convert_to_tsobject +from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT, checknull_with_nat +from pandas._libs.tslibs.offsets cimport is_offset_object +from pandas._libs.tslibs.period cimport is_period_object +from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 +from pandas._libs.tslibs.timezones cimport tz_compare # constants that will be compared to potentially arbitrarily large # python int @@ -1317,8 +1314,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str: if not isinstance(value, list): value = list(value) - from pandas.core.dtypes.cast import ( - construct_1d_object_array_from_listlike) + from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike values = construct_1d_object_array_from_listlike(value) # make contiguous diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index fdd06fe631b97..760fab3781fd4 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -1,27 +1,25 @@ -import cython -from cython import Py_ssize_t - import numbers +import cython +from cython import Py_ssize_t import numpy as np + cimport numpy as cnp -from numpy cimport ndarray, int64_t, uint8_t, float64_t +from numpy cimport float64_t, int64_t, ndarray, uint8_t + cnp.import_array() from pandas._libs cimport util - - -from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, checknull_with_nat, is_null_datetimelike, ) -from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op +from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op from pandas.compat import is_platform_32bit - cdef: float64_t INF = np.inf float64_t NEGINF = -INF diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 658600cdfbe6c..d1f897d237c1b 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -10,18 +10,17 @@ from cpython.object cimport ( PyObject_RichCompareBool, ) - import cython from cython import Py_ssize_t - import numpy as np -from numpy cimport ndarray, uint8_t, import_array -import_array() +from numpy cimport import_array, ndarray, uint8_t + +import_array() -from pandas._libs.util cimport UINT8_MAX, is_nan from pandas._libs.missing cimport checknull +from pandas._libs.util cimport UINT8_MAX, is_nan @cython.wraparound(False) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 6ffb036e01595..fa77af6bd5a25 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1,6 +1,8 @@ # Copyright (c) 2012, Lambda Foundry, Inc. # See LICENSE for the license import bz2 +from csv import QUOTE_MINIMAL, QUOTE_NONE, QUOTE_NONNUMERIC +from errno import ENOENT import gzip import io import os @@ -9,17 +11,14 @@ import time import warnings import zipfile -from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE -from errno import ENOENT - from libc.stdlib cimport free -from libc.string cimport strncpy, strlen, strcasecmp +from libc.string cimport strcasecmp, strlen, strncpy import cython from cython import Py_ssize_t from cpython.bytes cimport PyBytes_AsString, PyBytes_FromString -from cpython.exc cimport PyErr_Occurred, PyErr_Fetch +from cpython.exc cimport PyErr_Fetch, PyErr_Occurred from cpython.object cimport PyObject from cpython.ref cimport Py_XDECREF from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_Decode @@ -30,37 +29,59 @@ cdef extern from "Python.h": import numpy as np + cimport numpy as cnp -from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t +from numpy cimport float64_t, int64_t, ndarray, uint8_t, uint64_t + cnp.import_array() from pandas._libs cimport util -from pandas._libs.util cimport UINT64_MAX, INT64_MAX, INT64_MIN +from pandas._libs.util cimport INT64_MAX, INT64_MIN, UINT64_MAX + import pandas._libs.lib as lib from pandas._libs.khash cimport ( - khiter_t, - kh_str_t, kh_init_str, kh_put_str, kh_exist_str, - kh_get_str, kh_destroy_str, - kh_float64_t, kh_get_float64, kh_destroy_float64, - kh_put_float64, kh_init_float64, kh_resize_float64, - kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox, + kh_destroy_float64, + kh_destroy_str, + kh_destroy_str_starts, kh_destroy_strbox, - kh_str_starts_t, kh_put_str_starts_item, kh_init_str_starts, - kh_get_str_starts_item, kh_destroy_str_starts, kh_resize_str_starts) + kh_exist_str, + kh_float64_t, + kh_get_float64, + kh_get_str, + kh_get_str_starts_item, + kh_get_strbox, + kh_init_float64, + kh_init_str, + kh_init_str_starts, + kh_init_strbox, + kh_put_float64, + kh_put_str, + kh_put_str_starts_item, + kh_put_strbox, + kh_resize_float64, + kh_resize_str_starts, + kh_str_starts_t, + kh_str_t, + kh_strbox_t, + khiter_t, +) + +from pandas.compat import _get_lzma_file, _import_lzma +from pandas.errors import DtypeWarning, EmptyDataError, ParserError, ParserWarning from pandas.core.dtypes.common import ( + is_bool_dtype, is_categorical_dtype, - is_integer_dtype, is_float_dtype, - is_bool_dtype, is_object_dtype, is_datetime64_dtype, - pandas_dtype, is_extension_array_dtype) + is_extension_array_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + pandas_dtype, +) from pandas.core.dtypes.concat import union_categoricals -from pandas.compat import _import_lzma, _get_lzma_file -from pandas.errors import (ParserError, DtypeWarning, - EmptyDataError, ParserWarning) - lzma = _import_lzma() cdef: diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index a01e0c5705dcf..7b36bc8baf891 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -2,15 +2,18 @@ from copy import copy from cython import Py_ssize_t -from libc.stdlib cimport malloc, free +from libc.stdlib cimport free, malloc import numpy as np + cimport numpy as cnp -from numpy cimport ndarray, int64_t +from numpy cimport int64_t, ndarray + cnp.import_array() from pandas._libs cimport util -from pandas._libs.lib import maybe_convert_objects, is_scalar + +from pandas._libs.lib import is_scalar, maybe_convert_objects cdef _check_result_array(object obj, Py_ssize_t cnt): diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index da4dd00027395..5c6c15fb50fed 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -16,7 +16,9 @@ from numpy cimport ( ) import numpy as np + cimport numpy as cnp + cnp.import_array() from pandas._libs.lib cimport c_is_list_like diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 7c9575d921dc9..321d7c374d8ec 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -1,9 +1,18 @@ import cython - import numpy as np + cimport numpy as cnp -from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t, - float64_t, float32_t) +from numpy cimport ( + float32_t, + float64_t, + int8_t, + int16_t, + int32_t, + int64_t, + ndarray, + uint8_t, +) + cnp.import_array() diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 785a4d1f8b923..64fc8d615ea9c 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -1,13 +1,16 @@ import math import numpy as np + from numpy cimport import_array + import_array() from pandas._libs.util cimport is_array -from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import is_dtype_equal +from pandas.core.dtypes.missing import array_equivalent, isna + cdef NUMERIC_TYPES = ( bool, @@ -129,6 +132,7 @@ cpdef assert_almost_equal(a, b, if not isiterable(b): from pandas._testing import assert_class_equal + # classes can't be the same, to raise error assert_class_equal(a, b, obj=obj) @@ -181,6 +185,7 @@ cpdef assert_almost_equal(a, b, elif isiterable(b): from pandas._testing import assert_class_equal + # classes can't be the same, to raise error assert_class_equal(a, b, obj=obj) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 35d5cd8f1e275..e4128af62d06d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -7,23 +7,20 @@ from cpython.datetime cimport ( datetime, tzinfo, ) + # import datetime C API PyDateTime_IMPORT cimport numpy as cnp from numpy cimport float64_t, int64_t, ndarray + import numpy as np + cnp.import_array() import pytz -from pandas._libs.util cimport ( - is_datetime64_object, - is_float_object, - is_integer_object, -) - from pandas._libs.tslibs.np_datetime cimport ( _string_to_dts, check_dts_bounds, @@ -34,9 +31,9 @@ from pandas._libs.tslibs.np_datetime cimport ( pydate_to_dt64, pydatetime_to_dt64, ) +from pandas._libs.util cimport is_datetime64_object, is_float_object, is_integer_object from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime - from pandas._libs.tslibs.parsing import parse_datetime_string from pandas._libs.tslibs.conversion cimport ( @@ -45,22 +42,18 @@ from pandas._libs.tslibs.conversion cimport ( convert_datetime_to_tsobject, get_datetime64_nanos, ) - from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, c_nat_strings as nat_strings, ) - from pandas._libs.tslibs.timestamps cimport _Timestamp -from pandas._libs.tslibs.timestamps import Timestamp -from pandas._libs.tslibs.tzconversion cimport ( - tz_localize_to_utc_single, -) +from pandas._libs.tslibs.timestamps import Timestamp # Note: this is the only non-tslibs intra-pandas dependency here from pandas._libs.missing cimport checknull_with_nat_and_na +from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single def _test_parse_iso8601(ts: str): diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 00cecd25e5225..6cce2f5e1fd95 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -5,7 +5,7 @@ Cython implementations of functions resembling the stdlib calendar module import cython -from numpy cimport int64_t, int32_t +from numpy cimport int32_t, int64_t # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 8cc3d25e86340..adf1dfbc1ac72 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,44 +1,68 @@ import cython - import numpy as np + cimport numpy as cnp -from numpy cimport int64_t, int32_t, intp_t, ndarray +from numpy cimport int32_t, int64_t, intp_t, ndarray + cnp.import_array() import pytz # stdlib datetime imports -from cpython.datetime cimport (datetime, time, tzinfo, - PyDateTime_Check, PyDate_Check, - PyDateTime_IMPORT) + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + datetime, + time, + tzinfo, +) + PyDateTime_IMPORT from pandas._libs.tslibs.base cimport ABCTimestamp - from pandas._libs.tslibs.np_datetime cimport ( - check_dts_bounds, npy_datetimestruct, pandas_datetime_to_datetimestruct, - _string_to_dts, npy_datetime, dt64_to_dtstruct, dtstruct_to_dt64, - get_datetime64_unit, get_datetime64_value, pydatetime_to_dt64, - NPY_DATETIMEUNIT, NPY_FR_ns) -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + NPY_DATETIMEUNIT, + NPY_FR_ns, + _string_to_dts, + check_dts_bounds, + dt64_to_dtstruct, + dtstruct_to_dt64, + get_datetime64_unit, + get_datetime64_value, + npy_datetime, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, + pydatetime_to_dt64, +) -from pandas._libs.tslibs.util cimport ( - is_datetime64_object, is_integer_object, is_float_object) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.timezones cimport ( - is_utc, is_tzlocal, is_fixed_offset, get_utcoffset, get_dst_info, - maybe_get_tz, tz_compare, + get_dst_info, + get_utcoffset, + is_fixed_offset, + is_tzlocal, + is_utc, + maybe_get_tz, + tz_compare, utc_pytz as UTC, ) +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) + from pandas._libs.tslibs.parsing import parse_datetime_string from pandas._libs.tslibs.nattype cimport ( NPY_NAT, - checknull_with_nat, c_NaT as NaT, c_nat_strings as nat_strings, + checknull_with_nat, ) - from pandas._libs.tslibs.tzconversion cimport ( tz_convert_utc_to_tzlocal, tz_localize_to_utc_single, diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 1d1f900bc18b3..16fa05c3801c6 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -6,26 +6,37 @@ from locale import LC_TIME import cython from cython import Py_ssize_t - import numpy as np + cimport numpy as cnp -from numpy cimport ndarray, int64_t, int32_t, int8_t, uint32_t +from numpy cimport int8_t, int32_t, int64_t, ndarray, uint32_t + cnp.import_array() from pandas._config.localization import set_locale -from pandas._libs.tslibs.ccalendar import MONTHS_FULL, DAYS_FULL +from pandas._libs.tslibs.ccalendar import DAYS_FULL, MONTHS_FULL + from pandas._libs.tslibs.ccalendar cimport ( - get_days_in_month, is_leapyear, dayofweek, get_week_of_year, - get_day_of_year, get_iso_calendar, iso_calendar_t, - month_offset, + dayofweek, + get_day_of_year, + get_days_in_month, get_firstbday, + get_iso_calendar, get_lastbday, + get_week_of_year, + is_leapyear, + iso_calendar_t, + month_offset, ) -from pandas._libs.tslibs.np_datetime cimport ( - npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, - td64_to_tdstruct) from pandas._libs.tslibs.nattype cimport NPY_NAT +from pandas._libs.tslibs.np_datetime cimport ( + dt64_to_dtstruct, + npy_datetimestruct, + pandas_timedeltastruct, + td64_to_tdstruct, +) + from pandas._libs.tslibs.strptime import LocaleTime diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 264013f928d22..73df51832d700 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1,3 +1,10 @@ +from cpython.datetime cimport ( + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + datetime, + timedelta, +) from cpython.object cimport ( Py_EQ, Py_GE, @@ -8,28 +15,19 @@ from cpython.object cimport ( PyObject_RichCompare, ) -from cpython.datetime cimport ( - PyDateTime_Check, - PyDateTime_IMPORT, - PyDelta_Check, - datetime, - timedelta, -) PyDateTime_IMPORT from cpython.version cimport PY_MINOR_VERSION import numpy as np + cimport numpy as cnp from numpy cimport int64_t + cnp.import_array() -from pandas._libs.tslibs.np_datetime cimport ( - get_datetime64_value, - get_timedelta64_value, -) cimport pandas._libs.tslibs.util as util - +from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 31cc55ad981bb..12aaaf4ce3977 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -1,5 +1,3 @@ -from cpython.object cimport Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE - from cpython.datetime cimport ( PyDateTime_DATE_GET_HOUR, PyDateTime_DATE_GET_MICROSECOND, @@ -10,11 +8,15 @@ from cpython.datetime cimport ( PyDateTime_GET_YEAR, PyDateTime_IMPORT, ) +from cpython.object cimport Py_EQ, Py_GE, Py_GT, Py_LE, Py_LT, Py_NE + PyDateTime_IMPORT from numpy cimport int64_t + from pandas._libs.tslibs.util cimport get_c_string_buf_and_size + cdef extern from "src/datetime/np_datetime.h": int cmp_npy_datetimestruct(npy_datetimestruct *a, npy_datetimestruct *b) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 9a7ca15a2a1c2..ac2725fc58aee 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1,39 +1,51 @@ -import cython - import operator import re import time from typing import Any import warnings -from cpython.datetime cimport (PyDateTime_IMPORT, - PyDateTime_Check, - PyDate_Check, - PyDelta_Check, - datetime, timedelta, date, - time as dt_time) + +import cython + +from cpython.datetime cimport ( + PyDate_Check, + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + date, + datetime, + time as dt_time, + timedelta, +) + PyDateTime_IMPORT -from dateutil.relativedelta import relativedelta from dateutil.easter import easter - +from dateutil.relativedelta import relativedelta import numpy as np + cimport numpy as cnp from numpy cimport int64_t, ndarray + cnp.import_array() # TODO: formalize having _libs.properties "above" tslibs in the dependency structure + from pandas._libs.properties import cache_readonly from pandas._libs.tslibs cimport util from pandas._libs.tslibs.util cimport ( - is_integer_object, is_datetime64_object, is_float_object, + is_integer_object, ) from pandas._libs.tslibs.ccalendar import ( - MONTH_ALIASES, MONTH_TO_CAL_NUM, weekday_to_int, int_to_weekday, + MONTH_ALIASES, + MONTH_TO_CAL_NUM, + int_to_weekday, + weekday_to_int, ) + from pandas._libs.tslibs.ccalendar cimport ( DAY_NANOS, dayofweek, @@ -47,17 +59,20 @@ from pandas._libs.tslibs.conversion cimport ( ) from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( - npy_datetimestruct, - dtstruct_to_dt64, dt64_to_dtstruct, + dtstruct_to_dt64, + npy_datetimestruct, pydate_to_dtstruct, ) from pandas._libs.tslibs.tzconversion cimport tz_convert_from_utc_single from .dtypes cimport PeriodDtypeCode from .timedeltas cimport delta_to_nanoseconds + from .timedeltas import Timedelta + from .timestamps cimport _Timestamp + from .timestamps import Timestamp # --------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index c4f369d0d3b3f..8429aebbd85b8 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -9,39 +9,44 @@ from libc.string cimport strchr import cython from cython import Py_ssize_t -from cpython.object cimport PyObject_Str - from cpython.datetime cimport datetime, datetime_new, import_datetime, tzinfo +from cpython.object cimport PyObject_Str from cpython.version cimport PY_VERSION_HEX + import_datetime() import numpy as np + cimport numpy as cnp -from numpy cimport (PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT, - PyArray_IterNew, flatiter, float64_t) +from numpy cimport ( + PyArray_GETITEM, + PyArray_ITER_DATA, + PyArray_ITER_NEXT, + PyArray_IterNew, + flatiter, + float64_t, +) + cnp.import_array() # dateutil compat -from dateutil.tz import (tzoffset, - tzlocal as _dateutil_tzlocal, - tzutc as _dateutil_tzutc, - tzstr as _dateutil_tzstr) + +from dateutil.parser import DEFAULTPARSER, parse as du_parse from dateutil.relativedelta import relativedelta -from dateutil.parser import DEFAULTPARSER -from dateutil.parser import parse as du_parse +from dateutil.tz import ( + tzlocal as _dateutil_tzlocal, + tzoffset, + tzstr as _dateutil_tzstr, + tzutc as _dateutil_tzutc, +) from pandas._config import get_option from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS -from pandas._libs.tslibs.nattype cimport ( - c_nat_strings as nat_strings, - c_NaT as NaT, -) -from pandas._libs.tslibs.util cimport ( - is_array, - get_c_string_buf_and_size, -) +from pandas._libs.tslibs.nattype cimport c_NaT as NaT, c_nat_strings as nat_strings from pandas._libs.tslibs.offsets cimport is_offset_object +from pandas._libs.tslibs.util cimport get_c_string_buf_and_size, is_array + cdef extern from "../src/headers/portable.h": int getdigit_ascii(char c, int default) nogil diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 20961c6da56bd..86b6533f5caf5 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1,96 +1,98 @@ import warnings -from cpython.object cimport PyObject_RichCompareBool, Py_EQ, Py_NE +from cpython.object cimport Py_EQ, Py_NE, PyObject_RichCompareBool +from numpy cimport import_array, int64_t, ndarray -from numpy cimport int64_t, import_array, ndarray import numpy as np + import_array() from libc.stdlib cimport free, malloc +from libc.string cimport memset, strlen from libc.time cimport strftime, tm -from libc.string cimport strlen, memset import cython from cpython.datetime cimport ( - datetime, PyDate_Check, PyDateTime_Check, PyDateTime_IMPORT, PyDelta_Check, + datetime, ) + # import datetime C API PyDateTime_IMPORT from pandas._libs.tslibs.np_datetime cimport ( - npy_datetimestruct, - dtstruct_to_dt64, - dt64_to_dtstruct, - pandas_datetime_to_datetimestruct, - check_dts_bounds, NPY_DATETIMEUNIT, NPY_FR_D, NPY_FR_us, + check_dts_bounds, + dt64_to_dtstruct, + dtstruct_to_dt64, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, ) + cdef extern from "src/datetime/np_datetime.h": int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, npy_datetimestruct *d) nogil cimport pandas._libs.tslibs.util as util -from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.timedeltas import Timedelta -from pandas._libs.tslibs.timedeltas cimport ( - delta_to_nanoseconds, - is_any_td_scalar, -) +from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.ccalendar cimport ( + c_MONTH_NUMBERS, dayofweek, get_day_of_year, - is_leapyear, - get_week_of_year, get_days_in_month, + get_week_of_year, + is_leapyear, ) -from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS +from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds, is_any_td_scalar + from pandas._libs.tslibs.conversion import ensure_datetime64ns from pandas._libs.tslibs.dtypes cimport ( - PeriodDtypeBase, - FR_UND, FR_ANN, - FR_QTR, - FR_MTH, - FR_WK, FR_BUS, FR_DAY, FR_HR, FR_MIN, - FR_SEC, FR_MS, - FR_US, + FR_MTH, FR_NS, + FR_QTR, + FR_SEC, + FR_UND, + FR_US, + FR_WK, + PeriodDtypeBase, attrname_to_abbrevs, ) - from pandas._libs.tslibs.parsing cimport get_rule_month + from pandas._libs.tslibs.parsing import parse_time_string + from pandas._libs.tslibs.nattype cimport ( - _nat_scalar_rules, NPY_NAT, - is_null_datetimelike, + _nat_scalar_rules, c_NaT as NaT, c_nat_strings as nat_strings, + is_null_datetimelike, ) from pandas._libs.tslibs.offsets cimport ( BaseOffset, - to_offset, - is_tick_object, is_offset_object, + is_tick_object, + to_offset, ) -from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG +from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG cdef: enum: diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 660b582f73e6e..d2690be905a68 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -1,27 +1,30 @@ """Strptime-related classes and functions. """ -import time -import locale import calendar +import locale import re +import time from cpython.datetime cimport date, tzinfo from _thread import allocate_lock as _thread_allocate_lock +import numpy as np import pytz -import numpy as np from numpy cimport int64_t -from pandas._libs.tslibs.np_datetime cimport ( - check_dts_bounds, dtstruct_to_dt64, npy_datetimestruct) - from pandas._libs.tslibs.nattype cimport ( - checknull_with_nat, NPY_NAT, c_nat_strings as nat_strings, + checknull_with_nat, ) +from pandas._libs.tslibs.np_datetime cimport ( + check_dts_bounds, + dtstruct_to_dt64, + npy_datetimestruct, +) + cdef dict _parse_code_table = {'y': 0, 'Y': 1, diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 8f3a599bf107c..ee32ed53a908b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -2,39 +2,47 @@ import collections import cython -from cpython.object cimport Py_NE, Py_EQ, PyObject_RichCompare +from cpython.object cimport Py_EQ, Py_NE, PyObject_RichCompare import numpy as np + cimport numpy as cnp from numpy cimport int64_t, ndarray + cnp.import_array() -from cpython.datetime cimport (timedelta, - PyDateTime_Check, PyDelta_Check, - PyDateTime_IMPORT) +from cpython.datetime cimport ( + PyDateTime_Check, + PyDateTime_IMPORT, + PyDelta_Check, + timedelta, +) + PyDateTime_IMPORT cimport pandas._libs.tslibs.util as util -from pandas._libs.tslibs.util cimport ( - is_timedelta64_object, is_datetime64_object, is_integer_object, - is_float_object, is_array -) - from pandas._libs.tslibs.base cimport ABCTimestamp - from pandas._libs.tslibs.conversion cimport cast_from_unit - -from pandas._libs.tslibs.np_datetime cimport ( - cmp_scalar, td64_to_tdstruct, pandas_timedeltastruct) - from pandas._libs.tslibs.nattype cimport ( - checknull_with_nat, NPY_NAT, c_NaT as NaT, c_nat_strings as nat_strings, + checknull_with_nat, +) +from pandas._libs.tslibs.np_datetime cimport ( + cmp_scalar, + pandas_timedeltastruct, + td64_to_tdstruct, ) from pandas._libs.tslibs.offsets cimport is_tick_object +from pandas._libs.tslibs.util cimport ( + is_array, + is_datetime64_object, + is_float_object, + is_integer_object, + is_timedelta64_object, +) # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8cef685933863..bddfc30d86a53 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -9,54 +9,66 @@ shadows the python class, where we do any heavy lifting. import warnings import numpy as np + cimport numpy as cnp -from numpy cimport int64_t, int8_t, uint8_t, ndarray -cnp.import_array() +from numpy cimport int8_t, int64_t, ndarray, uint8_t -from cpython.object cimport (PyObject_RichCompareBool, PyObject_RichCompare, - Py_EQ, Py_NE) +cnp.import_array() -from cpython.datetime cimport ( - datetime, - time, - tzinfo, - tzinfo as tzinfo_type, # alias bc `tzinfo` is a kwarg below +from cpython.datetime cimport ( # alias bc `tzinfo` is a kwarg below PyDateTime_Check, + PyDateTime_IMPORT, PyDelta_Check, PyTZInfo_Check, - PyDateTime_IMPORT, -) -PyDateTime_IMPORT - -from pandas._libs.tslibs.util cimport ( - is_datetime64_object, is_float_object, is_integer_object, - is_timedelta64_object, is_array, + datetime, + time, + tzinfo as tzinfo_type, ) +from cpython.object cimport Py_EQ, Py_NE, PyObject_RichCompare, PyObject_RichCompareBool -from pandas._libs.tslibs.base cimport ABCTimestamp +PyDateTime_IMPORT from pandas._libs.tslibs cimport ccalendar - +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.conversion cimport ( _TSObject, - convert_to_tsobject, convert_datetime_to_tsobject, + convert_to_tsobject, normalize_i8_stamp, ) -from pandas._libs.tslibs.fields import get_start_end_field, get_date_name_field +from pandas._libs.tslibs.util cimport ( + is_array, + is_datetime64_object, + is_float_object, + is_integer_object, + is_timedelta64_object, +) + +from pandas._libs.tslibs.fields import get_date_name_field, get_start_end_field + from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( - check_dts_bounds, npy_datetimestruct, dt64_to_dtstruct, + check_dts_bounds, cmp_scalar, + dt64_to_dtstruct, + npy_datetimestruct, pydatetime_to_dt64, ) + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.offsets cimport to_offset, is_offset_object -from pandas._libs.tslibs.timedeltas cimport is_any_td_scalar, delta_to_nanoseconds + +from pandas._libs.tslibs.offsets cimport is_offset_object, to_offset +from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds, is_any_td_scalar + from pandas._libs.tslibs.timedeltas import Timedelta + from pandas._libs.tslibs.timezones cimport ( - is_utc, maybe_get_tz, treat_tz_as_pytz, utc_pytz as UTC, - get_timezone, tz_compare, + get_timezone, + is_utc, + maybe_get_tz, + treat_tz_as_pytz, + tz_compare, + utc_pytz as UTC, ) from pandas._libs.tslibs.tzconversion cimport ( tz_convert_from_utc_single, diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index a8c785704d8e8..b82291a71057e 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,27 +1,31 @@ from datetime import timezone + from cpython.datetime cimport datetime, timedelta, tzinfo # dateutil compat + from dateutil.tz import ( gettz as dateutil_gettz, tzfile as _dateutil_tzfile, tzlocal as _dateutil_tzlocal, tzutc as _dateutil_tzutc, ) - - -from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo import pytz +from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo + UTC = pytz.utc import numpy as np + cimport numpy as cnp from numpy cimport int64_t + cnp.import_array() # ---------------------------------------------------------------------- -from pandas._libs.tslibs.util cimport is_integer_object, get_nat +from pandas._libs.tslibs.util cimport get_nat, is_integer_object + cdef int64_t NPY_NAT = get_nat() cdef tzinfo utc_stdlib = timezone.utc diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 606639af16a18..2b148cd8849f1 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -5,21 +5,27 @@ import cython from cython import Py_ssize_t from cpython.datetime cimport ( - PyDateTime_IMPORT, PyDelta_Check, datetime, timedelta, tzinfo) + PyDateTime_IMPORT, + PyDelta_Check, + datetime, + timedelta, + tzinfo, +) + PyDateTime_IMPORT -import pytz from dateutil.tz import tzutc - import numpy as np +import pytz + cimport numpy as cnp -from numpy cimport ndarray, int64_t, uint8_t, intp_t +from numpy cimport int64_t, intp_t, ndarray, uint8_t + cnp.import_array() from pandas._libs.tslibs.ccalendar cimport DAY_NANOS, HOUR_NANOS from pandas._libs.tslibs.nattype cimport NPY_NAT -from pandas._libs.tslibs.np_datetime cimport ( - npy_datetimestruct, dt64_to_dtstruct) +from pandas._libs.tslibs.np_datetime cimport dt64_to_dtstruct, npy_datetimestruct from pandas._libs.tslibs.timezones cimport ( get_dst_info, get_utcoffset, diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index c8f8daf6724c2..bdc00f6c6e21a 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -1,18 +1,21 @@ import cython -from cpython.datetime cimport datetime, date, time, tzinfo +from cpython.datetime cimport date, datetime, time, tzinfo import numpy as np + from numpy cimport int64_t, intp_t, ndarray from .conversion cimport normalize_i8_stamp + from .dtypes import Resolution + from .nattype cimport NPY_NAT, c_NaT as NaT -from .np_datetime cimport npy_datetimestruct, dt64_to_dtstruct +from .np_datetime cimport dt64_to_dtstruct, npy_datetimestruct from .offsets cimport to_offset from .period cimport get_period_ordinal from .timestamps cimport create_timestamp_from_ts -from .timezones cimport is_utc, is_tzlocal, get_dst_info +from .timezones cimport get_dst_info, is_tzlocal, is_utc from .tzconversion cimport tz_convert_utc_to_tzlocal # ------------------------------------------------------------------------- diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 362d0e6263697..3ec4547d223ce 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -2,13 +2,15 @@ import cython from cython import Py_ssize_t -from libcpp.deque cimport deque -from libc.stdlib cimport malloc, free +from libc.stdlib cimport free, malloc +from libcpp.deque cimport deque import numpy as np + cimport numpy as cnp -from numpy cimport ndarray, int64_t, float64_t, float32_t, uint8_t +from numpy cimport float32_t, float64_t, int64_t, ndarray, uint8_t + cnp.import_array() @@ -22,6 +24,7 @@ from pandas._libs.algos import is_monotonic from pandas._libs.util cimport numeric + cdef extern from "../src/skiplist.h": ctypedef struct node_t: node_t **next diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 8a1e7feb57ace..9af1159a805ec 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -1,7 +1,8 @@ # cython: boundscheck=False, wraparound=False, cdivision=True import numpy as np -from numpy cimport ndarray, int64_t + +from numpy cimport int64_t, ndarray # Cython routines for window indexers diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 2d5b31d7ccbcf..40c39aabb7a7a 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -5,8 +5,8 @@ from cpython.bytes cimport PyBytes_GET_SIZE from cpython.unicode cimport PyUnicode_GET_SIZE import numpy as np -from numpy cimport ndarray, uint8_t +from numpy cimport ndarray, uint8_t ctypedef fused pandas_string: str diff --git a/pandas/_testing.py b/pandas/_testing.py index fc6df7a95e348..a020fbff3553a 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -9,7 +9,7 @@ from shutil import rmtree import string import tempfile -from typing import Any, Callable, List, Optional, Type, Union, cast +from typing import Any, Callable, ContextManager, List, Optional, Type, Union, cast import warnings import zipfile @@ -535,7 +535,7 @@ def rands(nchars): def close(fignum=None): - from matplotlib.pyplot import get_fignums, close as _close + from matplotlib.pyplot import close as _close, get_fignums if fignum is None: for fignum in get_fignums(): @@ -2880,9 +2880,7 @@ def convert_rows_list_to_csv_str(rows_list: List[str]): return expected -def external_error_raised( - expected_exception: Type[Exception], -) -> Callable[[Type[Exception], None], None]: +def external_error_raised(expected_exception: Type[Exception],) -> ContextManager: """ Helper function to mark pytest.raises that have an external error message. diff --git a/pandas/_typing.py b/pandas/_typing.py index 8e98833ad37f7..76ec527e6e258 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -24,13 +24,15 @@ # https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles if TYPE_CHECKING: from pandas._libs import Period, Timedelta, Timestamp # noqa: F401 - from pandas.core.arrays.base import ExtensionArray # noqa: F401 + from pandas.core.dtypes.dtypes import ExtensionDtype # noqa: F401 - from pandas.core.indexes.base import Index # noqa: F401 - from pandas.core.generic import NDFrame # noqa: F401 + from pandas import Interval # noqa: F401 - from pandas.core.series import Series # noqa: F401 + from pandas.core.arrays.base import ExtensionArray # noqa: F401 from pandas.core.frame import DataFrame # noqa: F401 + from pandas.core.generic import NDFrame # noqa: F401 + from pandas.core.indexes.base import Index # noqa: F401 + from pandas.core.series import Series # noqa: F401 # array-like diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 0484de3fa165d..015b203a60256 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -14,7 +14,7 @@ from pandas import Index if TYPE_CHECKING: - from pandas import Series, DataFrame + from pandas import DataFrame, Series def load_reduce(self): diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 733dbeed34b72..6b8d7dc35fe95 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -15,7 +15,7 @@ from pandas.core.construction import create_series_with_explicit_dtype if TYPE_CHECKING: - from pandas import DataFrame, Series, Index + from pandas import DataFrame, Index, Series ResType = Dict[int, Any] diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index db9cfd9d7fc59..6e5c7bc699962 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -520,7 +520,7 @@ def _from_inferred_categories( ------- Categorical """ - from pandas import Index, to_numeric, to_datetime, to_timedelta + from pandas import Index, to_datetime, to_numeric, to_timedelta cats = Index(inferred_categories) known_categories = ( @@ -1403,7 +1403,7 @@ def value_counts(self, dropna=True): -------- Series.value_counts """ - from pandas import Series, CategoricalIndex + from pandas import CategoricalIndex, Series code, cat = self._codes, self.categories ncat, mask = len(cat), 0 <= code diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ee4d43fdb3bc2..c6945e2f78b5a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -959,7 +959,7 @@ def value_counts(self, dropna=False): ------- Series """ - from pandas import Series, Index + from pandas import Index, Series if dropna: values = self[~self.isna()]._data diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d674b1c476d2c..8b2bb7832b5d0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -418,9 +418,9 @@ def _generate_range( # index is localized datetime64 array -> have to convert # start/end as well to compare if start is not None: - start = start.tz_localize(tz).asm8 + start = start.tz_localize(tz, ambiguous, nonexistent).asm8 if end is not None: - end = end.tz_localize(tz).asm8 + end = end.tz_localize(tz, ambiguous, nonexistent).asm8 else: # Create a linearly spaced date_range in local time # Nanosecond-granularity timestamps aren't always correctly diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index b0958af41158c..57df067c7b16e 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -116,6 +116,7 @@ def __from_arrow__( Construct IntegerArray from pyarrow Array/ChunkedArray. """ import pyarrow # noqa: F811 + from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask pyarrow_type = pyarrow.from_numpy_dtype(self.type) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index c861d25afd13f..ed2437cc061bd 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1105,6 +1105,7 @@ def __arrow_array__(self, type=None): Convert myself into a pyarrow Array. """ import pyarrow + from pandas.core.arrays._arrow_utils import ArrowIntervalType try: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8d5cb12d60e4d..fe78481d99d30 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -300,6 +300,7 @@ def __arrow_array__(self, type=None): Convert myself into a pyarrow Array. """ import pyarrow + from pandas.core.arrays._arrow_utils import ArrowPeriodType if type is not None: diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 8a30d2b954b55..da8d695c59b9e 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -87,8 +87,8 @@ def from_coo(cls, A, dense_index=False): 1 0 3.0 dtype: Sparse[float64, nan] """ - from pandas.core.arrays.sparse.scipy_sparse import _coo_to_sparse_series from pandas import Series + from pandas.core.arrays.sparse.scipy_sparse import _coo_to_sparse_series result = _coo_to_sparse_series(A, dense_index=dense_index) result = Series(result.array, index=result.index, copy=False) @@ -253,9 +253,10 @@ def from_spmatrix(cls, data, index=None, columns=None): 1 0.0 1.0 0.0 2 0.0 0.0 1.0 """ - from pandas import DataFrame from pandas._libs.sparse import IntIndex + from pandas import DataFrame + data = data.tocsc() index, columns = cls._prep_index(data, index, columns) n_rows, n_columns = data.shape @@ -354,8 +355,8 @@ def density(self) -> float: @staticmethod def _prep_index(data, index, columns): - import pandas.core.indexes.base as ibase from pandas.core.indexes.api import ensure_index + import pandas.core.indexes.base as ibase N, K = data.shape if index is None: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 86f6be77bc505..2b2431149e230 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -662,8 +662,10 @@ def register_plotting_backend_cb(key): def register_converter_cb(key): - from pandas.plotting import register_matplotlib_converters - from pandas.plotting import deregister_matplotlib_converters + from pandas.plotting import ( + deregister_matplotlib_converters, + register_matplotlib_converters, + ) if cf.get_option(key): register_matplotlib_converters() diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 6c58698989e96..47f10f1f65f4a 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -48,9 +48,9 @@ import pandas.core.common as com if TYPE_CHECKING: - from pandas.core.series import Series # noqa: F401 - from pandas.core.indexes.api import Index # noqa: F401 from pandas.core.arrays import ExtensionArray # noqa: F401 + from pandas.core.indexes.api import Index # noqa: F401 + from pandas.core.series import Series # noqa: F401 def array( @@ -255,14 +255,14 @@ def array( ValueError: Cannot pass scalar '1' to 'pandas.array'. """ from pandas.core.arrays import ( - period_array, BooleanArray, + DatetimeArray, IntegerArray, IntervalArray, PandasArray, - DatetimeArray, - TimedeltaArray, StringArray, + TimedeltaArray, + period_array, ) if lib.is_scalar(data): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6b84f0e81f48b..228329898b6a4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1244,6 +1244,7 @@ def try_datetime(v): # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype, xref GH19671 from pandas._libs.tslibs import conversion + from pandas import DatetimeIndex try: @@ -1303,8 +1304,8 @@ def maybe_cast_to_datetime(value, dtype, errors: str = "raise"): try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ - from pandas.core.tools.timedeltas import to_timedelta from pandas.core.tools.datetimes import to_datetime + from pandas.core.tools.timedeltas import to_timedelta if dtype is not None: if isinstance(dtype, str): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 22480fbc47508..8350e136417b1 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -30,12 +30,13 @@ if TYPE_CHECKING: import pyarrow # noqa: F401 + + from pandas import Categorical # noqa: F401 from pandas.core.arrays import ( # noqa: F401 + DatetimeArray, IntervalArray, PeriodArray, - DatetimeArray, ) - from pandas import Categorical # noqa: F401 str_type = str @@ -391,12 +392,13 @@ def __repr__(self) -> str_type: @staticmethod def _hash_categories(categories, ordered: Ordered = True) -> int: + from pandas.core.dtypes.common import DT64NS_DTYPE, is_datetime64tz_dtype + from pandas.core.util.hashing import ( - hash_array, _combine_hash_arrays, + hash_array, hash_tuples, ) - from pandas.core.dtypes.common import is_datetime64tz_dtype, DT64NS_DTYPE if len(categories) and isinstance(categories[0], tuple): # assumes if any individual category is a tuple, then all our. ATM @@ -939,6 +941,7 @@ def __from_arrow__( Construct PeriodArray from pyarrow Array/ChunkedArray. """ import pyarrow # noqa: F811 + from pandas.core.arrays import PeriodArray from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask @@ -1136,6 +1139,7 @@ def __from_arrow__( Construct IntervalArray from pyarrow Array/ChunkedArray. """ import pyarrow # noqa: F811 + from pandas.core.arrays import IntervalArray if isinstance(array, pyarrow.Array): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f52341ed782d8..79627e43d78c2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -150,6 +150,7 @@ if TYPE_CHECKING: from pandas.core.groupby.generic import DataFrameGroupBy + from pandas.io.formats.style import Styler # --------------------------------------------------------------------- @@ -4966,9 +4967,10 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): Define in which columns to look for missing values. - >>> df.dropna(subset=['name', 'born']) + >>> df.dropna(subset=['name', 'toy']) name toy born 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT Keep the DataFrame with valid entries in the same variable. @@ -5204,8 +5206,9 @@ def duplicated( 4 True dtype: bool """ + from pandas._libs.hashtable import _SIZE_HINT_LIMIT, duplicated_int64 + from pandas.core.sorting import get_group_index - from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT if self.empty: return self._constructor_sliced(dtype=bool) @@ -7867,8 +7870,8 @@ def join( def _join_compat( self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False ): - from pandas.core.reshape.merge import merge from pandas.core.reshape.concat import concat + from pandas.core.reshape.merge import merge if isinstance(other, Series): if other.name is None: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ec7b14f27c5a1..516876c4a5174 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -681,8 +681,8 @@ def value_counts( self, normalize=False, sort=True, ascending=False, bins=None, dropna=True ): - from pandas.core.reshape.tile import cut from pandas.core.reshape.merge import _get_join_indexers + from pandas.core.reshape.tile import cut if bins is not None and not np.iterable(bins): # scalar bins cannot be done at top level @@ -1662,11 +1662,18 @@ def _gotitem(self, key, ndim: int, subset=None): return DataFrameGroupBy( subset, self.grouper, - selection=key, + axis=self.axis, + level=self.level, grouper=self.grouper, exclusions=self.exclusions, + selection=key, as_index=self.as_index, + sort=self.sort, + group_keys=self.group_keys, + squeeze=self.squeeze, observed=self.observed, + mutated=self.mutated, + dropna=self.dropna, ) elif ndim == 1: if subset is None: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 67003dffb90bb..8239a792c65dd 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -237,7 +237,6 @@ def __new__(cls, *args, **kwargs): # core/groupby/grouper.py::Grouper # raising these warnings from TimeGrouper directly would fail the test: # tests/resample/test_deprecated.py::test_deprecating_on_loffset_and_base - # hacky way to set the stacklevel: if cls is TimeGrouper it means # that the call comes from a pandas internal call of resample, # otherwise it comes from pd.Grouper diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 986d6323e704e..1be381e38b157 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5731,9 +5731,9 @@ def _maybe_cast_data_without_dtype(subarr): """ # Runtime import needed bc IntervalArray imports Index from pandas.core.arrays import ( + DatetimeArray, IntervalArray, PeriodArray, - DatetimeArray, TimedeltaArray, ) diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index fd9a9a5ef6c93..6eedf72726acb 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -5,8 +5,8 @@ from pandas._typing import ArrayLike if TYPE_CHECKING: - from pandas.core.internals.managers import BlockManager # noqa:F401 from pandas.core.internals.blocks import Block # noqa:F401 + from pandas.core.internals.managers import BlockManager # noqa:F401 def operate_blockwise( diff --git a/pandas/core/strings.py b/pandas/core/strings.py index a1db7742916de..6702bf519c52e 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -155,7 +155,7 @@ def _map_stringarray( an ndarray. """ - from pandas.arrays import IntegerArray, StringArray, BooleanArray + from pandas.arrays import BooleanArray, IntegerArray, StringArray mask = isna(arr) @@ -2186,7 +2186,7 @@ def _wrap_result( returns_string=True, ): - from pandas import Index, Series, MultiIndex + from pandas import Index, MultiIndex, Series # for category, we do the stuff on the categories, so blow it up # to the full series again @@ -2292,7 +2292,7 @@ def _get_series_list(self, others): list of Series Others transformed into list of Series. """ - from pandas import Series, DataFrame + from pandas import DataFrame, Series # self._orig is either Series or Index idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 0adab143f6052..7aac2f793f61a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -53,9 +53,10 @@ from pandas.core.indexes.datetimes import DatetimeIndex if TYPE_CHECKING: - from pandas import Series # noqa:F401 from pandas._libs.tslibs.nattype import NaTType # noqa:F401 + from pandas import Series # noqa:F401 + # --------------------------------------------------------------------- # types used in annotations @@ -876,7 +877,7 @@ def _assemble_from_unit_mappings(arg, errors, tz): ------- Series """ - from pandas import to_timedelta, to_numeric, DataFrame + from pandas import DataFrame, to_numeric, to_timedelta arg = DataFrame(arg) if not arg.columns.is_unique: diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 1b56b6d5a46fa..d79b9f4092325 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -275,7 +275,7 @@ def hash_array( # then hash and rename categories. We allow skipping the categorization # when the values are known/likely to be unique. if categorize: - from pandas import factorize, Categorical, Index + from pandas import Categorical, Index, factorize codes, categories = factorize(vals, sort=False) cat = Categorical(codes, Index(categories), ordered=False, fastpath=True) diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 40bff5a75709b..d16955a98b62f 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -311,17 +311,17 @@ def init_windows_clipboard(): global HGLOBAL, LPVOID, DWORD, LPCSTR, INT global HWND, HINSTANCE, HMENU, BOOL, UINT, HANDLE from ctypes.wintypes import ( - HGLOBAL, - LPVOID, + BOOL, DWORD, - LPCSTR, - INT, - HWND, + HANDLE, + HGLOBAL, HINSTANCE, HMENU, - BOOL, + HWND, + INT, + LPCSTR, + LPVOID, UINT, - HANDLE, ) windll = ctypes.windll @@ -528,8 +528,8 @@ def determine_clipboard(): # Setup for the MAC OS X platform: if os.name == "mac" or platform.system() == "Darwin": try: - import Foundation # check if pyobjc is installed import AppKit + import Foundation # check if pyobjc is installed except ImportError: return init_osx_pbcopy_clipboard() else: diff --git a/pandas/io/common.py b/pandas/io/common.py index bd77a1e69c138..6ac8051f35b6f 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -267,8 +267,8 @@ def file_path_to_url(path: str) -> str: def get_compression_method( - compression: Optional[Union[str, Mapping[str, str]]] -) -> Tuple[Optional[str], Dict[str, str]]: + compression: Optional[Union[str, Mapping[str, Any]]] +) -> Tuple[Optional[str], Dict[str, Any]]: """ Simplifies a compression argument to a compression method string and a mapping containing additional arguments. @@ -282,21 +282,23 @@ def get_compression_method( Returns ------- tuple of ({compression method}, Optional[str] - {compression arguments}, Dict[str, str]) + {compression arguments}, Dict[str, Any]) Raises ------ ValueError on mapping missing 'method' key """ + compression_method: Optional[str] if isinstance(compression, Mapping): compression_args = dict(compression) try: - compression = compression_args.pop("method") + compression_method = compression_args.pop("method") except KeyError as err: raise ValueError("If mapping, compression must have key 'method'") from err else: compression_args = {} - return compression, compression_args + compression_method = compression + return compression_method, compression_args def infer_compression( @@ -434,28 +436,19 @@ def get_handle( if compression: - # GH33398 the type ignores here seem related to mypy issue #5382; - # it may be possible to remove them once that is resolved. - # GZ Compression if compression == "gzip": if is_path: - f = gzip.open( - path_or_buf, mode, **compression_args # type: ignore - ) + f = gzip.open(path_or_buf, mode, **compression_args) else: - f = gzip.GzipFile( - fileobj=path_or_buf, **compression_args # type: ignore - ) + f = gzip.GzipFile(fileobj=path_or_buf, **compression_args) # BZ Compression elif compression == "bz2": if is_path: - f = bz2.BZ2File( - path_or_buf, mode, **compression_args # type: ignore - ) + f = bz2.BZ2File(path_or_buf, mode, **compression_args) else: - f = bz2.BZ2File(path_or_buf, **compression_args) # type: ignore + f = bz2.BZ2File(path_or_buf, **compression_args) # ZIP Compression elif compression == "zip": diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 2a12f779230b2..b1bbda4a4b7e0 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -834,8 +834,8 @@ class ExcelFile: from pandas.io.excel._odfreader import _ODFReader from pandas.io.excel._openpyxl import _OpenpyxlReader - from pandas.io.excel._xlrd import _XlrdReader from pandas.io.excel._pyxlsb import _PyxlsbReader + from pandas.io.excel._xlrd import _XlrdReader _engines = { "xlrd": _XlrdReader, diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 85ec9afaaec25..44abaf5d3b3c9 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -191,9 +191,9 @@ def _get_cell_string_value(self, cell) -> str: Find and decode OpenDocument text:s tags that represent a run length encoded sequence of space characters. """ - from odf.element import Text, Element - from odf.text import S, P + from odf.element import Element, Text from odf.namespaces import TEXTNS + from odf.text import P, S text_p = P().qname text_s = S().qname diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 0696d82e51f34..03a30cbd62f9a 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -225,7 +225,7 @@ def _convert_to_fill(cls, fill_dict): ------- fill : openpyxl.styles.Fill """ - from openpyxl.styles import PatternFill, GradientFill + from openpyxl.styles import GradientFill, PatternFill _pattern_fill_key_map = { "patternType": "fill_type", diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 8f7d3b1368fc7..af82c15fd6b66 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -48,11 +48,11 @@ def get_sheet_by_index(self, index): def get_sheet_data(self, sheet, convert_float): from xlrd import ( - xldate, + XL_CELL_BOOLEAN, XL_CELL_DATE, XL_CELL_ERROR, - XL_CELL_BOOLEAN, XL_CELL_NUMBER, + xldate, ) epoch1904 = self.book.datemode diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fe85eab4bfbf5..c05f79f935548 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -72,7 +72,7 @@ from pandas.io.formats.printing import adjoin, justify, pprint_thing if TYPE_CHECKING: - from pandas import Series, DataFrame, Categorical + from pandas import Categorical, DataFrame, Series FormattersType = Union[ List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d11144938eb26..fd1efa2d1b668 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -42,8 +42,8 @@ try: - import matplotlib.pyplot as plt from matplotlib import colors + import matplotlib.pyplot as plt has_mpl = True except ImportError: diff --git a/pandas/io/html.py b/pandas/io/html.py index 3193f52d239f1..8354cf413814e 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -707,8 +707,8 @@ def _build_doc(self): -------- pandas.io.html._HtmlFrameParser._build_doc """ - from lxml.html import parse, fromstring, HTMLParser from lxml.etree import XMLSyntaxError + from lxml.html import HTMLParser, fromstring, parse parser = HTMLParser(recover=True, encoding=self.encoding) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b67a1c5781d91..e0df4c29e543e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -57,7 +57,7 @@ from pandas.io.formats.printing import adjoin, pprint_thing if TYPE_CHECKING: - from tables import File, Node, Col # noqa:F401 + from tables import Col, File, Node # noqa:F401 # versioning attribute diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 0038e39e2ffcc..17b41fd2b4379 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -1,8 +1,8 @@ # cython: profile=False # cython: boundscheck=False, initializedcheck=False from cython import Py_ssize_t - import numpy as np + import pandas.io.sas.sas_constants as const ctypedef signed long long int64_t diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9177696ca13d6..51888e5021d80 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -439,7 +439,8 @@ def read_sql( con : SQLAlchemy connectable, str, or sqlite3 connection Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible - for engine disposal and connection closure for the SQLAlchemy connectable. See + for engine disposal and connection closure for the SQLAlchemy connectable; str + connections are closed automatically. See `here `_. index_col : str or list of str, optional, default: None Column(s) to set as index(MultiIndex). @@ -937,7 +938,7 @@ def _get_column_names_and_types(self, dtype_mapper): return column_names_and_types def _create_table_setup(self): - from sqlalchemy import Table, Column, PrimaryKeyConstraint + from sqlalchemy import Column, PrimaryKeyConstraint, Table column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type) @@ -1026,15 +1027,15 @@ def _sqlalchemy_type(self, col): col_type = lib.infer_dtype(col, skipna=True) from sqlalchemy.types import ( + TIMESTAMP, BigInteger, - Integer, - Float, - Text, Boolean, - DateTime, Date, + DateTime, + Float, + Integer, + Text, Time, - TIMESTAMP, ) if col_type == "datetime64" or col_type == "datetime": @@ -1079,7 +1080,7 @@ def _sqlalchemy_type(self, col): return Text def _get_dtype(self, sqltype): - from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date, TIMESTAMP + from sqlalchemy.types import TIMESTAMP, Boolean, Date, DateTime, Float, Integer if isinstance(sqltype, Float): return float @@ -1374,7 +1375,7 @@ def to_sql( dtype = {col_name: dtype for col_name in frame} if dtype is not None: - from sqlalchemy.types import to_instance, TypeEngine + from sqlalchemy.types import TypeEngine, to_instance for col, my_type in dtype.items(): if not isinstance(to_instance(my_type), TypeEngine): diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 353bc8a8936a5..b490e07e43753 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1149,8 +1149,8 @@ def _plot(cls, ax, x, y, style=None, column_num=None, stacking_id=None, **kwds): @classmethod def _ts_plot(cls, ax, x, data, style=None, **kwds): from pandas.plotting._matplotlib.timeseries import ( - _maybe_resample, _decorate_axes, + _maybe_resample, format_dateaxis, ) diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 8f3571cf13cbc..95f9fbf3995ed 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -24,7 +24,7 @@ from pandas.tseries.frequencies import get_period_alias, is_subperiod, is_superperiod if TYPE_CHECKING: - from pandas import Series, Index # noqa:F401 + from pandas import Index, Series # noqa:F401 # --------------------------------------------------------------------- diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index ecd20796b6f21..caa348d3a1fb9 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -267,9 +267,10 @@ def test_sparsearray(): def test_np(): - import numpy as np import warnings + import numpy as np + with warnings.catch_warnings(): warnings.simplefilter("ignore", FutureWarning) assert (pd.np.arange(0, 10) == np.arange(0, 10)).all() diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index d517eaaec68d2..0176755b54dd1 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -142,6 +142,7 @@ def test_repr(): @pyarrow_skip def test_arrow_extension_type(): import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowIntervalType p1 = ArrowIntervalType(pa.int64(), "left") @@ -158,6 +159,7 @@ def test_arrow_extension_type(): @pyarrow_skip def test_arrow_array(): import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowIntervalType intervals = pd.interval_range(1, 5, freq=1).array @@ -187,6 +189,7 @@ def test_arrow_array(): @pyarrow_skip def test_arrow_array_missing(): import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowIntervalType arr = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0]) @@ -221,6 +224,7 @@ def test_arrow_array_missing(): ) def test_arrow_table_roundtrip(breaks): import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowIntervalType arr = IntervalArray.from_breaks(breaks) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 8887dd0278afe..0d81e8e733842 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -359,6 +359,7 @@ def test_arrow_extension_type(): ) def test_arrow_array(data, freq): import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowPeriodType periods = period_array(data, freq=freq) @@ -384,6 +385,7 @@ def test_arrow_array(data, freq): @pyarrow_skip def test_arrow_array_missing(): import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowPeriodType arr = PeriodArray([1, 2, 3], freq="D") @@ -399,6 +401,7 @@ def test_arrow_array_missing(): @pyarrow_skip def test_arrow_table_roundtrip(): import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowPeriodType arr = PeriodArray([1, 2, 3], freq="D") diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 9d6b9f39a0578..52a1e3aae9058 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -287,7 +287,7 @@ def test_stat_op_api(self, float_frame, float_string_frame): assert_stat_op_api("median", float_frame, float_string_frame) try: - from scipy.stats import skew, kurtosis # noqa:F401 + from scipy.stats import kurtosis, skew # noqa:F401 assert_stat_op_api("skew", float_frame, float_string_frame) assert_stat_op_api("kurt", float_frame, float_string_frame) @@ -370,7 +370,7 @@ def kurt(x): ) try: - from scipy import skew, kurtosis # noqa:F401 + from scipy import kurtosis, skew # noqa:F401 assert_stat_op_calc("skew", skewness, float_frame_with_na) assert_stat_op_calc("kurt", kurt, float_frame_with_na) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 7e4513da37dc9..0d447a70b540d 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1294,9 +1294,7 @@ def test_get_nonexistent_category(): ) -def test_series_groupby_on_2_categoricals_unobserved( - reduction_func: str, observed: bool, request -): +def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request): # GH 17605 if reduction_func == "ngroup": pytest.skip("ngroup is not truly a reduction") @@ -1326,7 +1324,7 @@ def test_series_groupby_on_2_categoricals_unobserved( def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( - reduction_func: str, request + reduction_func, request ): # GH 17605 # Tests whether the unobserved categories in the result contain 0 or NaN @@ -1374,7 +1372,7 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans( assert np.issubdtype(result.dtype, np.integer) -def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_func: str): +def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_func): # GH 23865 # GH 27075 # Ensure that df.groupby, when 'by' is two pd.Categorical variables, @@ -1402,7 +1400,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun @pytest.mark.parametrize("observed", [False, None]) def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( - reduction_func: str, observed: bool, request + reduction_func, observed, request ): # GH 23865 # GH 27075 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ebce5b0ef0a66..1750739353e6d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2055,3 +2055,31 @@ def test_groups_repr_truncates(max_seq_items, expected): result = df.groupby(np.array(df.a)).groups.__repr__() assert result == expected + + +@pytest.mark.parametrize( + "attr, value", + [ + ("axis", 1), + ("level", "a"), + ("as_index", False), + ("sort", False), + ("group_keys", False), + ("squeeze", True), + ("observed", True), + ("dropna", False), + ], +) +@pytest.mark.filterwarnings( + "ignore:The `squeeze` parameter is deprecated:FutureWarning" +) +def test_subsetting_columns_keeps_attrs(attr, value): + # GH 9959 - When subsetting columns, don't drop attributes + df = pd.DataFrame({"a": [1], "b": [2], "c": [3]}) + if attr != "axis": + df = df.set_index("a") + + expected = df.groupby("a", **{attr: value}) + + result = expected[["b"]] + assert getattr(result, attr) == getattr(expected, attr) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index cdaf27e214d80..c09f35526a6bf 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas._libs import groupby +from pandas._libs.groupby import group_cumprod_float64, group_cumsum from pandas.core.dtypes.common import ensure_platform_int, is_timedelta64_dtype @@ -545,14 +545,14 @@ def _check_cython_group_transform_cumulative(pd_op, np_op, dtype): def test_cython_group_transform_cumsum(any_real_dtype): # see gh-4095 dtype = np.dtype(any_real_dtype).type - pd_op, np_op = groupby.group_cumsum, np.cumsum + pd_op, np_op = group_cumsum, np.cumsum _check_cython_group_transform_cumulative(pd_op, np_op, dtype) def test_cython_group_transform_cumprod(): # see gh-4095 dtype = np.float64 - pd_op, np_op = groupby.group_cumprod_float64, np.cumproduct + pd_op, np_op = group_cumprod_float64, np.cumproduct _check_cython_group_transform_cumulative(pd_op, np_op, dtype) @@ -567,13 +567,13 @@ def test_cython_group_transform_algos(): data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64") actual = np.zeros_like(data) actual.fill(np.nan) - groupby.group_cumprod_float64(actual, data, labels, ngroups, is_datetimelike) + group_cumprod_float64(actual, data, labels, ngroups, is_datetimelike) expected = np.array([1, 2, 6, np.nan, 24], dtype="float64") tm.assert_numpy_array_equal(actual[:, 0], expected) actual = np.zeros_like(data) actual.fill(np.nan) - groupby.group_cumsum(actual, data, labels, ngroups, is_datetimelike) + group_cumsum(actual, data, labels, ngroups, is_datetimelike) expected = np.array([1, 3, 6, np.nan, 10], dtype="float64") tm.assert_numpy_array_equal(actual[:, 0], expected) @@ -581,7 +581,7 @@ def test_cython_group_transform_algos(): is_datetimelike = True data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None] actual = np.zeros_like(data, dtype="int64") - groupby.group_cumsum(actual, data.view("int64"), labels, ngroups, is_datetimelike) + group_cumsum(actual, data.view("int64"), labels, ngroups, is_datetimelike) expected = np.array( [ np.timedelta64(1, "ns"), diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c8b780455f862..f5b9f4a401e60 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -5,6 +5,7 @@ import pytest from pandas._libs import iNaT +from pandas.compat.numpy import _is_numpy_dev from pandas.errors import InvalidIndexError from pandas.core.dtypes.common import is_datetime64tz_dtype @@ -417,7 +418,7 @@ def test_set_ops_error_cases(self, case, method, index): with pytest.raises(TypeError, match=msg): getattr(index, method)(case) - def test_intersection_base(self, index): + def test_intersection_base(self, index, request): if isinstance(index, CategoricalIndex): return @@ -434,6 +435,15 @@ def test_intersection_base(self, index): # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: + # https://github.com/pandas-dev/pandas/issues/35481 + if ( + _is_numpy_dev + and isinstance(case, Series) + and isinstance(index, UInt64Index) + ): + mark = pytest.mark.xfail(reason="gh-35481") + request.node.add_marker(mark) + result = first.intersection(case) assert tm.equalContents(result, second) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index c150e7901c86a..9a855a1624520 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -787,6 +787,65 @@ def test_construction_with_nat_and_tzlocal(self): expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT]) tm.assert_index_equal(result, expected) + def test_constructor_with_ambiguous_keyword_arg(self): + # GH 35297 + + expected = DatetimeIndex( + ["2020-11-01 01:00:00", "2020-11-02 01:00:00"], + dtype="datetime64[ns, America/New_York]", + freq="D", + ambiguous=False, + ) + + # ambiguous keyword in start + timezone = "America/New_York" + start = pd.Timestamp(year=2020, month=11, day=1, hour=1).tz_localize( + timezone, ambiguous=False + ) + result = pd.date_range(start=start, periods=2, ambiguous=False) + tm.assert_index_equal(result, expected) + + # ambiguous keyword in end + timezone = "America/New_York" + end = pd.Timestamp(year=2020, month=11, day=2, hour=1).tz_localize( + timezone, ambiguous=False + ) + result = pd.date_range(end=end, periods=2, ambiguous=False) + tm.assert_index_equal(result, expected) + + def test_constructor_with_nonexistent_keyword_arg(self): + # GH 35297 + + timezone = "Europe/Warsaw" + + # nonexistent keyword in start + start = pd.Timestamp("2015-03-29 02:30:00").tz_localize( + timezone, nonexistent="shift_forward" + ) + result = pd.date_range(start=start, periods=2, freq="H") + expected = DatetimeIndex( + [ + pd.Timestamp("2015-03-29 03:00:00+02:00", tz=timezone), + pd.Timestamp("2015-03-29 04:00:00+02:00", tz=timezone), + ] + ) + + tm.assert_index_equal(result, expected) + + # nonexistent keyword in end + end = pd.Timestamp("2015-03-29 02:30:00").tz_localize( + timezone, nonexistent="shift_forward" + ) + result = pd.date_range(end=end, periods=2, freq="H") + expected = DatetimeIndex( + [ + pd.Timestamp("2015-03-29 01:00:00+01:00", tz=timezone), + pd.Timestamp("2015-03-29 03:00:00+02:00", tz=timezone), + ] + ) + + tm.assert_index_equal(result, expected) + def test_constructor_no_precision_raises(self): # GH-24753, GH-24739 diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index ec4162f87010f..7bb1d98086a91 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -59,6 +59,7 @@ def test_reindex_with_same_tz(self): def test_time_loc(self): # GH8667 from datetime import time + from pandas._libs.index import _SIZE_CUTOFF ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py index be193e0854d8d..d8e56661b7d61 100644 --- a/pandas/tests/indexing/multiindex/test_indexing_slow.py +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -15,7 +15,7 @@ def test_multiindex_get_loc(): # GH7724, GH2646 with warnings.catch_warnings(record=True): # test indexing into a multi-index before & past the lexsort depth - from numpy.random import randint, choice, randn + from numpy.random import choice, randint, randn cols = ["jim", "joe", "jolie", "joline", "jolia"] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 30b13b6ea9fce..193800fae751f 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas.compat.numpy import _is_numpy_dev + import pandas as pd from pandas import DataFrame, Series, Timestamp, date_range import pandas._testing as tm @@ -945,6 +947,7 @@ def test_loc_setitem_empty_append(self): df.loc[0, "x"] = expected.loc[0, "x"] tm.assert_frame_equal(df, expected) + @pytest.mark.xfail(_is_numpy_dev, reason="gh-35481") def test_loc_setitem_empty_append_raises(self): # GH6173, various appends to an empty dataframe diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index c397a61616c1c..d64e2d1933ace 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -37,8 +37,8 @@ def test_read_csv(cleared_fs): def test_reasonable_error(monkeypatch, cleared_fs): - from fsspec.registry import known_implementations from fsspec import registry + from fsspec.registry import known_implementations registry.target.clear() with pytest.raises(ValueError) as e: diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 4d93119ffa3f5..eacf4fa08545d 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -11,8 +11,7 @@ @td.skip_if_no("gcsfs") def test_read_csv_gcs(monkeypatch): - from fsspec import AbstractFileSystem - from fsspec import registry + from fsspec import AbstractFileSystem, registry registry.target.clear() # noqa # remove state @@ -37,8 +36,7 @@ def open(*args, **kwargs): @td.skip_if_no("gcsfs") def test_to_csv_gcs(monkeypatch): - from fsspec import AbstractFileSystem - from fsspec import registry + from fsspec import AbstractFileSystem, registry registry.target.clear() # noqa # remove state df1 = DataFrame( @@ -76,8 +74,7 @@ def mock_get_filepath_or_buffer(*args, **kwargs): @td.skip_if_no("gcsfs") def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): """Regression test for writing to a not-yet-existent GCS Parquet file.""" - from fsspec import AbstractFileSystem - from fsspec import registry + from fsspec import AbstractFileSystem, registry registry.target.clear() # noqa # remove state df1 = DataFrame( diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0991fae39138e..29b787d39c09d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -48,10 +48,10 @@ try: import sqlalchemy - import sqlalchemy.schema - import sqlalchemy.sql.sqltypes as sqltypes from sqlalchemy.ext import declarative from sqlalchemy.orm import session as sa_session + import sqlalchemy.schema + import sqlalchemy.sql.sqltypes as sqltypes SQLALCHEMY_INSTALLED = True except ImportError: diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 896d3278cdde1..3b1ff233c5ec1 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -13,7 +13,6 @@ from pandas import DataFrame, Series import pandas._testing as tm - """ This is a common base class used for various plotting tests """ @@ -24,6 +23,7 @@ class TestPlotBase: def setup_method(self, method): import matplotlib as mpl + from pandas.plotting._matplotlib import compat mpl.rcdefaults() @@ -187,8 +187,8 @@ def _check_colors( Series used for color grouping key used for andrew_curves, parallel_coordinates, radviz test """ + from matplotlib.collections import Collection, LineCollection, PolyCollection from matplotlib.lines import Line2D - from matplotlib.collections import Collection, PolyCollection, LineCollection conv = self.colorconverter if linecolors is not None: diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 317a994bd9a32..ee43e5d7072fe 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2408,8 +2408,8 @@ def test_specified_props_kwd_plot_box(self, props, expected): assert result[expected][0].get_color() == "C1" def test_default_color_cycle(self): - import matplotlib.pyplot as plt import cycler + import matplotlib.pyplot as plt colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) @@ -2953,8 +2953,8 @@ def _check(axes): @td.skip_if_no_scipy def test_memory_leak(self): """ Check that every plot type gets properly collected. """ - import weakref import gc + import weakref results = {} for kind in plotting.PlotAccessor._all_kinds: @@ -3032,8 +3032,8 @@ def test_df_subplots_patterns_minorticks(self): @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 - import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec + import matplotlib.pyplot as plt ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10)) @@ -3422,9 +3422,9 @@ def test_xlabel_ylabel_dataframe_subplots( def _generate_4_axes_via_gridspec(): - import matplotlib.pyplot as plt import matplotlib as mpl import matplotlib.gridspec # noqa + import matplotlib.pyplot as plt gs = mpl.gridspec.GridSpec(2, 2) ax_tl = plt.subplot(gs[0, 0]) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index b6a6c326c3df3..34c881855d16a 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -101,7 +101,7 @@ def test_hist_layout_with_by(self): @pytest.mark.slow def test_hist_no_overlap(self): - from matplotlib.pyplot import subplot, gcf + from matplotlib.pyplot import gcf, subplot x = Series(randn(2)) y = Series(randn(2)) @@ -352,6 +352,7 @@ class TestDataFrameGroupByPlots(TestPlotBase): @pytest.mark.slow def test_grouped_hist_legacy(self): from matplotlib.patches import Rectangle + from pandas.plotting._matplotlib.hist import _grouped_hist df = DataFrame(randn(500, 2), columns=["A", "B"]) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 75eeede472fe9..f5c1c58f3f7ed 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -131,9 +131,10 @@ def test_scatter_matrix_axis(self): @pytest.mark.slow def test_andrews_curves(self, iris): - from pandas.plotting import andrews_curves from matplotlib import cm + from pandas.plotting import andrews_curves + df = iris _check_plot_works(andrews_curves, frame=df, class_column="Name") @@ -206,9 +207,10 @@ def test_andrews_curves(self, iris): @pytest.mark.slow def test_parallel_coordinates(self, iris): - from pandas.plotting import parallel_coordinates from matplotlib import cm + from pandas.plotting import parallel_coordinates + df = iris ax = _check_plot_works(parallel_coordinates, frame=df, class_column="Name") @@ -279,9 +281,10 @@ def test_parallel_coordinates_with_sorted_labels(self): @pytest.mark.slow def test_radviz(self, iris): - from pandas.plotting import radviz from matplotlib import cm + from pandas.plotting import radviz + df = iris _check_plot_works(radviz, frame=df, class_column="Name") @@ -397,6 +400,7 @@ def test_get_standard_colors_no_appending(self): # Make sure not to add more colors so that matplotlib can cycle # correctly. from matplotlib import cm + from pandas.plotting._matplotlib.style import _get_standard_colors color_before = cm.gnuplot(range(5)) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 151bb3bed7207..cc00626e992f3 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -452,7 +452,7 @@ def test_hist_layout_with_by(self): @pytest.mark.slow def test_hist_no_overlap(self): - from matplotlib.pyplot import subplot, gcf + from matplotlib.pyplot import gcf, subplot x = Series(randn(2)) y = Series(randn(2)) @@ -827,6 +827,7 @@ def test_standard_colors(self): @pytest.mark.slow def test_standard_colors_all(self): import matplotlib.colors as colors + from pandas.plotting._matplotlib.style import _get_standard_colors # multiple colors like mediumaquamarine diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index c33443e24b268..d4d4c4190417e 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -2,7 +2,7 @@ from numpy.random import randn import pytest -from pandas._libs import join as libjoin +from pandas._libs.join import inner_join, left_outer_join import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, concat, merge @@ -48,7 +48,7 @@ def test_cython_left_outer_join(self): right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) max_group = 5 - ls, rs = libjoin.left_outer_join(left, right, max_group) + ls, rs = left_outer_join(left, right, max_group) exp_ls = left.argsort(kind="mergesort") exp_rs = right.argsort(kind="mergesort") @@ -70,7 +70,7 @@ def test_cython_right_outer_join(self): right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) max_group = 5 - rs, ls = libjoin.left_outer_join(right, left, max_group) + rs, ls = left_outer_join(right, left, max_group) exp_ls = left.argsort(kind="mergesort") exp_rs = right.argsort(kind="mergesort") @@ -116,7 +116,7 @@ def test_cython_inner_join(self): right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64) max_group = 5 - ls, rs = libjoin.inner_join(left, right, max_group) + ls, rs = inner_join(left, right, max_group) exp_ls = left.argsort(kind="mergesort") exp_rs = right.argsort(kind="mergesort") diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 0b34fab7b80b1..088f8681feb99 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -11,7 +11,6 @@ from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range import pandas._testing as tm - """ Also test support for datetime64[ns] in Series / DataFrame """ @@ -166,6 +165,7 @@ def test_getitem_setitem_datetime_tz_pytz(): def test_getitem_setitem_datetime_tz_dateutil(): from dateutil.tz import tzutc + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz tz = ( diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 19caf4eccf748..4b4ef5ea046be 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -90,7 +90,7 @@ def test_with_nan(self): tm.assert_series_equal(result, expected) def test_periodindex(self): - from pandas import period_range, PeriodIndex + from pandas import PeriodIndex, period_range # array or list or dates N = 50 diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 5c8a0d224c4f9..ef2bafd4ea2ad 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -195,8 +195,8 @@ def test_add_with_duplicate_index(self): tm.assert_series_equal(result, expected) def test_add_na_handling(self): - from decimal import Decimal from datetime import date + from decimal import Decimal s = Series( [Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)] diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index a080bf0feaebc..6c6bdb6b1b2bd 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -6,7 +6,8 @@ from numpy.random import RandomState import pytest -from pandas._libs import algos as libalgos, groupby as libgroupby, hashtable as ht +from pandas._libs import algos as libalgos, hashtable as ht +from pandas._libs.groupby import group_var_float32, group_var_float64 from pandas.compat.numpy import np_array_datetime64_compat import pandas.util._test_decorators as td @@ -1493,7 +1494,7 @@ def test_group_var_constant(self): class TestGroupVarFloat64(GroupVarTestMixin): __test__ = True - algo = staticmethod(libgroupby.group_var_float64) + algo = staticmethod(group_var_float64) dtype = np.float64 rtol = 1e-5 @@ -1516,7 +1517,7 @@ def test_group_var_large_inputs(self): class TestGroupVarFloat32(GroupVarTestMixin): __test__ = True - algo = staticmethod(libgroupby.group_var_float32) + algo = staticmethod(group_var_float32) dtype = np.float32 rtol = 1e-2 diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index e718a6b759963..b32c5e91af295 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -90,7 +90,7 @@ def test_statsmodels(): def test_scikit_learn(df): sklearn = import_module("sklearn") # noqa - from sklearn import svm, datasets + from sklearn import datasets, svm digits = datasets.load_digits() clf = svm.SVC(gamma=0.001, C=100.0) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 4a0212e890d3a..2300d8dd5529b 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -140,7 +140,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): ) def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs): # GH 32865 - values = np.arange(10) + values = np.arange(10.0) values[5] = 100.0 indexer = FixedForwardWindowIndexer(window_size=3) @@ -177,7 +177,7 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) @pytest.mark.parametrize("constructor", [Series, DataFrame]) def test_rolling_forward_skewness(constructor): - values = np.arange(10) + values = np.arange(10.0) values[5] = 100.0 indexer = FixedForwardWindowIndexer(window_size=5) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 12c314d5e9ec9..69cd1d1ba069c 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -108,7 +108,7 @@ def test_ewma_halflife_without_times(halflife_with_times): @pytest.mark.parametrize("min_periods", [0, 2]) def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): halflife = halflife_with_times - data = np.arange(10) + data = np.arange(10.0) data[::2] = np.nan df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)}) result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean() diff --git a/pandas/util/_doctools.py b/pandas/util/_doctools.py index f413490764124..3a8a1a3144269 100644 --- a/pandas/util/_doctools.py +++ b/pandas/util/_doctools.py @@ -53,8 +53,8 @@ def plot(self, left, right, labels=None, vertical: bool = True): vertical : bool, default True If True, use vertical layout. If False, use horizontal layout. """ - import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec + import matplotlib.pyplot as plt if not isinstance(left, list): left = [left] diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index a4a1d83177c50..bdf633839b2cd 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -120,7 +120,9 @@ def _skip_if_no_scipy() -> bool: ) -def skip_if_installed(package: str) -> Callable: +# TODO: return type, _pytest.mark.structures.MarkDecorator is not public +# https://github.com/pytest-dev/pytest/issues/7469 +def skip_if_installed(package: str): """ Skip a test if a package is installed. @@ -134,7 +136,9 @@ def skip_if_installed(package: str) -> Callable: ) -def skip_if_no(package: str, min_version: Optional[str] = None) -> Callable: +# TODO: return type, _pytest.mark.structures.MarkDecorator is not public +# https://github.com/pytest-dev/pytest/issues/7469 +def skip_if_no(package: str, min_version: Optional[str] = None): """ Generic function to help skip tests when required packages are not present on the testing system. @@ -196,14 +200,12 @@ def skip_if_no(package: str, min_version: Optional[str] = None) -> Callable: ) -def skip_if_np_lt( - ver_str: str, reason: Optional[str] = None, *args, **kwds -) -> Callable: +# TODO: return type, _pytest.mark.structures.MarkDecorator is not public +# https://github.com/pytest-dev/pytest/issues/7469 +def skip_if_np_lt(ver_str: str, *args, reason: Optional[str] = None): if reason is None: reason = f"NumPy {ver_str} or greater required" - return pytest.mark.skipif( - _np_version < LooseVersion(ver_str), reason=reason, *args, **kwds - ) + return pytest.mark.skipif(_np_version < LooseVersion(ver_str), *args, reason=reason) def parametrize_fixture_doc(*args): diff --git a/requirements-dev.txt b/requirements-dev.txt index 0c024d1b54637..c0dd77cd73ddc 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,7 +11,7 @@ cpplint flake8<3.8.0 flake8-comprehensions>=3.1.0 flake8-rst>=0.6.0,<=0.7.0 -isort==4.3.21 +isort>=5.2.1 mypy==0.730 pycodestyle gitpython @@ -32,7 +32,7 @@ boto3 botocore>=1.11 hypothesis>=3.82 moto -pytest>=5.0.1,<6.0.0rc0 +pytest>=5.0.1 pytest-cov pytest-xdist>=1.21 pytest-asyncio diff --git a/setup.cfg b/setup.cfg index 00af7f6f1b79a..ee5725e36d193 100644 --- a/setup.cfg +++ b/setup.cfg @@ -105,7 +105,7 @@ known_dtypes = pandas.core.dtypes known_post_core = pandas.tseries,pandas.io,pandas.plotting sections = FUTURE,STDLIB,THIRDPARTY,PRE_LIBS,PRE_CORE,DTYPES,FIRSTPARTY,POST_CORE,LOCALFOLDER known_first_party = pandas -known_third_party = _pytest,announce,dateutil,docutils,flake8,git,hypothesis,jinja2,lxml,matplotlib,numpy,numpydoc,pkg_resources,pyarrow,pytest,pytz,requests,scipy,setuptools,sphinx,sqlalchemy,validate_docstrings,validate_unwanted_patterns,yaml,odf +known_third_party = announce,dateutil,docutils,flake8,git,hypothesis,jinja2,lxml,matplotlib,numpy,numpydoc,pkg_resources,pyarrow,pytest,pytz,requests,scipy,setuptools,sphinx,sqlalchemy,validate_docstrings,validate_unwanted_patterns,yaml,odf multi_line_output = 3 include_trailing_comma = True force_grid_wrap = 0 From b4e0139c62fa3459bd42e2510b50005e150e17d7 Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 1 Aug 2020 17:26:05 -0400 Subject: [PATCH 02/10] Whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b16ca0a80c5b4..a0dc99c6bf86d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -132,7 +132,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')['b'])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`) - From e6099b719ca0d7cfe19c21b063698ab01a9098a9 Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 1 Aug 2020 18:12:18 -0400 Subject: [PATCH 03/10] Fixed whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a0dc99c6bf86d..77942f03ca735 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -132,7 +132,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')['b'])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`) +- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`) - From 0d05b4bfb294557e1d504c3c656c0cf2ba8f04a8 Mon Sep 17 00:00:00 2001 From: Richard Date: Thu, 13 Aug 2020 15:48:42 -0400 Subject: [PATCH 04/10] Removed trailing whitespace --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f057ee405c05c..19488e7bcb25d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -249,7 +249,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.apply` that would some times throw an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`) - Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`) - Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`) -- +- Reshaping ^^^^^^^^^ From 35f03b57b54dd309427d818b2eef81e7794463a1 Mon Sep 17 00:00:00 2001 From: Richard Date: Fri, 14 Aug 2020 18:29:28 -0400 Subject: [PATCH 05/10] Added tests back --- pandas/tests/groupby/test_groupby.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8c51ebf89f5c0..680a02fd96c3b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2069,3 +2069,31 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key(): assert len(result) == 1 key = (1, 2) assert (result[key] == expected[key]).all() + + +@pytest.mark.parametrize( + "attr, value", + [ + ("axis", 1), + ("level", "a"), + ("as_index", False), + ("sort", False), + ("group_keys", False), + ("squeeze", True), + ("observed", True), + ("dropna", False), + ], +) +@pytest.mark.filterwarnings( + "ignore:The `squeeze` parameter is deprecated:FutureWarning" +) +def test_subsetting_columns_keeps_attrs(attr, value): + # GH 9959 - When subsetting columns, don't drop attributes + df = pd.DataFrame({"a": [1], "b": [2], "c": [3]}) + if attr != "axis": + df = df.set_index("a") + + expected = df.groupby("a", **{attr: value}) + + result = expected[["b"]] + assert getattr(result, attr) == getattr(expected, attr) From 161bbf05bff760b9d98e51f38f8bd55a76b13fb7 Mon Sep 17 00:00:00 2001 From: Richard Date: Sun, 16 Aug 2020 10:41:39 -0400 Subject: [PATCH 06/10] Added propagating attributes in the ndim=1 case --- pandas/core/groupby/generic.py | 10 +++++++++- pandas/tests/groupby/test_groupby.py | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1978fef2a6c48..5475fea40cc74 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1685,7 +1685,15 @@ def _gotitem(self, key, ndim: int, subset=None): if subset is None: subset = self.obj[key] return SeriesGroupBy( - subset, selection=key, grouper=self.grouper, observed=self.observed + subset, + level=self.level, + grouper=self.grouper, + selection=key, + sort=self.sort, + group_keys=self.group_keys, + squeeze=self.squeeze, + observed=self.observed, + dropna=self.dropna, ) raise AssertionError("invalid ndim for _gotitem") diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 680a02fd96c3b..604dad6d88197 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2097,3 +2097,8 @@ def test_subsetting_columns_keeps_attrs(attr, value): result = expected[["b"]] assert getattr(result, attr) == getattr(expected, attr) + + if attr in ("axis", "as_index"): + pytest.skip("GH 35443: Attribute currently not passed on to series") + result = expected["b"] + assert getattr(result, attr) == getattr(expected, attr) From fdbd5c2c66546d72950f79389df5e7c7b1e794be Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 22 Aug 2020 12:24:35 -0400 Subject: [PATCH 07/10] Changed some tests from skip to xfail --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 604dad6d88197..bf1c58f771cd1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2099,6 +2099,6 @@ def test_subsetting_columns_keeps_attrs(attr, value): assert getattr(result, attr) == getattr(expected, attr) if attr in ("axis", "as_index"): - pytest.skip("GH 35443: Attribute currently not passed on to series") + pytest.xfail("GH 35443: Attribute currently not passed on to series") result = expected["b"] assert getattr(result, attr) == getattr(expected, attr) From 216e11b6403b1523a60864bedc068a90207220b8 Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 22 Aug 2020 18:37:12 -0400 Subject: [PATCH 08/10] Reparametrized tests --- pandas/tests/groupby/test_groupby.py | 51 ++++++++++++++++++---------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index bf1c58f771cd1..148e330ce37c9 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2072,33 +2072,50 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key(): @pytest.mark.parametrize( - "attr, value", + "klass, attr, value", [ - ("axis", 1), - ("level", "a"), - ("as_index", False), - ("sort", False), - ("group_keys", False), - ("squeeze", True), - ("observed", True), - ("dropna", False), + (DataFrame, "axis", 1), + (DataFrame, "level", "a"), + (DataFrame, "as_index", False), + (DataFrame, "sort", False), + (DataFrame, "group_keys", False), + (DataFrame, "squeeze", True), + (DataFrame, "observed", True), + (DataFrame, "dropna", False), + pytest.param( + Series, + "axis", + 1, + marks=pytest.mark.xfail( + reason="GH 35443: Attribute currently not passed on to series" + ), + ), + (Series, "level", "a"), + pytest.param( + Series, + "as_index", + False, + marks=pytest.mark.xfail( + reason="GH 35443: Returns a DataFrameGroupBy", + strict=False, + ), + ), + (Series, "sort", False), + (Series, "group_keys", False), + (Series, "squeeze", True), + (Series, "observed", True), + (Series, "dropna", False), ], ) @pytest.mark.filterwarnings( "ignore:The `squeeze` parameter is deprecated:FutureWarning" ) -def test_subsetting_columns_keeps_attrs(attr, value): +def test_subsetting_columns_keeps_attrs(klass, attr, value): # GH 9959 - When subsetting columns, don't drop attributes df = pd.DataFrame({"a": [1], "b": [2], "c": [3]}) if attr != "axis": df = df.set_index("a") expected = df.groupby("a", **{attr: value}) - - result = expected[["b"]] - assert getattr(result, attr) == getattr(expected, attr) - - if attr in ("axis", "as_index"): - pytest.xfail("GH 35443: Attribute currently not passed on to series") - result = expected["b"] + result = expected[["b"]] if klass is DataFrame else expected["b"] assert getattr(result, attr) == getattr(expected, attr) From e51e730455a105ee425f839642eb5082a4178233 Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 22 Aug 2020 18:56:32 -0400 Subject: [PATCH 09/10] black --- pandas/tests/groupby/test_groupby.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 148e330ce37c9..884f00f91184e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2096,8 +2096,7 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key(): "as_index", False, marks=pytest.mark.xfail( - reason="GH 35443: Returns a DataFrameGroupBy", - strict=False, + reason="GH 35443: Returns a DataFrameGroupBy", strict=False, ), ), (Series, "sort", False), From c4515b3f3401ac053c381daf776b469543562503 Mon Sep 17 00:00:00 2001 From: Richard Date: Mon, 31 Aug 2020 16:15:35 -0400 Subject: [PATCH 10/10] Removed xfail for as_index case --- pandas/tests/groupby/test_groupby.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 884f00f91184e..c743058c988b4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2091,14 +2091,7 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key(): ), ), (Series, "level", "a"), - pytest.param( - Series, - "as_index", - False, - marks=pytest.mark.xfail( - reason="GH 35443: Returns a DataFrameGroupBy", strict=False, - ), - ), + (Series, "as_index", False), (Series, "sort", False), (Series, "group_keys", False), (Series, "squeeze", True),