From 1ef6c887beb0c5ec2ad595907172d0db361886e0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 14 Oct 2023 12:50:08 -0700 Subject: [PATCH 01/19] DEPS: Bump optional dependencies --- ci/deps/actions-310.yaml | 54 +++++------ ci/deps/actions-311-downstream_compat.yaml | 54 +++++------ ci/deps/actions-311-numpydev.yaml | 2 +- ci/deps/actions-311-pyarrownightly.yaml | 2 +- ci/deps/actions-311.yaml | 52 +++++------ ci/deps/actions-39-minimum_versions.yaml | 54 +++++------ ci/deps/actions-39.yaml | 54 +++++------ ci/deps/actions-pypy-39.yaml | 2 +- ci/deps/circle-310-arm64.yaml | 50 +++++----- ci/meta.yaml | 2 +- doc/source/getting_started/install.rst | 56 +++++------ environment.yml | 46 +++++----- pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/compat/_optional.py | 54 +++++------ pandas/core/arrays/arrow/array.py | 2 +- pandas/core/arrays/string_.py | 2 +- pandas/core/arrays/string_arrow.py | 2 +- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/frame.py | 2 +- pandas/io/orc.py | 4 +- .../tests/arrays/string_/test_string_arrow.py | 4 +- pandas/tests/extension/test_arrow.py | 2 +- pyproject.toml | 92 +++++++++---------- requirements-dev.txt | 46 +++++----- scripts/generate_pip_deps_from_conda.py | 2 +- 25 files changed, 322 insertions(+), 322 deletions(-) diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 180d425b07d82..cf4d05e5ff0a7 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -24,39 +24,39 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4>=4.11.1 - - blosc>=1.21.0 - - bottleneck>=1.3.4 - - fastparquet>=0.8.1 - - fsspec>=2022.05.0 + - beautifulsoup4>=4.11.2 + - blosc>=1.21.2 + - bottleneck>=1.3.6 + - fastparquet>=2022.12.0 + - fsspec>=2022.11.0 - html5lib>=1.1 - hypothesis>=6.46.1 - - gcsfs>=2022.05.0 + - gcsfs>=2022.11.0 - jinja2>=3.1.2 - - lxml>=4.8.0 - - matplotlib>=3.6.1, <3.8 - - numba>=0.55.2 - - numexpr>=2.8.0 + - lxml>=4.9.2 + - matplotlib>=3.6.3, <3.8 + - numba>=0.56.4 + - numexpr>=2.8.4 - odfpy>=1.4.1 - - qtpy>=2.2.0 - - openpyxl>=3.0.10 - - pandas-gbq>=0.17.5 - - psycopg2>=2.9.3 - - pyarrow>=7.0.0 + - qtpy>=2.3.0 + - openpyxl>=3.1.0 + - pandas-gbq>=0.19.0 + - psycopg2>=2.9.5 + - pyarrow>=10.0.1 - pymysql>=1.0.2 - - pyreadstat>=1.1.5 - - pytables>=3.7.0 + - pyreadstat>=1.2.0 + - pytables>=3.8.0 - python-calamine>=0.1.6 - - pyxlsb>=1.0.9 - - s3fs>=2022.05.0 - - scipy>=1.8.1 - - sqlalchemy>=1.4.36 - - tabulate>=0.8.10 - - xarray>=2022.03.0 + - pyxlsb>=1.0.10 + - s3fs>=2022.11.0 + - scipy>=1.10.0 + - sqlalchemy>=2.0.0 + - tabulate>=0.9.0 + - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.3 - - zstandard>=0.17.0 + - xlsxwriter>=3.0.4 + - zstandard>=0.19.0 - pip: - - pyqt5>=5.15.6 - - tzdata>=2022.1 + - pyqt5>=5.15.8 + - tzdata>=2022.7 diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index c8a5d8cfb7640..44cfa2a1e6c43 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -25,38 +25,38 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4>=4.11.1 - - blosc>=1.21.0 - - bottleneck>=1.3.4 - - fastparquet>=0.8.1 - - fsspec>=2022.05.0 + - beautifulsoup4>=4.11.2 + - blosc>=1.21.2 + - bottleneck>=1.3.6 + - fastparquet>=2022.12.0 + - fsspec>=2022.11.0 - html5lib>=1.1 - hypothesis>=6.46.1 - - gcsfs>=2022.05.0 + - gcsfs>=2022.11.0 - jinja2>=3.1.2 - - lxml>=4.8.0 - - matplotlib>=3.6.1, <3.8 - - numba>=0.55.2 - - numexpr>=2.8.0 + - lxml>=4.9.2 + - 
matplotlib>=3.6.3, <3.8 + - numba>=0.56.4 + - numexpr>=2.8.4 - odfpy>=1.4.1 - - qtpy>=2.2.0 - - openpyxl>=3.0.10 - - pandas-gbq>=0.17.5 - - psycopg2>=2.9.3 - - pyarrow>=7.0.0 + - qtpy>=2.3.0 + - openpyxl>=3.1.0 + - pandas-gbq>=0.19.0 + - psycopg2>=2.9.5 + - pyarrow>=10.0.1 - pymysql>=1.0.2 - - pyreadstat>=1.1.5 - - pytables>=3.7.0 + - pyreadstat>=1.2.0 + - pytables>=3.8.0 - python-calamine>=0.1.6 - - pyxlsb>=1.0.9 - - s3fs>=2022.05.0 - - scipy>=1.8.1 - - sqlalchemy>=1.4.36 - - tabulate>=0.8.10 - - xarray>=2022.03.0 + - pyxlsb>=1.0.10 + - s3fs>=2022.11.0 + - scipy>=1.10.0 + - sqlalchemy>=2.0.0 + - tabulate>=0.9.0 + - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.3 - - zstandard>=0.17.0 + - xlsxwriter>=3.0.4 + - zstandard>=0.19.0 # downstream packages - botocore @@ -73,5 +73,5 @@ dependencies: - py - pip: - dataframe-api-compat>=0.1.7 - - pyqt5>=5.15.6 - - tzdata>=2022.1 + - pyqt5>=5.15.8 + - tzdata>=2022.7 diff --git a/ci/deps/actions-311-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml index 9795a1fb39c6f..e6c268a93ad6a 100644 --- a/ci/deps/actions-311-numpydev.yaml +++ b/ci/deps/actions-311-numpydev.yaml @@ -30,4 +30,4 @@ dependencies: - "--pre" - "numpy" - "scipy" - - "tzdata>=2022.1" + - "tzdata>=2022.7" diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml index 1a770d74043bf..4f1afb1de970e 100644 --- a/ci/deps/actions-311-pyarrownightly.yaml +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -24,7 +24,7 @@ dependencies: - pip - pip: - - "tzdata>=2022.1" + - "tzdata>=2022.7" - "--extra-index-url https://pypi.fury.io/arrow-nightlies/" - "--prefer-binary" - "--pre" diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 3d1df9e3bcd59..e1e5d7bcafff6 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -24,39 +24,39 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4>=4.11.1 - - blosc>=1.21.0 - - bottleneck>=1.3.4 - - fastparquet>=0.8.1 - - fsspec>=2022.05.0 + - beautifulsoup4>=4.11.2 + - blosc>=1.21.2 + - bottleneck>=1.3.6 + - fastparquet>=2022.12.0 + - fsspec>=2022.11.0 - html5lib>=1.1 - hypothesis>=6.46.1 - - gcsfs>=2022.05.0 + - gcsfs>=2022.11.0 - jinja2>=3.1.2 - - lxml>=4.8.0 - - matplotlib>=3.6.1, <3.8 - - numba>=0.55.2 - - numexpr>=2.8.0 + - lxml>=4.9.2 + - matplotlib>=3.6.3, <3.8 + - numba>=0.56.4 + - numexpr>=2.8.4 - odfpy>=1.4.1 - - qtpy>=2.2.0 - - openpyxl>=3.0.10 - - pandas-gbq>=0.17.5 - - psycopg2>=2.9.3 - - pyarrow>=7.0.0 + - qtpy>=2.3.0 + - openpyxl>=3.1.0 + - pandas-gbq>=0.19.0 + - psycopg2>=2.9.5 + - pyarrow>=10.0.1 - pymysql>=1.0.2 - - pyreadstat>=1.1.5 + - pyreadstat>=1.2.0 # - pytables>=3.7.0, 3.8.0 is first version that supports 3.11 - python-calamine>=0.1.6 - - pyxlsb>=1.0.9 - - s3fs>=2022.05.0 - - scipy>=1.8.1 - - sqlalchemy>=1.4.36 - - tabulate>=0.8.10 - - xarray>=2022.03.0 + - pyxlsb>=1.0.10 + - s3fs>=2022.11.0 + - scipy>=1.10.0 + - sqlalchemy>=2.0.0 + - tabulate>=0.9.0 + - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.3 - - zstandard>=0.17.0 + - xlsxwriter>=3.0.4 + - zstandard>=0.19.0 - pip: - - pyqt5>=5.15.6 - - tzdata>=2022.1 + - pyqt5>=5.15.8 + - tzdata>=2022.7 diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index 691bdf25d4d96..b8155620499af 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -26,40 +26,40 @@ dependencies: - pytz=2020.1 # optional dependencies - - beautifulsoup4=4.11.1 - - blosc=1.21.0 - - bottleneck=1.3.4 - - fastparquet=0.8.1 - - 
fsspec=2022.05.0 + - beautifulsoup4=4.11.2 + - blosc=1.21.2 + - bottleneck=1.3.6 + - fastparquet=2022.12.0 + - fsspec=2022.11.0 - html5lib=1.1 - hypothesis=6.46.1 - - gcsfs=2022.05.0 + - gcsfs=2022.11.0 - jinja2=3.1.2 - - lxml=4.8.0 - - matplotlib=3.6.1 - - numba=0.55.2 - - numexpr=2.8.0 + - lxml=4.9.2 + - matplotlib=3.6.3 + - numba=0.56.4 + - numexpr=2.8.4 - odfpy=1.4.1 - - qtpy=2.2.0 - - openpyxl=3.0.10 - - pandas-gbq=0.17.5 - - psycopg2=2.9.3 - - pyarrow=7.0.0 + - qtpy=2.3.0 + - openpyxl=3.1.0 + - pandas-gbq=0.19.0 + - psycopg2=2.9.5 + - pyarrow=10.0.1 - pymysql=1.0.2 - - pyreadstat=1.1.5 - - pytables=3.7.0 + - pyreadstat=1.2.0 + - pytables=3.8.0 - python-calamine=0.1.6 - - pyxlsb=1.0.9 - - s3fs=2022.05.0 - - scipy=1.8.1 - - sqlalchemy=1.4.36 - - tabulate=0.8.10 - - xarray=2022.03.0 + - pyxlsb=1.0.10 + - s3fs=2022.11.0 + - scipy=1.10.0 + - sqlalchemy=2.0.0 + - tabulate=0.9.0 + - xarray=2022.12.0 - xlrd=2.0.1 - - xlsxwriter=3.0.3 - - zstandard=0.17.0 + - xlsxwriter=3.0.4 + - zstandard=0.19.0 - pip: - dataframe-api-compat==0.1.7 - - pyqt5==5.15.6 - - tzdata==2022.1 + - pyqt5==5.15.8 + - tzdata==2022.7 diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 199ce4dea1ac0..cda7f6171f0e1 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -24,39 +24,39 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4>=4.11.1 - - blosc>=1.21.0 - - bottleneck>=1.3.4 - - fastparquet>=0.8.1 - - fsspec>=2022.05.0 + - beautifulsoup4>=4.11.2 + - blosc>=1.21.2 + - bottleneck>=1.3.6 + - fastparquet>=2022.12.0 + - fsspec>=2022.11.0 - html5lib>=1.1 - hypothesis>=6.46.1 - - gcsfs>=2022.05.0 + - gcsfs>=2022.11.0 - jinja2>=3.1.2 - - lxml>=4.8.0 - - matplotlib>=3.6.1, <3.8 - - numba>=0.55.2 - - numexpr>=2.8.0 + - lxml>=4.9.2 + - matplotlib>=3.6.3, <3.8 + - numba>=0.56.4 + - numexpr>=2.8.4 - odfpy>=1.4.1 - - qtpy>=2.2.0 - - openpyxl>=3.0.10 - - pandas-gbq>=0.17.5 - - psycopg2>=2.9.3 - - pyarrow>=7.0.0 + - qtpy>=2.3.0 + - openpyxl>=3.1.0 + - pandas-gbq>=0.19.0 + - psycopg2>=2.9.5 + - pyarrow>=10.0.1 - pymysql>=1.0.2 - - pyreadstat>=1.1.5 - - pytables>=3.7.0 + - pyreadstat>=1.2.0 + - pytables>=3.8.0 - python-calamine>=0.1.6 - - pyxlsb>=1.0.9 - - s3fs>=2022.05.0 - - scipy>=1.8.1 - - sqlalchemy>=1.4.36 - - tabulate>=0.8.10 - - xarray>=2022.03.0 + - pyxlsb>=1.0.10 + - s3fs>=2022.11.0 + - scipy>=1.10.0 + - sqlalchemy>=2.0.0 + - tabulate>=0.9.0 + - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.3 - - zstandard>=0.17.0 + - xlsxwriter>=3.0.4 + - zstandard>=0.19.0 - pip: - - pyqt5>=5.15.6 - - tzdata>=2022.1 + - pyqt5>=5.15.8 + - tzdata>=2022.7 diff --git a/ci/deps/actions-pypy-39.yaml b/ci/deps/actions-pypy-39.yaml index a2f4d6395783a..86c155e13f7c3 100644 --- a/ci/deps/actions-pypy-39.yaml +++ b/ci/deps/actions-pypy-39.yaml @@ -25,4 +25,4 @@ dependencies: - python-dateutil - pytz - pip: - - tzdata>=2022.1 + - tzdata>=2022.7 diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml index c9f46b98273b3..3899db633f0cf 100644 --- a/ci/deps/circle-310-arm64.yaml +++ b/ci/deps/circle-310-arm64.yaml @@ -24,36 +24,36 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4>=4.11.1 - - blosc>=1.21.0 - - bottleneck>=1.3.4 - - fastparquet>=0.8.1 - - fsspec>=2022.05.0 + - beautifulsoup4>=4.11.2 + - blosc>=1.21.2 + - bottleneck>=1.3.6 + - fastparquet>=2022.12.0 + - fsspec>=2022.11.0 - html5lib>=1.1 - hypothesis>=6.46.1 - - gcsfs>=2022.05.0 + - gcsfs>=2022.11.0 - jinja2>=3.1.2 - - lxml>=4.8.0 - - matplotlib>=3.6.1, <3.8 + - lxml>=4.9.2 + - matplotlib>=3.6.3, <3.8 # 
test_numba_vs_cython segfaults with numba 0.57 - - numba>=0.55.2, <0.57.0 - - numexpr>=2.8.0 + - numba>=0.56.4, <0.57.0 + - numexpr>=2.8.4 - odfpy>=1.4.1 - - qtpy>=2.2.0 - - openpyxl>=3.0.10 - - pandas-gbq>=0.17.5 - - psycopg2>=2.9.3 - - pyarrow>=7.0.0 + - qtpy>=2.3.0 + - openpyxl>=3.1.0 + - pandas-gbq>=0.19.0 + - psycopg2>=2.9.5 + - pyarrow>=10.0.1 - pymysql>=1.0.2 - # - pyreadstat>=1.1.5 not available on ARM - - pytables>=3.7.0 + # - pyreadstat>=1.2.0 not available on ARM + - pytables>=3.8.0 - python-calamine>=0.1.6 - - pyxlsb>=1.0.9 - - s3fs>=2022.05.0 - - scipy>=1.8.1 - - sqlalchemy>=1.4.36 - - tabulate>=0.8.10 - - xarray>=2022.03.0 + - pyxlsb>=1.0.10 + - s3fs>=2022.11.0 + - scipy>=1.10.0 + - sqlalchemy>=2.0.0 + - tabulate>=0.9.0 + - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.3 - - zstandard>=0.17.0 + - xlsxwriter>=3.0.4 + - zstandard>=0.19.0 diff --git a/ci/meta.yaml b/ci/meta.yaml index 77f536a18a204..c02539e317b1a 100644 --- a/ci/meta.yaml +++ b/ci/meta.yaml @@ -38,7 +38,7 @@ requirements: - numpy >=1.23.2 # [py>=311] - python-dateutil >=2.8.2 - pytz >=2020.1 - - python-tzdata >=2022.1 + - python-tzdata >=2022.7 test: imports: diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 4a0ad4ae658c3..251ca6c317d4a 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -206,7 +206,7 @@ Package Minimum support `NumPy `__ 1.22.4 `python-dateutil `__ 2.8.2 `pytz `__ 2020.1 -`tzdata `__ 2022.1 +`tzdata `__ 2022.7 ================================================================ ========================== .. _install.optional_dependencies: @@ -239,9 +239,9 @@ Installable with ``pip install "pandas[performance]"`` ===================================================== ================== ================== =================================================================================================================================================================================== Dependency Minimum Version pip extra Notes ===================================================== ================== ================== =================================================================================================================================================================================== -`numexpr `__ 2.8.0 performance Accelerates certain numerical operations by using multiple cores as well as smart chunking and caching to achieve large speedups -`bottleneck `__ 1.3.4 performance Accelerates certain types of ``nan`` by using specialized cython routines to achieve large speedup. -`numba `__ 0.55.2 performance Alternative execution engine for operations that accept ``engine="numba"`` using a JIT compiler that translates Python functions to optimized machine code using the LLVM compiler. +`numexpr `__ 2.8.4 performance Accelerates certain numerical operations by using multiple cores as well as smart chunking and caching to achieve large speedups +`bottleneck `__ 1.3.6 performance Accelerates certain types of ``nan`` by using specialized cython routines to achieve large speedup. +`numba `__ 0.56.4 performance Alternative execution engine for operations that accept ``engine="numba"`` using a JIT compiler that translates Python functions to optimized machine code using the LLVM compiler. 
===================================================== ================== ================== =================================================================================================================================================================================== Visualization @@ -252,9 +252,9 @@ Installable with ``pip install "pandas[plot, output-formatting]"``. ========================= ================== ================== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== ================== ============================================================= -matplotlib 3.6.1 plot Plotting library +matplotlib 3.6.3 plot Plotting library Jinja2 3.1.2 output-formatting Conditional formatting with DataFrame.style -tabulate 0.8.10 output-formatting Printing in Markdown-friendly format (see `tabulate`_) +tabulate 0.9.0 output-formatting Printing in Markdown-friendly format (see `tabulate`_) ========================= ================== ================== ============================================================= Computation @@ -265,8 +265,8 @@ Installable with ``pip install "pandas[computation]"``. ========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -SciPy 1.8.1 computation Miscellaneous statistical functions -xarray 2022.03.0 computation pandas-like API for N-dimensional data +SciPy 1.10.0 computation Miscellaneous statistical functions +xarray 2022.12.0 computation pandas-like API for N-dimensional data ========================= ================== =============== ============================================================= Excel files @@ -278,9 +278,9 @@ Installable with ``pip install "pandas[excel]"``. Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= xlrd 2.0.1 excel Reading Excel -xlsxwriter 3.0.3 excel Writing Excel -openpyxl 3.0.10 excel Reading / writing for xlsx files -pyxlsb 1.0.9 excel Reading for xlsb files +xlsxwriter 3.0.4 excel Writing Excel +openpyxl 3.1.0 excel Reading / writing for xlsx files +pyxlsb 1.0.10 excel Reading for xlsb files python-calamine 0.1.6 excel Reading for xls/xlsx/xlsb/ods files ========================= ================== =============== ============================================================= @@ -292,9 +292,9 @@ Installable with ``pip install "pandas[html]"``. ========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -BeautifulSoup4 4.11.1 html HTML parser for read_html +BeautifulSoup4 4.11.2 html HTML parser for read_html html5lib 1.1 html HTML parser for read_html -lxml 4.8.0 html HTML parser for read_html +lxml 4.9.2 html HTML parser for read_html ========================= ================== =============== ============================================================= One of the following combinations of libraries is needed to use the @@ -329,7 +329,7 @@ Installable with ``pip install "pandas[xml]"``. 
========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -lxml 4.8.0 xml XML parser for read_xml and tree builder for to_xml +lxml 4.9.2 xml XML parser for read_xml and tree builder for to_xml ========================= ================== =============== ============================================================= SQL databases @@ -340,10 +340,10 @@ Installable with ``pip install "pandas[postgresql, mysql, sql-other]"``. ========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -SQLAlchemy 1.4.36 postgresql, SQL support for databases other than sqlite +SQLAlchemy 2.0.0 postgresql, SQL support for databases other than sqlite mysql, sql-other -psycopg2 2.9.3 postgresql PostgreSQL engine for sqlalchemy +psycopg2 2.9.5 postgresql PostgreSQL engine for sqlalchemy pymysql 1.0.2 mysql MySQL engine for sqlalchemy ========================= ================== =============== ============================================================= @@ -355,12 +355,12 @@ Installable with ``pip install "pandas[hdf5, parquet, feather, spss, excel]"`` ========================= ================== ================ ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== ================ ============================================================= -PyTables 3.7.0 hdf5 HDF5-based reading / writing -blosc 1.21.0 hdf5 Compression for HDF5; only available on ``conda`` +PyTables 3.8.0 hdf5 HDF5-based reading / writing +blosc 1.21.2 hdf5 Compression for HDF5; only available on ``conda`` zlib hdf5 Compression for HDF5 -fastparquet 0.8.1 - Parquet reading / writing (pyarrow is default) -pyarrow 7.0.0 parquet, feather Parquet, ORC, and feather reading / writing -pyreadstat 1.1.5 spss SPSS files (.sav) reading +fastparquet 2022.12.0 - Parquet reading / writing (pyarrow is default) +pyarrow 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing +pyreadstat 1.2.0 spss SPSS files (.sav) reading odfpy 1.4.1 excel Open document format (.odf, .ods, .odt) reading / writing ========================= ================== ================ ============================================================= @@ -380,11 +380,11 @@ Installable with ``pip install "pandas[fss, aws, gcp]"`` ========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -fsspec 2022.05.0 fss, gcp, aws Handling files aside from simple local and HTTP (required +fsspec 2022.11.0 fss, gcp, aws Handling files aside from simple local and HTTP (required dependency of s3fs, gcsfs). 
-gcsfs 2022.05.0 gcp Google Cloud Storage access -pandas-gbq 0.17.5 gcp Google Big Query access -s3fs 2022.05.0 aws Amazon S3 access +gcsfs 2022.11.0 gcp Google Cloud Storage access +pandas-gbq 0.19.0 gcp Google Big Query access +s3fs 2022.11.0 aws Amazon S3 access ========================= ================== =============== ============================================================= Clipboard @@ -395,8 +395,8 @@ Installable with ``pip install "pandas[clipboard]"``. ========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -PyQt4/PyQt5 5.15.6 clipboard Clipboard I/O -qtpy 2.2.0 clipboard Clipboard I/O +PyQt4/PyQt5 5.15.8 clipboard Clipboard I/O +qtpy 2.3.0 clipboard Clipboard I/O ========================= ================== =============== ============================================================= .. note:: @@ -413,7 +413,7 @@ Installable with ``pip install "pandas[compression]"`` ========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -Zstandard 0.17.0 compression Zstandard compression +Zstandard 0.19.0 compression Zstandard compression ========================= ================== =============== ============================================================= Consortium Standard diff --git a/environment.yml b/environment.yml index a9648f3298198..1b8756aec89e4 100644 --- a/environment.yml +++ b/environment.yml @@ -25,38 +25,38 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4>=4.11.1 + - beautifulsoup4>=4.11.2 - blosc - - bottleneck>=1.3.4 - - fastparquet>=0.8.1 - - fsspec>=2022.05.0 + - bottleneck>=1.3.6 + - fastparquet>=2022.12.0 + - fsspec>=2022.11.0 - html5lib>=1.1 - hypothesis>=6.46.1 - - gcsfs>=2022.05.0 + - gcsfs>=2022.11.0 - ipython - jinja2>=3.1.2 - - lxml>=4.8.0 - - matplotlib>=3.6.1, <3.8 - - numba>=0.55.2 - - numexpr>=2.8.0 - - openpyxl>=3.0.10 + - lxml>=4.9.2 + - matplotlib>=3.6.3, <3.8 + - numba>=0.56.4 + - numexpr>=2.8.4 + - openpyxl>=3.1.0 - odfpy>=1.4.1 - py - - psycopg2>=2.9.3 - - pyarrow>=7.0.0 + - psycopg2>=2.9.5 + - pyarrow>=10.0.1 - pymysql>=1.0.2 - - pyreadstat>=1.1.5 - - pytables>=3.7.0 + - pyreadstat>=1.2.0 + - pytables>=3.8.0 - python-calamine>=0.1.6 - - pyxlsb>=1.0.9 - - s3fs>=2022.05.0 - - scipy>=1.8.1 - - sqlalchemy>=1.4.36 - - tabulate>=0.8.10 - - xarray>=2022.03.0 + - pyxlsb>=1.0.10 + - s3fs>=2022.11.0 + - scipy>=1.10.0 + - sqlalchemy>=2.0.0 + - tabulate>=0.9.0 + - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.3 - - zstandard>=0.17.0 + - xlsxwriter>=3.0.4 + - zstandard>=0.19.0 # downstream packages - dask-core @@ -117,4 +117,4 @@ dependencies: - dataframe-api-compat>=0.1.7 - sphinx-toggleprompt # conda-forge version has stricter pins on jinja2 - typing_extensions; python_version<"3.11" - - tzdata>=2022.1 + - tzdata>=2022.7 diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 4c4e3dfa4bf76..10e5790dd1c35 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -67,7 +67,7 @@ cdef bint is_utc_zoneinfo(tzinfo tz): return False # Warn if tzdata is too old, even if there is a system tzdata to alert # users about the 
mismatch between local/system tzdata - import_optional_dependency("tzdata", errors="warn", min_version="2022.1") + import_optional_dependency("tzdata", errors="warn", min_version="2022.7") return tz is utc_zoneinfo diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index fa0e9e974ea39..ca2e362149248 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -15,42 +15,42 @@ # Update install.rst & setup.cfg when updating versions! VERSIONS = { - "bs4": "4.11.1", - "blosc": "1.21.0", - "bottleneck": "1.3.4", + "bs4": "4.11.2", + "blosc": "1.21.2", + "bottleneck": "1.3.6", "dataframe-api-compat": "0.1.7", - "fastparquet": "0.8.1", - "fsspec": "2022.05.0", + "fastparquet": "2022.12.0", + "fsspec": "2022.11.0", "html5lib": "1.1", "hypothesis": "6.46.1", - "gcsfs": "2022.05.0", + "gcsfs": "2022.11.0", "jinja2": "3.1.2", - "lxml.etree": "4.8.0", - "matplotlib": "3.6.1", - "numba": "0.55.2", - "numexpr": "2.8.0", + "lxml.etree": "4.9.2", + "matplotlib": "3.6.3", + "numba": "0.56.4", + "numexpr": "2.8.4", "odfpy": "1.4.1", - "openpyxl": "3.0.10", - "pandas_gbq": "0.17.5", - "psycopg2": "2.9.3", # (dt dec pq3 ext lo64) + "openpyxl": "3.1.0", + "pandas_gbq": "0.19.0", + "psycopg2": "2.9.5", # (dt dec pq3 ext lo64) "pymysql": "1.0.2", - "pyarrow": "7.0.0", - "pyreadstat": "1.1.5", + "pyarrow": "10.0.1", + "pyreadstat": "1.2.0", "pytest": "7.3.2", "python-calamine": "0.1.6", - "pyxlsb": "1.0.9", - "s3fs": "2022.05.0", - "scipy": "1.8.1", - "sqlalchemy": "1.4.36", - "tables": "3.7.0", - "tabulate": "0.8.10", - "xarray": "2022.03.0", + "pyxlsb": "1.0.10", + "s3fs": "2022.11.0", + "scipy": "1.10.0", + "sqlalchemy": "2.0.0", + "tables": "3.8.0", + "tabulate": "0.9.0", + "xarray": "2022.12.0", "xlrd": "2.0.1", - "xlsxwriter": "3.0.3", - "zstandard": "0.17.0", - "tzdata": "2022.1", - "qtpy": "2.2.0", - "pyqt5": "5.15.6", + "xlsxwriter": "3.0.4", + "zstandard": "0.19.0", + "tzdata": "2022.7", + "qtpy": "2.3.0", + "pyqt5": "5.15.8", } # A mapping from import name to package name (on PyPI) for packages where diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index c91f892936640..00b0966c26f02 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -256,7 +256,7 @@ class ArrowExtensionArray( def __init__(self, values: pa.Array | pa.ChunkedArray) -> None: if pa_version_under7p0: - msg = "pyarrow>=7.0.0 is required for PyArrow backed ArrowExtensionArray." + msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray." raise ImportError(msg) if isinstance(values, pa.Array): self._pa_array = pa.chunked_array([values]) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 693ebad0ca16f..61a8f9006e64f 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -128,7 +128,7 @@ def __init__(self, storage=None) -> None: ) if storage in ("pyarrow", "pyarrow_numpy") and pa_version_under7p0: raise ImportError( - "pyarrow>=7.0.0 is required for PyArrow backed StringArray." + "pyarrow>=10.0.1 is required for PyArrow backed StringArray." ) self.storage = storage diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 24b99b5d4852e..3f4c1d01ef734 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -67,7 +67,7 @@ def _chk_pyarrow_available() -> None: if pa_version_under7p0: - msg = "pyarrow>=7.0.0 is required for PyArrow backed ArrowExtensionArray." 
+ msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray." raise ImportError(msg) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 9f85f82df666f..60a0e47f3c4c2 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2093,7 +2093,7 @@ class ArrowDtype(StorageExtensionDtype): def __init__(self, pyarrow_dtype: pa.DataType) -> None: super().__init__("pyarrow") if pa_version_under7p0: - raise ImportError("pyarrow>=7.0.0 is required for ArrowDtype") + raise ImportError("pyarrow>=10.0.1 is required for ArrowDtype") if not isinstance(pyarrow_dtype, pa.DataType): raise ValueError( f"pyarrow_dtype ({pyarrow_dtype}) must be an instance " diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 09c43822e11e4..483d4281479a5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3073,7 +3073,7 @@ def to_orc( (e.g. via builtin open function). If path is None, a bytes object is returned. engine : {'pyarrow'}, default 'pyarrow' - ORC library to use. Pyarrow must be >= 7.0.0. + ORC library to use. index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 774f9d797b011..d3aee057d6275 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -168,7 +168,7 @@ def to_orc( (e.g. via builtin open function). If path is None, a bytes object is returned. engine : str, default 'pyarrow' - ORC library to use. Pyarrow must be >= 7.0.0. + ORC library to use. index : bool, optional If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. @@ -237,7 +237,7 @@ def to_orc( if engine != "pyarrow": raise ValueError("engine must be 'pyarrow'") - engine = import_optional_dependency(engine, min_version="7.0.0") + engine = import_optional_dependency(engine, min_version="10.0.1") pa = import_optional_dependency("pyarrow") orc = import_optional_dependency("pyarrow.orc") diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index c1d424f12bfc4..0ed629e339973 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -19,7 +19,7 @@ skip_if_no_pyarrow = pytest.mark.skipif( pa_version_under7p0, - reason="pyarrow>=7.0.0 is required for PyArrow backed StringArray", + reason="pyarrow>=10.0.1 is required for PyArrow backed StringArray", ) @@ -161,7 +161,7 @@ def test_from_sequence_wrong_dtype_raises(): reason="pyarrow is installed", ) def test_pyarrow_not_installed_raises(): - msg = re.escape("pyarrow>=7.0.0 is required for PyArrow backed") + msg = re.escape("pyarrow>=10.0.1 is required for PyArrow backed") with pytest.raises(ImportError, match=msg): StringDtype(storage="pyarrow") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 41312f45838a9..00cf331c93f29 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -63,7 +63,7 @@ ) from pandas.tests.extension import base -pa = pytest.importorskip("pyarrow", minversion="7.0.0") +pa = pytest.importorskip("pyarrow", minversion="10.0.1") from pandas.core.arrays.arrow.array import ArrowExtensionArray from pandas.core.arrays.arrow.extension_types import ArrowPeriodType diff --git a/pyproject.toml b/pyproject.toml index a5aaa72289209..0ae55c0f0dbec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ 
dependencies = [ "numpy>=1.26.0,<2; python_version>='3.12'", "python-dateutil>=2.8.2", "pytz>=2020.1", - "tzdata>=2022.1" + "tzdata>=2022.7" ] classifiers = [ 'Development Status :: 5 - Production/Stable', @@ -62,66 +62,66 @@ matplotlib = "pandas:plotting._matplotlib" [project.optional-dependencies] test = ['hypothesis>=6.46.1', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0', 'pytest-asyncio>=0.17.0'] -performance = ['bottleneck>=1.3.4', 'numba>=0.55.2', 'numexpr>=2.8.0'] -computation = ['scipy>=1.8.1', 'xarray>=2022.03.0'] -fss = ['fsspec>=2022.05.0'] -aws = ['s3fs>=2022.05.0'] -gcp = ['gcsfs>=2022.05.0', 'pandas-gbq>=0.17.5'] -excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.10', 'python-calamine>=0.1.6', 'pyxlsb>=1.0.9', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.3'] -parquet = ['pyarrow>=7.0.0'] -feather = ['pyarrow>=7.0.0'] +performance = ['bottleneck>=1.3.6', 'numba>=0.56.4', 'numexpr>=2.8.4'] +computation = ['scipy>=1.10.0', 'xarray>=2022.12.0'] +fss = ['fsspec>=2022.11.0'] +aws = ['s3fs>=2022.11.0'] +gcp = ['gcsfs>=2022.11.0', 'pandas-gbq>=0.19.0'] +excel = ['odfpy>=1.4.1', 'openpyxl>=3.1.0', 'python-calamine>=0.1.6', 'pyxlsb>=1.0.10', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.4'] +parquet = ['pyarrow>=10.0.1'] +feather = ['pyarrow>=10.0.1'] hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) #'blosc>=1.20.1', - 'tables>=3.7.0'] -spss = ['pyreadstat>=1.1.5'] -postgresql = ['SQLAlchemy>=1.4.36', 'psycopg2>=2.9.3'] -mysql = ['SQLAlchemy>=1.4.36', 'pymysql>=1.0.2'] -sql-other = ['SQLAlchemy>=1.4.36'] -html = ['beautifulsoup4>=4.11.1', 'html5lib>=1.1', 'lxml>=4.8.0'] -xml = ['lxml>=4.8.0'] -plot = ['matplotlib>=3.6.1'] -output-formatting = ['jinja2>=3.1.2', 'tabulate>=0.8.10'] -clipboard = ['PyQt5>=5.15.6', 'qtpy>=2.2.0'] -compression = ['zstandard>=0.17.0'] + 'tables>=3.8.0'] +spss = ['pyreadstat>=1.2.0'] +postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.5'] +mysql = ['SQLAlchemy>=2.0.0', 'pymysql>=1.0.2'] +sql-other = ['SQLAlchemy>=2.0.0'] +html = ['beautifulsoup4>=4.11.2', 'html5lib>=1.1', 'lxml>=4.9.2'] +xml = ['lxml>=4.9.2'] +plot = ['matplotlib>=3.6.3'] +output-formatting = ['jinja2>=3.1.2', 'tabulate>=0.9.0'] +clipboard = ['PyQt5>=5.15.8', 'qtpy>=2.3.0'] +compression = ['zstandard>=0.19.0'] consortium-standard = ['dataframe-api-compat>=0.1.7'] -all = ['beautifulsoup4>=4.11.1', +all = ['beautifulsoup4>=4.11.2', # blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) - #'blosc>=1.21.0', - 'bottleneck>=1.3.4', + #'blosc>=1.21.2', + 'bottleneck>=1.3.6', 'dataframe-api-compat>=0.1.7', - 'fastparquet>=0.8.1', - 'fsspec>=2022.05.0', - 'gcsfs>=2022.05.0', + 'fastparquet>=2022.12.0', + 'fsspec>=2022.11.0', + 'gcsfs>=2022.11.0', 'html5lib>=1.1', 'hypothesis>=6.46.1', 'jinja2>=3.1.2', - 'lxml>=4.8.0', - 'matplotlib>=3.6.1', - 'numba>=0.55.2', - 'numexpr>=2.8.0', + 'lxml>=4.9.2', + 'matplotlib>=3.6.3', + 'numba>=0.56.4', + 'numexpr>=2.8.4', 'odfpy>=1.4.1', - 'openpyxl>=3.0.10', - 'pandas-gbq>=0.17.5', - 'psycopg2>=2.9.3', - 'pyarrow>=7.0.0', + 'openpyxl>=3.1.0', + 'pandas-gbq>=0.19.0', + 'psycopg2>=2.9.5', + 'pyarrow>=10.0.1', 'pymysql>=1.0.2', - 'PyQt5>=5.15.6', - 'pyreadstat>=1.1.5', + 'PyQt5>=5.15.8', + 'pyreadstat>=1.2.0', 'pytest>=7.3.2', 'pytest-xdist>=2.2.0', 'pytest-asyncio>=0.17.0', 'python-calamine>=0.1.6', - 'pyxlsb>=1.0.9', - 'qtpy>=2.2.0', - 'scipy>=1.8.1', - 's3fs>=2022.05.0', - 'SQLAlchemy>=1.4.36', - 'tables>=3.7.0', - 'tabulate>=0.8.10', - 'xarray>=2022.03.0', + 'pyxlsb>=1.0.10', + 'qtpy>=2.3.0', + 'scipy>=1.10.0', + 's3fs>=2022.11.0', + 
'SQLAlchemy>=2.0.0', + 'tables>=3.8.0', + 'tabulate>=0.9.0', + 'xarray>=2022.12.0', 'xlrd>=2.0.1', - 'xlsxwriter>=3.0.3', - 'zstandard>=0.17.0'] + 'xlsxwriter>=3.0.4', + 'zstandard>=0.19.0'] # TODO: Remove after setuptools support is dropped. [tool.setuptools] diff --git a/requirements-dev.txt b/requirements-dev.txt index 6e1a6058dce0e..da7b8176399d1 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -14,38 +14,38 @@ coverage python-dateutil numpy<2 pytz -beautifulsoup4>=4.11.1 +beautifulsoup4>=4.11.2 blosc -bottleneck>=1.3.4 -fastparquet>=0.8.1 -fsspec>=2022.05.0 +bottleneck>=1.3.6 +fastparquet>=2022.12.0 +fsspec>=2022.11.0 html5lib>=1.1 hypothesis>=6.46.1 -gcsfs>=2022.05.0 +gcsfs>=2022.11.0 ipython jinja2>=3.1.2 -lxml>=4.8.0 -matplotlib>=3.6.1, <3.8 -numba>=0.55.2 -numexpr>=2.8.0 -openpyxl>=3.0.10 +lxml>=4.9.2 +matplotlib>=3.6.3, <3.8 +numba>=0.56.4 +numexpr>=2.8.4 +openpyxl>=3.1.0 odfpy>=1.4.1 py -psycopg2-binary>=2.9.3 -pyarrow>=7.0.0 +psycopg2-binary>=2.9.5 +pyarrow>=10.0.1 pymysql>=1.0.2 -pyreadstat>=1.1.5 -tables>=3.7.0 +pyreadstat>=1.2.0 +tables>=3.8.0 python-calamine>=0.1.6 -pyxlsb>=1.0.9 -s3fs>=2022.05.0 -scipy>=1.8.1 -SQLAlchemy>=1.4.36 -tabulate>=0.8.10 -xarray>=2022.03.0 +pyxlsb>=1.0.10 +s3fs>=2022.11.0 +scipy>=1.10.0 +SQLAlchemy>=2.0.0 +tabulate>=0.9.0 +xarray>=2022.12.0 xlrd>=2.0.1 -xlsxwriter>=3.0.3 -zstandard>=0.17.0 +xlsxwriter>=3.0.4 +zstandard>=0.19.0 dask seaborn moto @@ -85,4 +85,4 @@ pygments dataframe-api-compat>=0.1.7 sphinx-toggleprompt typing_extensions; python_version<"3.11" -tzdata>=2022.1 +tzdata>=2022.7 diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 2ca4455158db5..49fecca253ba4 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -24,7 +24,7 @@ import yaml EXCLUDE = {"python", "c-compiler", "cxx-compiler"} -REMAP_VERSION = {"tzdata": "2022.1"} +REMAP_VERSION = {"tzdata": "2022.7"} RENAME = { "pytables": "tables", "psycopg2": "psycopg2-binary", From 650d33290212ead149847c1565d8bf782c8ebfa0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 14 Oct 2023 13:08:58 -0700 Subject: [PATCH 02/19] Clean up some version checks --- pandas/compat/numpy/__init__.py | 1 + pandas/core/computation/check.py | 6 +----- pandas/core/interchange/buffer.py | 15 +++++++-------- pandas/tests/interchange/test_impl.py | 4 ++-- pandas/util/_test_decorators.py | 13 ------------- 5 files changed, 11 insertions(+), 28 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 51c9892b64a08..a1ac1024fc3e0 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -8,6 +8,7 @@ # numpy versioning _np_version = np.__version__ _nlv = Version(_np_version) +np_version_lt1p23 = _nlv < Version("1.23") np_version_gte1p24 = _nlv >= Version("1.24") np_version_gte1p24p3 = _nlv >= Version("1.24.3") np_version_gte1p25 = _nlv >= Version("1.25") diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py index 3221b158241f5..caccf34f81111 100644 --- a/pandas/core/computation/check.py +++ b/pandas/core/computation/check.py @@ -4,9 +4,5 @@ ne = import_optional_dependency("numexpr", errors="warn") NUMEXPR_INSTALLED = ne is not None -if NUMEXPR_INSTALLED: - NUMEXPR_VERSION = ne.__version__ -else: - NUMEXPR_VERSION = None -__all__ = ["NUMEXPR_INSTALLED", "NUMEXPR_VERSION"] +__all__ = ["NUMEXPR_INSTALLED"] diff --git a/pandas/core/interchange/buffer.py 
b/pandas/core/interchange/buffer.py index b31a2526a063d..a54e4428bd836 100644 --- a/pandas/core/interchange/buffer.py +++ b/pandas/core/interchange/buffer.py @@ -1,16 +1,17 @@ from __future__ import annotations -from typing import Any - -import numpy as np +from typing import ( + TYPE_CHECKING, + Any, +) from pandas.core.interchange.dataframe_protocol import ( Buffer, DlpackDeviceType, ) -from pandas.util.version import Version -_NUMPY_HAS_DLPACK = Version(np.__version__) >= Version("1.22.0") +if TYPE_CHECKING: + import numpy as np class PandasBuffer(Buffer): @@ -55,9 +56,7 @@ def __dlpack__(self) -> Any: """ Represent this structure as DLPack interface. """ - if _NUMPY_HAS_DLPACK: - return self._x.__dlpack__() - raise NotImplementedError("__dlpack__") + return self._x.__dlpack__() def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: """ diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 8a25a2c1889f3..ee3a78864ece0 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -4,7 +4,7 @@ import pytest from pandas._libs.tslibs import iNaT -import pandas.util._test_decorators as td +from pandas.compat.numpy import np_version_lt1p23 import pandas as pd import pandas._testing as tm @@ -263,7 +263,7 @@ def test_datetime(): tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) -@td.skip_if_np_lt("1.23") +@pytest.mark.skipif(np_version_lt1p23, reason="Numpy > 1.23 required") def test_categorical_to_numpy_dlpack(): # https://github.com/pandas-dev/pandas/issues/48393 df = pd.DataFrame({"A": pd.Categorical(["a", "b", "a"])}) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 9be0c3edaa998..780cdee25d707 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -31,7 +31,6 @@ def test_foo(): Callable, ) -import numpy as np import pytest from pandas._config import get_option @@ -178,18 +177,6 @@ def skip_if_no(package: str, min_version: str | None = None) -> pytest.MarkDecor ) -def skip_if_np_lt( - ver_str: str, *args, reason: str | None = None -) -> pytest.MarkDecorator: - if reason is None: - reason = f"NumPy {ver_str} or greater required" - return pytest.mark.skipif( - Version(np.__version__) < Version(ver_str), - *args, - reason=reason, - ) - - def parametrize_fixture_doc(*args) -> Callable[[F], F]: """ Intended for use as a decorator for parametrized fixture, From bb452c4367958d56714d8822948318d5a4398bfe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 14 Oct 2023 14:11:44 -0700 Subject: [PATCH 03/19] Address old pyarrow checks --- pandas/_testing/__init__.py | 4 +- pandas/compat/__init__.py | 8 +- pandas/compat/pyarrow.py | 10 +- pandas/core/arrays/_arrow_string_mixins.py | 4 +- pandas/core/arrays/arrow/accessors.py | 4 +- pandas/core/arrays/arrow/array.py | 20 +-- pandas/core/arrays/string_.py | 4 +- pandas/core/arrays/string_arrow.py | 6 +- pandas/core/dtypes/dtypes.py | 6 +- pandas/io/orc.py | 19 --- .../tests/arrays/period/test_arrow_compat.py | 6 +- .../tests/arrays/string_/test_string_arrow.py | 29 ++-- pandas/tests/copy_view/test_astype.py | 11 +- pandas/tests/extension/test_arrow.py | 145 ++---------------- pandas/tests/frame/methods/test_astype.py | 3 +- pandas/tests/groupby/test_groupby_dropna.py | 4 +- .../tests/indexes/multi/test_constructors.py | 5 +- pandas/tests/io/test_parquet.py | 34 +--- 18 files changed, 67 insertions(+), 255 deletions(-) diff --git 
a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 73835252c0329..160e2964896ba 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -24,7 +24,7 @@ set_locale, ) -from pandas.compat import pa_version_under7p0 +from pandas.compat import pa_version_under10p1 from pandas.core.dtypes.common import ( is_float_dtype, @@ -210,7 +210,7 @@ ] ] -if not pa_version_under7p0: +if not pa_version_under10p1: import pyarrow as pa UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()] diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 684e9dccdc0f9..ea8cfb7cc144b 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -25,9 +25,7 @@ import pandas.compat.compressors from pandas.compat.numpy import is_numpy_dev from pandas.compat.pyarrow import ( - pa_version_under7p0, - pa_version_under8p0, - pa_version_under9p0, + pa_version_under10p1, pa_version_under11p0, pa_version_under13p0, pa_version_under14p0, @@ -182,9 +180,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]: __all__ = [ "is_numpy_dev", - "pa_version_under7p0", - "pa_version_under8p0", - "pa_version_under9p0", + "pa_version_under10p1", "pa_version_under11p0", "pa_version_under13p0", "pa_version_under14p0", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 12f58be109d98..d125904ba83f8 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -8,19 +8,13 @@ import pyarrow as pa _palv = Version(Version(pa.__version__).base_version) - pa_version_under7p0 = _palv < Version("7.0.0") - pa_version_under8p0 = _palv < Version("8.0.0") - pa_version_under9p0 = _palv < Version("9.0.0") - pa_version_under10p0 = _palv < Version("10.0.0") + pa_version_under10p1 = _palv < Version("10.0.1") pa_version_under11p0 = _palv < Version("11.0.0") pa_version_under12p0 = _palv < Version("12.0.0") pa_version_under13p0 = _palv < Version("13.0.0") pa_version_under14p0 = _palv < Version("14.0.0") except ImportError: - pa_version_under7p0 = True - pa_version_under8p0 = True - pa_version_under9p0 = True - pa_version_under10p0 = True + pa_version_under10p1 = True pa_version_under11p0 = True pa_version_under12p0 = True pa_version_under13p0 = True diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py index 63db03340683b..cc41985843574 100644 --- a/pandas/core/arrays/_arrow_string_mixins.py +++ b/pandas/core/arrays/_arrow_string_mixins.py @@ -4,9 +4,9 @@ import numpy as np -from pandas.compat import pa_version_under7p0 +from pandas.compat import pa_version_under10p1 -if not pa_version_under7p0: +if not pa_version_under10p1: import pyarrow as pa import pyarrow.compute as pc diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py index e4ed255476e8e..cbd727d842d83 100644 --- a/pandas/core/arrays/arrow/accessors.py +++ b/pandas/core/arrays/arrow/accessors.py @@ -4,9 +4,9 @@ from typing import TYPE_CHECKING -from pandas.compat import pa_version_under7p0 +from pandas.compat import pa_version_under10p1 -if not pa_version_under7p0: +if not pa_version_under10p1: import pyarrow as pa import pyarrow.compute as pc diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 00b0966c26f02..15b928b10e4d9 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -21,9 +21,7 @@ timezones, ) from pandas.compat import ( - pa_version_under7p0, - pa_version_under8p0, - pa_version_under9p0, + 
pa_version_under10p1, pa_version_under11p0, pa_version_under13p0, ) @@ -69,7 +67,7 @@ from pandas.io._util import _arrow_dtype_mapping from pandas.tseries.frequencies import to_offset -if not pa_version_under7p0: +if not pa_version_under10p1: import pyarrow as pa import pyarrow.compute as pc @@ -255,7 +253,7 @@ class ArrowExtensionArray( _dtype: ArrowDtype def __init__(self, values: pa.Array | pa.ChunkedArray) -> None: - if pa_version_under7p0: + if pa_version_under10p1: msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray." raise ImportError(msg) if isinstance(values, pa.Array): @@ -1756,7 +1754,7 @@ def _rank_calc( ascending: bool = True, pct: bool = False, ): - if pa_version_under9p0 or axis != 0: + if axis != 0: ranked = super()._rank( axis=axis, method=method, @@ -1993,16 +1991,6 @@ def _replace_with_mask( if isinstance(replacements, pa.ChunkedArray): # replacements must be array or scalar, not ChunkedArray replacements = replacements.combine_chunks() - if pa_version_under8p0: - # pc.replace_with_mask seems to be a bit unreliable for versions < 8.0: - # version <= 7: segfaults with various types - # version <= 6: fails to replace nulls - if isinstance(replacements, pa.Array): - indices = np.full(len(values), None) - indices[mask] = np.arange(len(replacements)) - indices = pa.array(indices, type=pa.int64()) - replacements = replacements.take(indices) - return cls._if_else(mask, replacements, values) if isinstance(values, pa.ChunkedArray) and pa.types.is_boolean(values.type): # GH#52059 replace_with_mask segfaults for chunked array # https://github.com/apache/arrow/issues/34634 diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 61a8f9006e64f..4e2fc5a563f85 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -16,7 +16,7 @@ ) from pandas._libs.arrays import NDArrayBacked from pandas._libs.lib import ensure_string_array -from pandas.compat import pa_version_under7p0 +from pandas.compat import pa_version_under10p1 from pandas.compat.numpy import function as nv from pandas.util._decorators import doc @@ -126,7 +126,7 @@ def __init__(self, storage=None) -> None: f"Storage must be 'python', 'pyarrow' or 'pyarrow_numpy'. " f"Got {storage} instead." ) - if storage in ("pyarrow", "pyarrow_numpy") and pa_version_under7p0: + if storage in ("pyarrow", "pyarrow_numpy") and pa_version_under10p1: raise ImportError( "pyarrow>=10.0.1 is required for PyArrow backed StringArray." ) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 3f4c1d01ef734..e0da4bbc171bf 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -16,7 +16,7 @@ missing as libmissing, ) from pandas.compat import ( - pa_version_under7p0, + pa_version_under10p1, pa_version_under13p0, ) from pandas.util._exceptions import find_stack_level @@ -42,7 +42,7 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin -if not pa_version_under7p0: +if not pa_version_under10p1: import pyarrow as pa import pyarrow.compute as pc @@ -66,7 +66,7 @@ def _chk_pyarrow_available() -> None: - if pa_version_under7p0: + if pa_version_under10p1: msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray." 
raise ImportError(msg) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 60a0e47f3c4c2..2429b419a2a8a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -43,7 +43,7 @@ abbrev_to_npy_unit, ) from pandas._libs.tslibs.offsets import BDay -from pandas.compat import pa_version_under7p0 +from pandas.compat import pa_version_under10p1 from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level @@ -63,7 +63,7 @@ from pandas.util import capitalize_first_letter -if not pa_version_under7p0: +if not pa_version_under10p1: import pyarrow as pa if TYPE_CHECKING: @@ -2092,7 +2092,7 @@ class ArrowDtype(StorageExtensionDtype): def __init__(self, pyarrow_dtype: pa.DataType) -> None: super().__init__("pyarrow") - if pa_version_under7p0: + if pa_version_under10p1: raise ImportError("pyarrow>=10.0.1 is required for ArrowDtype") if not isinstance(pyarrow_dtype, pa.DataType): raise ValueError( diff --git a/pandas/io/orc.py b/pandas/io/orc.py index d3aee057d6275..fed9463c38d5d 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -12,17 +12,9 @@ from pandas._config import using_pyarrow_string_dtype from pandas._libs import lib -from pandas.compat import pa_version_under8p0 from pandas.compat._optional import import_optional_dependency from pandas.util._validators import check_dtype_backend -from pandas.core.dtypes.common import is_unsigned_integer_dtype -from pandas.core.dtypes.dtypes import ( - CategoricalDtype, - IntervalDtype, - PeriodDtype, -) - import pandas as pd from pandas.core.indexes.api import default_index @@ -224,17 +216,6 @@ def to_orc( if df.index.name is not None: raise ValueError("orc does not serialize index meta-data on a default index") - # If unsupported dtypes are found raise NotImplementedError - # In Pyarrow 8.0.0 this check will no longer be needed - if pa_version_under8p0: - for dtype in df.dtypes: - if isinstance( - dtype, (IntervalDtype, CategoricalDtype, PeriodDtype) - ) or is_unsigned_integer_dtype(dtype): - raise NotImplementedError( - "The dtype of one or more columns is not supported yet." 
- ) - if engine != "pyarrow": raise ValueError("engine must be 'pyarrow'") engine = import_optional_dependency(engine, min_version="10.0.1") diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index a1e1e8efe6dee..0f16cfe03e85a 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -1,6 +1,6 @@ import pytest -from pandas.compat.pyarrow import pa_version_under10p0 +from pandas.compat.pyarrow import pa_version_under10p1 from pandas.core.dtypes.dtypes import PeriodDtype @@ -11,7 +11,7 @@ period_array, ) -pa = pytest.importorskip("pyarrow", minversion="1.0.1") +pa = pytest.importorskip("pyarrow") def test_arrow_extension_type(): @@ -28,7 +28,7 @@ def test_arrow_extension_type(): assert hash(p1) != hash(p3) -@pytest.mark.xfail(not pa_version_under10p0, reason="Wrong behavior with pyarrow 10") +@pytest.mark.xfail(not pa_version_under10p1, reason="Wrong behavior with pyarrow 10") @pytest.mark.parametrize( "data, freq", [ diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index 0ed629e339973..b3bc2c7166130 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 +import pandas.util._test_decorators as td import pandas as pd import pandas._testing as tm @@ -17,14 +17,9 @@ ArrowStringArrayNumpySemantics, ) -skip_if_no_pyarrow = pytest.mark.skipif( - pa_version_under7p0, - reason="pyarrow>=10.0.1 is required for PyArrow backed StringArray", -) - -@skip_if_no_pyarrow def test_eq_all_na(): + pytest.importorskip("pyarrow") a = pd.array([pd.NA, pd.NA], dtype=StringDtype("pyarrow")) result = a == a expected = pd.array([pd.NA, pd.NA], dtype="boolean[pyarrow]") @@ -49,11 +44,10 @@ def test_config_bad_storage_raises(): pd.options.mode.string_storage = "foo" -@skip_if_no_pyarrow @pytest.mark.parametrize("chunked", [True, False]) @pytest.mark.parametrize("array", ["numpy", "pyarrow"]) def test_constructor_not_string_type_raises(array, chunked, arrow_string_storage): - import pyarrow as pa + pa = pytest.importorskip("pyarrow") array = pa if array in arrow_string_storage else np @@ -107,8 +101,8 @@ def test_constructor_from_list(): assert result.dtype.storage == "pyarrow" -@skip_if_no_pyarrow def test_from_sequence_wrong_dtype_raises(): + pytest.importorskip("pyarrow") with pd.option_context("string_storage", "python"): ArrowStringArray._from_sequence(["a", None, "c"], dtype="string") @@ -156,10 +150,7 @@ def test_from_sequence_wrong_dtype_raises(): StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow")) -@pytest.mark.skipif( - not pa_version_under7p0, - reason="pyarrow is installed", -) +@td.skip_if_installed("pyarrow") def test_pyarrow_not_installed_raises(): msg = re.escape("pyarrow>=10.0.1 is required for PyArrow backed") @@ -176,7 +167,6 @@ def test_pyarrow_not_installed_raises(): ArrowStringArray._from_sequence(["a", None, "b"]) -@skip_if_no_pyarrow @pytest.mark.parametrize("multiple_chunks", [False, True]) @pytest.mark.parametrize( "key, value, expected", @@ -199,7 +189,7 @@ def test_pyarrow_not_installed_raises(): ], ) def test_setitem(multiple_chunks, key, value, expected): - import pyarrow as pa + pa = pytest.importorskip("pyarrow") result = pa.array(list("abcde")) expected = pa.array(expected) @@ -215,9 +205,8 @@ def test_setitem(multiple_chunks, 
key, value, expected): tm.assert_equal(result, expected) -@skip_if_no_pyarrow def test_setitem_invalid_indexer_raises(): - import pyarrow as pa + pa = pytest.importorskip("pyarrow") arr = ArrowStringArray(pa.array(list("abcde"))) @@ -240,10 +229,10 @@ def test_setitem_invalid_indexer_raises(): arr[[0, 1]] = ["foo", "bar", "baz"] -@skip_if_no_pyarrow @pytest.mark.parametrize("dtype", ["string[pyarrow]", "string[pyarrow_numpy]"]) def test_pickle_roundtrip(dtype): # GH 42600 + pytest.importorskip("pyarrow") expected = pd.Series(range(10), dtype=dtype) expected_sliced = expected.head(2) full_pickled = pickle.dumps(expected) @@ -258,9 +247,9 @@ def test_pickle_roundtrip(dtype): tm.assert_series_equal(result_sliced, expected_sliced) -@skip_if_no_pyarrow def test_string_dtype_error_message(): # GH#55051 + pytest.importorskip("pyarrow") msg = "Storage must be 'python', 'pyarrow' or 'pyarrow_numpy'." with pytest.raises(ValueError, match=msg): StringDtype("bla") diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 4b751ad452ec4..3d5556bdd2823 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -1,7 +1,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 from pandas.compat.pyarrow import pa_version_under12p0 import pandas.util._test_decorators as td @@ -43,8 +42,8 @@ def test_astype_single_dtype(using_copy_on_write): @pytest.mark.parametrize("dtype", ["int64", "Int64"]) @pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"]) def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype): - if new_dtype == "int64[pyarrow]" and pa_version_under7p0: - pytest.skip("pyarrow not installed") + if new_dtype == "int64[pyarrow]": + pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}, dtype=dtype) df_orig = df.copy() df2 = df.astype(new_dtype) @@ -68,8 +67,8 @@ def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype): @pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"]) def test_astype_different_target_dtype(using_copy_on_write, dtype): - if dtype == "int32[pyarrow]" and pa_version_under7p0: - pytest.skip("pyarrow not installed") + if dtype == "int32[pyarrow]": + pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() df2 = df.astype(dtype) @@ -187,8 +186,8 @@ def test_astype_different_timezones_different_reso(using_copy_on_write): assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) -@pytest.mark.skipif(pa_version_under7p0, reason="pyarrow not installed") def test_astype_arrow_timestamp(using_copy_on_write): + pytest.importorskip("pyarrow") df = DataFrame( { "a": [ diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 00cf331c93f29..ece6475b3fe1c 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -36,9 +36,6 @@ PY311, is_ci_environment, is_platform_windows, - pa_version_under7p0, - pa_version_under8p0, - pa_version_under9p0, pa_version_under11p0, pa_version_under13p0, pa_version_under14p0, @@ -389,9 +386,7 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques data, all_numeric_accumulations, skipna ) - if pa_version_under9p0 or ( - pa_version_under13p0 and all_numeric_accumulations != "cumsum" - ): + if pa_version_under13p0 and all_numeric_accumulations != "cumsum": # xfailing takes a long time to run because pytest # renders the exception messages even 
when not showing them opt = request.config.option @@ -497,18 +492,6 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque dtype._is_numeric or dtype.kind == "b" ): request.node.add_marker(xfail_mark) - elif ( - all_numeric_reductions in {"var", "std", "median"} - and pa_version_under7p0 - and pa.types.is_decimal(pa_dtype) - ): - request.node.add_marker(xfail_mark) - elif ( - all_numeric_reductions == "sem" - and pa_version_under8p0 - and (dtype._is_numeric or pa.types.is_temporal(pa_dtype)) - ): - request.node.add_marker(xfail_mark) elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { "sem", @@ -753,45 +736,6 @@ def test_value_counts_returns_pyarrow_int64(self, data): result = data.value_counts() assert result.dtype == ArrowDtype(pa.int64()) - def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, request): - pa_dtype = data_for_sorting.dtype.pyarrow_dtype - if pa.types.is_decimal(pa_dtype) and pa_version_under7p0: - request.node.add_marker( - pytest.mark.xfail( - reason=f"No pyarrow kernel for {pa_dtype}", - raises=pa.ArrowNotImplementedError, - ) - ) - super().test_argmin_argmax(data_for_sorting, data_missing_for_sorting) - - @pytest.mark.parametrize( - "op_name, skipna, expected", - [ - ("idxmax", True, 0), - ("idxmin", True, 2), - ("argmax", True, 0), - ("argmin", True, 2), - ("idxmax", False, np.nan), - ("idxmin", False, np.nan), - ("argmax", False, -1), - ("argmin", False, -1), - ], - ) - def test_argreduce_series( - self, data_missing_for_sorting, op_name, skipna, expected, request - ): - pa_dtype = data_missing_for_sorting.dtype.pyarrow_dtype - if pa.types.is_decimal(pa_dtype) and pa_version_under7p0 and skipna: - request.node.add_marker( - pytest.mark.xfail( - reason=f"No pyarrow kernel for {pa_dtype}", - raises=pa.ArrowNotImplementedError, - ) - ) - super().test_argreduce_series( - data_missing_for_sorting, op_name, skipna, expected - ) - _combine_le_expected_dtype = "bool[pyarrow]" divmod_exc = NotImplementedError @@ -913,7 +857,7 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): return expected def _is_temporal_supported(self, opname, pa_dtype): - return not pa_version_under8p0 and ( + return ( ( opname in ("__add__", "__radd__") or ( @@ -968,14 +912,10 @@ def _get_arith_xfail_marker(self, opname, pa_dtype): arrow_temporal_supported = self._is_temporal_supported(opname, pa_dtype) - if ( - opname == "__rpow__" - and ( - pa.types.is_floating(pa_dtype) - or pa.types.is_integer(pa_dtype) - or pa.types.is_decimal(pa_dtype) - ) - and not pa_version_under7p0 + if opname == "__rpow__" and ( + pa.types.is_floating(pa_dtype) + or pa.types.is_integer(pa_dtype) + or pa.types.is_decimal(pa_dtype) ): mark = pytest.mark.xfail( reason=( @@ -998,33 +938,18 @@ def _get_arith_xfail_marker(self, opname, pa_dtype): f"pd.NA and {pa_dtype} Python scalar" ), ) - elif ( - opname == "__rfloordiv__" - and (pa.types.is_integer(pa_dtype) or pa.types.is_decimal(pa_dtype)) - and not pa_version_under7p0 + elif opname == "__rfloordiv__" and ( + pa.types.is_integer(pa_dtype) or pa.types.is_decimal(pa_dtype) ): mark = pytest.mark.xfail( raises=pa.ArrowInvalid, reason="divide by 0", ) - elif ( - opname == "__rtruediv__" - and pa.types.is_decimal(pa_dtype) - and not pa_version_under7p0 - ): + elif opname == "__rtruediv__" and pa.types.is_decimal(pa_dtype): mark = pytest.mark.xfail( raises=pa.ArrowInvalid, reason="divide by 0", ) - elif ( - opname == "__pow__" - and pa.types.is_decimal(pa_dtype) - and pa_version_under7p0 - 
): - mark = pytest.mark.xfail( - raises=pa.ArrowInvalid, - reason="Invalid decimal function: power_checked", - ) return mark @@ -1057,15 +982,10 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): def test_arith_series_with_array(self, data, all_arithmetic_operators, request): pa_dtype = data.dtype.pyarrow_dtype - if ( - all_arithmetic_operators - in ( - "__sub__", - "__rsub__", - ) - and pa.types.is_unsigned_integer(pa_dtype) - and not pa_version_under7p0 - ): + if all_arithmetic_operators in ( + "__sub__", + "__rsub__", + ) and pa.types.is_unsigned_integer(pa_dtype): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowInvalid, @@ -1351,10 +1271,7 @@ def test_quantile(data, interpolation, quantile, request): ): # For string, bytes, and bool, we don't *expect* to have quantile work # Note this matches the non-pyarrow behavior - if pa_version_under7p0: - msg = r"Function quantile has no kernel matching input types \(.*\)" - else: - msg = r"Function 'quantile' has no kernel matching input types \(.*\)" + msg = r"Function 'quantile' has no kernel matching input types \(.*\)" with pytest.raises(pa.ArrowNotImplementedError, match=msg): ser.quantile(q=quantile, interpolation=interpolation) return @@ -1362,7 +1279,7 @@ def test_quantile(data, interpolation, quantile, request): if ( pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype) - or (pa.types.is_decimal(pa_dtype) and not pa_version_under7p0) + or pa.types.is_decimal(pa_dtype) ): pass elif pa.types.is_temporal(data._pa_array.type): @@ -1656,7 +1573,6 @@ def test_setitem_invalid_dtype(data): data[:] = fill_value -@pytest.mark.skipif(pa_version_under8p0, reason="returns object with 7.0") def test_from_arrow_respecting_given_dtype(): date_array = pa.array( [pd.Timestamp("2019-12-31"), pd.Timestamp("2019-12-31")], type=pa.date32() @@ -1671,7 +1587,6 @@ def test_from_arrow_respecting_given_dtype(): tm.assert_series_equal(result, expected) -@pytest.mark.skipif(pa_version_under8p0, reason="doesn't raise with 7") def test_from_arrow_respecting_given_dtype_unsafe(): array = pa.array([1.5, 2.5], type=pa.float64()) with pytest.raises(pa.ArrowInvalid, match="Float value 1.5 was truncated"): @@ -1827,11 +1742,6 @@ def test_str_repeat_unsupported(): ser.str.repeat([1, 2]) -@pytest.mark.xfail( - pa_version_under7p0, - reason="Unsupported for pyarrow < 7", - raises=NotImplementedError, -) def test_str_repeat(): ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string())) result = ser.str.repeat(2) @@ -2270,15 +2180,7 @@ def test_unsupported_dt(data): ["dayofyear", 2], ["hour", 3], ["minute", 4], - pytest.param( - "is_leap_year", - False, - marks=pytest.mark.xfail( - pa_version_under8p0, - raises=NotImplementedError, - reason="is_leap_year not implemented for pyarrow < 8.0", - ), - ), + ["is_leap_year", False], ["microsecond", 5], ["month", 1], ["nanosecond", 6], @@ -2503,9 +2405,6 @@ def test_dt_roundlike_unsupported_freq(method): getattr(ser.dt, method)(None) -@pytest.mark.xfail( - pa_version_under7p0, reason="Methods not supported for pyarrow < 7.0" -) @pytest.mark.parametrize("freq", ["D", "h", "min", "s", "ms", "us", "ns"]) @pytest.mark.parametrize("method", ["ceil", "floor", "round"]) def test_dt_ceil_year_floor(freq, method): @@ -2805,11 +2704,6 @@ def test_date32_repr(): assert repr(ser) == "0 2020-01-01\ndtype: date32[day][pyarrow]" -@pytest.mark.xfail( - pa_version_under8p0, - reason="Function 'add_checked' has no kernel matching input types", - raises=pa.ArrowNotImplementedError, -) def 
test_duration_overflow_from_ndarray_containing_nat(): # GH52843 data_ts = pd.to_datetime([1, None]) @@ -2876,13 +2770,6 @@ def test_setitem_temporal(pa_type): ) def test_arithmetic_temporal(pa_type, request): # GH 53171 - if pa_version_under8p0 and pa.types.is_duration(pa_type): - mark = pytest.mark.xfail( - raises=pa.ArrowNotImplementedError, - reason="Function 'subtract_checked' has no kernel matching input types", - ) - request.node.add_marker(mark) - arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) unit = pa_type.unit result = arr - pd.Timedelta(1, unit=unit).as_unit(unit) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 6590f10c6b967..a6caafb48ca4d 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -3,7 +3,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 import pandas.util._test_decorators as td import pandas as pd @@ -868,10 +867,10 @@ def test_frame_astype_no_copy(): assert np.shares_memory(df.b.values, result.b.values) -@pytest.mark.skipif(pa_version_under7p0, reason="pyarrow is required for this test") @pytest.mark.parametrize("dtype", ["int64", "Int64"]) def test_astype_copies(dtype): # GH#50984 + pytest.importorskip("pyarrow") df = DataFrame({"a": [1, 2, 3]}, dtype=dtype) result = df.astype("int64[pyarrow]", copy=True) df.iloc[0, 0] = 100 diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 8065aa63dff81..31efe55e1a904 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat.pyarrow import pa_version_under7p0 +from pandas.compat.pyarrow import pa_version_under10p1 from pandas.core.dtypes.missing import na_value_for_dtype @@ -418,7 +418,7 @@ def test_groupby_drop_nan_with_multi_index(): pytest.param( "string[pyarrow]", marks=pytest.mark.skipif( - pa_version_under7p0, reason="pyarrow is not installed" + pa_version_under10p1, reason="pyarrow is not installed" ), ), "datetime64[ns]", diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index c51dcb395c795..32659bc2f7bff 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -7,8 +7,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 - from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike import pandas as pd @@ -648,10 +646,9 @@ def test_from_frame(): tm.assert_index_equal(expected, result) -@pytest.mark.skipif(pa_version_under7p0, reason="minimum pyarrow not installed") def test_from_frame_missing_values_multiIndex(): # GH 39984 - import pyarrow as pa + pa = pytest.importorskip("pyarrow") df = pd.DataFrame( { diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b043f9fab23ae..f84d642576efe 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -13,8 +13,6 @@ from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( - pa_version_under7p0, - pa_version_under8p0, pa_version_under11p0, pa_version_under13p0, ) @@ -230,17 +228,11 @@ def check_partition_names(path, expected): expected: iterable of str Expected partition names. 
""" - if pa_version_under7p0: - import pyarrow.parquet as pq - - dataset = pq.ParquetDataset(path, validate_schema=False) - assert len(dataset.partitions.partition_names) == len(expected) - assert dataset.partitions.partition_names == set(expected) - else: - import pyarrow.dataset as ds + import pyarrow.parquet as pq - dataset = ds.dataset(path, partitioning="hive") - assert dataset.partitioning.schema.names == expected + dataset = pq.ParquetDataset(path, validate_schema=False) + assert len(dataset.partitions.partition_names) == len(expected) + assert dataset.partitions.partition_names == set(expected) def test_invalid_engine(df_compat): @@ -616,9 +608,8 @@ def test_write_column_index_nonstring(self, engine): else: check_round_trip(df, engine) - @pytest.mark.skipif(pa_version_under7p0, reason="minimum pyarrow not installed") def test_dtype_backend(self, engine, request): - import pyarrow.parquet as pq + pq = pytest.importorskip("pyarrow.parquet") if engine == "fastparquet": # We are manually disabling fastparquet's @@ -751,10 +742,7 @@ def test_duplicate_columns(self, pa): def test_timedelta(self, pa): df = pd.DataFrame({"a": pd.timedelta_range("1 day", periods=3)}) - if pa_version_under8p0: - self.check_external_error_on_write(df, pa, NotImplementedError) - else: - check_round_trip(df, pa) + check_round_trip(df, pa) def test_unsupported(self, pa): # mixed python objects @@ -976,18 +964,12 @@ def test_timestamp_nanoseconds(self, pa): # with version 2.6, pyarrow defaults to writing the nanoseconds, so # this should work without error # Note in previous pyarrows(<7.0.0), only the pseudo-version 2.0 was available - if not pa_version_under7p0: - ver = "2.6" - else: - ver = "2.0" + ver = "2.6" df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1ns", periods=10)}) check_round_trip(df, pa, write_kwargs={"version": ver}) def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): - if ( - not pa_version_under7p0 - and timezone_aware_date_list.tzinfo != datetime.timezone.utc - ): + if timezone_aware_date_list.tzinfo != datetime.timezone.utc: request.node.add_marker( pytest.mark.xfail( reason="temporary skip this test until it is properly resolved: " From ceeaa768a3ca7428289310cc95202b0e93ba2815 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 14 Oct 2023 14:19:18 -0700 Subject: [PATCH 04/19] Try removing pins --- ci/deps/actions-311.yaml | 2 +- ci/deps/circle-310-arm64.yaml | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index e1e5d7bcafff6..81455fb82c7ff 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -45,7 +45,7 @@ dependencies: - pyarrow>=10.0.1 - pymysql>=1.0.2 - pyreadstat>=1.2.0 - # - pytables>=3.7.0, 3.8.0 is first version that supports 3.11 + - pytables>=3.8.0 - python-calamine>=0.1.6 - pyxlsb>=1.0.10 - s3fs>=2022.11.0 diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml index 3899db633f0cf..769cdd435a02d 100644 --- a/ci/deps/circle-310-arm64.yaml +++ b/ci/deps/circle-310-arm64.yaml @@ -35,8 +35,7 @@ dependencies: - jinja2>=3.1.2 - lxml>=4.9.2 - matplotlib>=3.6.3, <3.8 - # test_numba_vs_cython segfaults with numba 0.57 - - numba>=0.56.4, <0.57.0 + - numba>=0.56.4 - numexpr>=2.8.4 - odfpy>=1.4.1 - qtpy>=2.3.0 @@ -45,7 +44,7 @@ dependencies: - psycopg2>=2.9.5 - pyarrow>=10.0.1 - pymysql>=1.0.2 - # - pyreadstat>=1.2.0 not available on ARM + - pyreadstat>=1.2.0 - pytables>=3.8.0 - 
python-calamine>=0.1.6 - pyxlsb>=1.0.10 From a611cadb0ba2244ac0a80b474bf3c0f5ee809952 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 14 Oct 2023 14:37:32 -0700 Subject: [PATCH 05/19] Is it pandas gbq? --- ci/deps/actions-310.yaml | 2 +- ci/deps/actions-311.yaml | 2 +- ci/deps/actions-39-minimum_versions.yaml | 2 +- ci/deps/actions-39.yaml | 2 +- ci/deps/circle-310-arm64.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index cf4d05e5ff0a7..8a4b440d667c4 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -40,7 +40,7 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 - - pandas-gbq>=0.19.0 + # - pandas-gbq>=0.19.0 - psycopg2>=2.9.5 - pyarrow>=10.0.1 - pymysql>=1.0.2 diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 81455fb82c7ff..19103d2c270fa 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -40,7 +40,7 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 - - pandas-gbq>=0.19.0 + # - pandas-gbq>=0.19.0 - psycopg2>=2.9.5 - pyarrow>=10.0.1 - pymysql>=1.0.2 diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index b8155620499af..f83306c6c592a 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -42,7 +42,7 @@ dependencies: - odfpy=1.4.1 - qtpy=2.3.0 - openpyxl=3.1.0 - - pandas-gbq=0.19.0 + # - pandas-gbq=0.19.0 - psycopg2=2.9.5 - pyarrow=10.0.1 - pymysql=1.0.2 diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index cda7f6171f0e1..c1585a0f25ccc 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -40,7 +40,7 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 - - pandas-gbq>=0.19.0 + # - pandas-gbq>=0.19.0 - psycopg2>=2.9.5 - pyarrow>=10.0.1 - pymysql>=1.0.2 diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml index 769cdd435a02d..30e7409da4b5f 100644 --- a/ci/deps/circle-310-arm64.yaml +++ b/ci/deps/circle-310-arm64.yaml @@ -40,7 +40,7 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 - - pandas-gbq>=0.19.0 + # - pandas-gbq>=0.19.0 - psycopg2>=2.9.5 - pyarrow>=10.0.1 - pymysql>=1.0.2 From ad77b18136a37b2583a20df1626ace7283364554 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 14 Oct 2023 19:12:23 -0700 Subject: [PATCH 06/19] Fix non existent versions, add note about gbq --- ci/deps/actions-310.yaml | 8 +++++--- ci/deps/actions-311-downstream_compat.yaml | 10 ++++++---- ci/deps/actions-311.yaml | 8 +++++--- ci/deps/actions-39-minimum_versions.yaml | 8 +++++--- ci/deps/actions-39.yaml | 8 +++++--- ci/deps/circle-310-arm64.yaml | 8 +++++--- doc/source/getting_started/install.rst | 6 +++--- environment.yml | 4 ++-- pandas/compat/_optional.py | 6 +++--- pyproject.toml | 10 +++++----- requirements-dev.txt | 4 ++-- 11 files changed, 46 insertions(+), 34 deletions(-) diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 8a4b440d667c4..dca2fb814114e 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -25,7 +25,7 @@ dependencies: # optional dependencies - beautifulsoup4>=4.11.2 - - blosc>=1.21.2 + - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2022.12.0 - fsspec>=2022.11.0 @@ -40,8 +40,10 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 + # Doesn't install well with pyarrow + # 
https://github.com/pandas-dev/pandas/issues/55525 # - pandas-gbq>=0.19.0 - - psycopg2>=2.9.5 + - psycopg2>=2.9.6 - pyarrow>=10.0.1 - pymysql>=1.0.2 - pyreadstat>=1.2.0 @@ -54,7 +56,7 @@ dependencies: - tabulate>=0.9.0 - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.4 + - xlsxwriter>=3.0.5 - zstandard>=0.19.0 - pip: diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index 44cfa2a1e6c43..533fb69862a69 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -26,7 +26,7 @@ dependencies: # optional dependencies - beautifulsoup4>=4.11.2 - - blosc>=1.21.2 + - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2022.12.0 - fsspec>=2022.11.0 @@ -41,8 +41,10 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 - - pandas-gbq>=0.19.0 - - psycopg2>=2.9.5 + # Doesn't install well with pyarrow + # https://github.com/pandas-dev/pandas/issues/55525 + # - pandas-gbq>=0.19.0 + - psycopg2>=2.9.6 - pyarrow>=10.0.1 - pymysql>=1.0.2 - pyreadstat>=1.2.0 @@ -55,7 +57,7 @@ dependencies: - tabulate>=0.9.0 - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.4 + - xlsxwriter>=3.0.5 - zstandard>=0.19.0 # downstream packages diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 19103d2c270fa..45703be23f576 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -25,7 +25,7 @@ dependencies: # optional dependencies - beautifulsoup4>=4.11.2 - - blosc>=1.21.2 + - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2022.12.0 - fsspec>=2022.11.0 @@ -40,8 +40,10 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 + # Doesn't install well with pyarrow + # https://github.com/pandas-dev/pandas/issues/55525 # - pandas-gbq>=0.19.0 - - psycopg2>=2.9.5 + - psycopg2>=2.9.6 - pyarrow>=10.0.1 - pymysql>=1.0.2 - pyreadstat>=1.2.0 @@ -54,7 +56,7 @@ dependencies: - tabulate>=0.9.0 - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.4 + - xlsxwriter>=3.0.5 - zstandard>=0.19.0 - pip: diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index f83306c6c592a..caf78d4d92edd 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -27,7 +27,7 @@ dependencies: # optional dependencies - beautifulsoup4=4.11.2 - - blosc=1.21.2 + - blosc=1.21.3 - bottleneck=1.3.6 - fastparquet=2022.12.0 - fsspec=2022.11.0 @@ -42,8 +42,10 @@ dependencies: - odfpy=1.4.1 - qtpy=2.3.0 - openpyxl=3.1.0 + # Doesn't install well with pyarrow + # https://github.com/pandas-dev/pandas/issues/55525 # - pandas-gbq=0.19.0 - - psycopg2=2.9.5 + - psycopg2=2.9.6 - pyarrow=10.0.1 - pymysql=1.0.2 - pyreadstat=1.2.0 @@ -56,7 +58,7 @@ dependencies: - tabulate=0.9.0 - xarray=2022.12.0 - xlrd=2.0.1 - - xlsxwriter=3.0.4 + - xlsxwriter=3.0.5 - zstandard=0.19.0 - pip: diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index c1585a0f25ccc..c91fdd31b443b 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -25,7 +25,7 @@ dependencies: # optional dependencies - beautifulsoup4>=4.11.2 - - blosc>=1.21.2 + - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2022.12.0 - fsspec>=2022.11.0 @@ -40,8 +40,10 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 + # Doesn't install well with pyarrow + # https://github.com/pandas-dev/pandas/issues/55525 # - pandas-gbq>=0.19.0 - - psycopg2>=2.9.5 + - psycopg2>=2.9.6 - pyarrow>=10.0.1 - pymysql>=1.0.2 - pyreadstat>=1.2.0 @@ -54,7 +56,7 @@ dependencies: - tabulate>=0.9.0 - 
xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.4 + - xlsxwriter>=3.0.5 - zstandard>=0.19.0 - pip: diff --git a/ci/deps/circle-310-arm64.yaml b/ci/deps/circle-310-arm64.yaml index 30e7409da4b5f..2076f5bc9aca9 100644 --- a/ci/deps/circle-310-arm64.yaml +++ b/ci/deps/circle-310-arm64.yaml @@ -25,7 +25,7 @@ dependencies: # optional dependencies - beautifulsoup4>=4.11.2 - - blosc>=1.21.2 + - blosc>=1.21.3 - bottleneck>=1.3.6 - fastparquet>=2022.12.0 - fsspec>=2022.11.0 @@ -40,8 +40,10 @@ dependencies: - odfpy>=1.4.1 - qtpy>=2.3.0 - openpyxl>=3.1.0 + # Doesn't install well with pyarrow + # https://github.com/pandas-dev/pandas/issues/55525 # - pandas-gbq>=0.19.0 - - psycopg2>=2.9.5 + - psycopg2>=2.9.6 - pyarrow>=10.0.1 - pymysql>=1.0.2 - pyreadstat>=1.2.0 @@ -54,5 +56,5 @@ dependencies: - tabulate>=0.9.0 - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.4 + - xlsxwriter>=3.0.5 - zstandard>=0.19.0 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 251ca6c317d4a..1d3442aa605f6 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -278,7 +278,7 @@ Installable with ``pip install "pandas[excel]"``. Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= xlrd 2.0.1 excel Reading Excel -xlsxwriter 3.0.4 excel Writing Excel +xlsxwriter 3.0.5 excel Writing Excel openpyxl 3.1.0 excel Reading / writing for xlsx files pyxlsb 1.0.10 excel Reading for xlsb files python-calamine 0.1.6 excel Reading for xls/xlsx/xlsb/ods files @@ -343,7 +343,7 @@ Dependency Minimum Version pip extra Notes SQLAlchemy 2.0.0 postgresql, SQL support for databases other than sqlite mysql, sql-other -psycopg2 2.9.5 postgresql PostgreSQL engine for sqlalchemy +psycopg2 2.9.6 postgresql PostgreSQL engine for sqlalchemy pymysql 1.0.2 mysql MySQL engine for sqlalchemy ========================= ================== =============== ============================================================= @@ -356,7 +356,7 @@ Installable with ``pip install "pandas[hdf5, parquet, feather, spss, excel]"`` Dependency Minimum Version pip extra Notes ========================= ================== ================ ============================================================= PyTables 3.8.0 hdf5 HDF5-based reading / writing -blosc 1.21.2 hdf5 Compression for HDF5; only available on ``conda`` +blosc 1.21.3 hdf5 Compression for HDF5; only available on ``conda`` zlib hdf5 Compression for HDF5 fastparquet 2022.12.0 - Parquet reading / writing (pyarrow is default) pyarrow 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing diff --git a/environment.yml b/environment.yml index 1b8756aec89e4..1f8aeca0d197e 100644 --- a/environment.yml +++ b/environment.yml @@ -42,7 +42,7 @@ dependencies: - openpyxl>=3.1.0 - odfpy>=1.4.1 - py - - psycopg2>=2.9.5 + - psycopg2>=2.9.6 - pyarrow>=10.0.1 - pymysql>=1.0.2 - pyreadstat>=1.2.0 @@ -55,7 +55,7 @@ dependencies: - tabulate>=0.9.0 - xarray>=2022.12.0 - xlrd>=2.0.1 - - xlsxwriter>=3.0.4 + - xlsxwriter>=3.0.5 - zstandard>=0.19.0 # downstream packages diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index ca2e362149248..c45c2ff6eb6df 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -16,7 +16,7 @@ VERSIONS = { "bs4": "4.11.2", - "blosc": "1.21.2", + "blosc": "1.21.3", "bottleneck": "1.3.6", "dataframe-api-compat": "0.1.7", "fastparquet": "2022.12.0", @@ -32,7 +32,7 @@ 
"odfpy": "1.4.1", "openpyxl": "3.1.0", "pandas_gbq": "0.19.0", - "psycopg2": "2.9.5", # (dt dec pq3 ext lo64) + "psycopg2": "2.9.6", # (dt dec pq3 ext lo64) "pymysql": "1.0.2", "pyarrow": "10.0.1", "pyreadstat": "1.2.0", @@ -46,7 +46,7 @@ "tabulate": "0.9.0", "xarray": "2022.12.0", "xlrd": "2.0.1", - "xlsxwriter": "3.0.4", + "xlsxwriter": "3.0.5", "zstandard": "0.19.0", "tzdata": "2022.7", "qtpy": "2.3.0", diff --git a/pyproject.toml b/pyproject.toml index 0ae55c0f0dbec..1825ef3fe65c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,14 +67,14 @@ computation = ['scipy>=1.10.0', 'xarray>=2022.12.0'] fss = ['fsspec>=2022.11.0'] aws = ['s3fs>=2022.11.0'] gcp = ['gcsfs>=2022.11.0', 'pandas-gbq>=0.19.0'] -excel = ['odfpy>=1.4.1', 'openpyxl>=3.1.0', 'python-calamine>=0.1.6', 'pyxlsb>=1.0.10', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.4'] +excel = ['odfpy>=1.4.1', 'openpyxl>=3.1.0', 'python-calamine>=0.1.6', 'pyxlsb>=1.0.10', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.5'] parquet = ['pyarrow>=10.0.1'] feather = ['pyarrow>=10.0.1'] hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) #'blosc>=1.20.1', 'tables>=3.8.0'] spss = ['pyreadstat>=1.2.0'] -postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.5'] +postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.6'] mysql = ['SQLAlchemy>=2.0.0', 'pymysql>=1.0.2'] sql-other = ['SQLAlchemy>=2.0.0'] html = ['beautifulsoup4>=4.11.2', 'html5lib>=1.1', 'lxml>=4.9.2'] @@ -86,7 +86,7 @@ compression = ['zstandard>=0.19.0'] consortium-standard = ['dataframe-api-compat>=0.1.7'] all = ['beautifulsoup4>=4.11.2', # blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) - #'blosc>=1.21.2', + #'blosc>=1.21.3', 'bottleneck>=1.3.6', 'dataframe-api-compat>=0.1.7', 'fastparquet>=2022.12.0', @@ -102,7 +102,7 @@ all = ['beautifulsoup4>=4.11.2', 'odfpy>=1.4.1', 'openpyxl>=3.1.0', 'pandas-gbq>=0.19.0', - 'psycopg2>=2.9.5', + 'psycopg2>=2.9.6', 'pyarrow>=10.0.1', 'pymysql>=1.0.2', 'PyQt5>=5.15.8', @@ -120,7 +120,7 @@ all = ['beautifulsoup4>=4.11.2', 'tabulate>=0.9.0', 'xarray>=2022.12.0', 'xlrd>=2.0.1', - 'xlsxwriter>=3.0.4', + 'xlsxwriter>=3.0.5', 'zstandard>=0.19.0'] # TODO: Remove after setuptools support is dropped. diff --git a/requirements-dev.txt b/requirements-dev.txt index da7b8176399d1..a7c2237d1664d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -31,7 +31,7 @@ numexpr>=2.8.4 openpyxl>=3.1.0 odfpy>=1.4.1 py -psycopg2-binary>=2.9.5 +psycopg2-binary>=2.9.6 pyarrow>=10.0.1 pymysql>=1.0.2 pyreadstat>=1.2.0 @@ -44,7 +44,7 @@ SQLAlchemy>=2.0.0 tabulate>=0.9.0 xarray>=2022.12.0 xlrd>=2.0.1 -xlsxwriter>=3.0.4 +xlsxwriter>=3.0.5 zstandard>=0.19.0 dask seaborn From 867c6bd754ac960521662d555d7146d146ac9d1b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 14 Oct 2023 19:15:37 -0700 Subject: [PATCH 07/19] Fix pyarrow test --- pandas/tests/io/test_parquet.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index f84d642576efe..944cc56899e42 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -228,11 +228,10 @@ def check_partition_names(path, expected): expected: iterable of str Expected partition names. 
""" - import pyarrow.parquet as pq + import pyarrow.dataset as ds - dataset = pq.ParquetDataset(path, validate_schema=False) - assert len(dataset.partitions.partition_names) == len(expected) - assert dataset.partitions.partition_names == set(expected) + dataset = ds.dataset(path, partitioning="hive") + assert dataset.partitioning.schema.names == expected def test_invalid_engine(df_compat): From a11c312891c70aa3d2359d1203e73ca0a976b5b8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 11:39:01 -0700 Subject: [PATCH 08/19] Address test_parse_tz_aware --- pandas/tests/io/parser/test_parse_dates.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 9f7840588f89e..2ea70634fdcc0 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -979,12 +979,15 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs): ) -def test_parse_tz_aware(all_parsers, request): +def test_parse_tz_aware(all_parsers): # See gh-1693 parser = all_parsers data = "Date,x\n2012-06-13T01:39:00Z,0.5" result = parser.read_csv(StringIO(data), index_col=0, parse_dates=True) + # TODO: make unit check more specific + if parser.engine == "pyarrow": + result.index = result.index.as_unit("ns") expected = DataFrame( {"x": [0.5]}, index=Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date") ) From 2a4a266e88330d562b52c8d121c8ff9feef69f52 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 11:42:24 -0700 Subject: [PATCH 09/19] FIx test_parse_dates_arrow_engine --- pandas/tests/io/parser/test_parse_dates.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 2ea70634fdcc0..c6afff56de16b 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -2234,6 +2234,9 @@ def test_parse_dates_arrow_engine(all_parsers): 2000-01-01 00:00:01,1""" result = parser.read_csv(StringIO(data), parse_dates=["a"]) + # TODO: make unit check more specific + if parser.engine == "pyarrow": + result["a"] = result["a"].dt.as_unit("ns") expected = DataFrame( { "a": [ From efe9d8dc63ceee0e96cefc19626f73ba21a60663 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 11:50:32 -0700 Subject: [PATCH 10/19] Address more tests --- pandas/tests/interchange/test_impl.py | 16 +++++++++++++++- .../tests/io/parser/common/test_common_basic.py | 8 ++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index ee3a78864ece0..5dbc0156816aa 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -4,6 +4,10 @@ import pytest from pandas._libs.tslibs import iNaT +from pandas.compat import ( + is_ci_environment, + is_platform_windows, +) from pandas.compat.numpy import np_version_lt1p23 import pandas as pd @@ -309,11 +313,21 @@ def test_datetimetzdtype(tz, unit): tm.assert_frame_equal(df, from_dataframe(df.__dataframe__())) -def test_interchange_from_non_pandas_tz_aware(): +def test_interchange_from_non_pandas_tz_aware(request): # GH 54239, 54287 pa = pytest.importorskip("pyarrow", "11.0.0") import pyarrow.compute as pc + if is_platform_windows() 
and is_ci_environment(): + mark = pytest.mark.xfail( + raises=pa.ArrowInvalid, + reason=( + "TODO: Set ARROW_TIMEZONE_DATABASE environment variable " + "on CI to path to the tzdata for pyarrow." + ), + ) + request.applymarker(mark) + arr = pa.array([datetime(2020, 1, 1), None, datetime(2020, 1, 2)]) arr = pc.assume_timezone(arr, "Asia/Kathmandu") table = pa.table({"arr": arr}) diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 00a26a755756f..442aa5ef87b10 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -86,7 +86,9 @@ def test_read_csv_local(all_parsers, csv1): fname = prefix + str(os.path.abspath(csv1)) result = parser.read_csv(fname, index_col=0, parse_dates=True) - + # TODO: make unit check more specific + if parser.engine == "pyarrow": + result.index = result.index.as_unit("ns") expected = DataFrame( [ [0.980269, 3.685731, -0.364216805298, -1.159738], @@ -177,7 +179,9 @@ def test_read_csv_low_memory_no_rows_with_index(all_parsers): def test_read_csv_dataframe(all_parsers, csv1): parser = all_parsers result = parser.read_csv(csv1, index_col=0, parse_dates=True) - + # TODO: make unit check more specific + if parser.engine == "pyarrow": + result.index = result.index.as_unit("ns") expected = DataFrame( [ [0.980269, 3.685731, -0.364216805298, -1.159738], From f46eb524913efad1a2d8e4b7bb0729b65decad82 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 12:22:51 -0700 Subject: [PATCH 11/19] Skip comments or in sync script --- scripts/validate_min_versions_in_sync.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index cb03276d2dd93..92f39189d83ed 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -226,6 +226,9 @@ def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, elif "- pip:" in line: continue elif seen_required and line.strip(): + if "#" in line: + # ignore comment in yaml + continue if "==" in line: package, version = line.strip().split("==", maxsplit=1) else: From ccc244a9a33fcab7612bfe5df17ee4d69e8c1643 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 12:29:00 -0700 Subject: [PATCH 12/19] check = instead --- scripts/validate_min_versions_in_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index 92f39189d83ed..7334d41654a50 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -226,7 +226,7 @@ def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, elif "- pip:" in line: continue elif seen_required and line.strip(): - if "#" in line: + if "=" not in line: # ignore comment in yaml continue if "==" in line: From abc675fac0c63d0e9510fd87525a379008d8c140 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 19:19:38 -0700 Subject: [PATCH 13/19] adjust script --- ci/deps/actions-39-minimum_versions.yaml | 4 +--- scripts/validate_min_versions_in_sync.py | 9 ++++----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml 
index caf78d4d92edd..4297d2b56cf17 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -42,9 +42,7 @@ dependencies: - odfpy=1.4.1 - qtpy=2.3.0 - openpyxl=3.1.0 - # Doesn't install well with pyarrow - # https://github.com/pandas-dev/pandas/issues/55525 - # - pandas-gbq=0.19.0 + #- pandas-gbq=0.19.0 - psycopg2=2.9.6 - pyarrow=10.0.1 - pymysql=1.0.2 diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index 7334d41654a50..d5c491f6ebc12 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -36,7 +36,7 @@ SETUP_PATH = pathlib.Path("pyproject.toml").resolve() YAML_PATH = pathlib.Path("ci/deps") ENV_PATH = pathlib.Path("environment.yml") -EXCLUDE_DEPS = {"tzdata", "blosc"} +EXCLUDE_DEPS = {"tzdata", "blosc", "pandas-gbq"} EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]"]) # pandas package is not available # in pre-commit environment @@ -197,8 +197,10 @@ def pin_min_versions_to_yaml_file( def get_versions_from_code() -> dict[str, str]: """Min versions for checking within pandas code.""" install_map = _optional.INSTALL_MAPPING + inverse_install_map = {v: k for k, v in install_map.items()} versions = _optional.VERSIONS for item in EXCLUDE_DEPS: + item = inverse_install_map.get(item, item) versions.pop(item, None) return {install_map.get(k, k).casefold(): v for k, v in versions.items()} @@ -226,14 +228,11 @@ def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, elif "- pip:" in line: continue elif seen_required and line.strip(): - if "=" not in line: - # ignore comment in yaml - continue if "==" in line: package, version = line.strip().split("==", maxsplit=1) else: package, version = line.strip().split("=", maxsplit=1) - package = package[2:] + package = package.split()[-1] if package in EXCLUDE_DEPS: continue if not seen_optional: From 26d45b7f14d9fd0655e505b816fac67d6f529be4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 19:22:42 -0700 Subject: [PATCH 14/19] Fix table --- doc/source/getting_started/install.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 1d3442aa605f6..5e84270aba533 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -254,7 +254,7 @@ Dependency Minimum Version pip extra Notes ========================= ================== ================== ============================================================= matplotlib 3.6.3 plot Plotting library Jinja2 3.1.2 output-formatting Conditional formatting with DataFrame.style -tabulate 0.9.0 output-formatting Printing in Markdown-friendly format (see `tabulate`_) +tabulate 0.9.0 output-formatting Printing in Markdown-friendly format (see `tabulate`_) ========================= ================== ================== ============================================================= Computation @@ -279,8 +279,8 @@ Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= xlrd 2.0.1 excel Reading Excel xlsxwriter 3.0.5 excel Writing Excel -openpyxl 3.1.0 excel Reading / writing for xlsx files -pyxlsb 1.0.10 excel Reading for xlsb files +openpyxl 3.1.0 excel Reading / writing for xlsx files +pyxlsb 1.0.10 excel Reading for xlsb files 
python-calamine 0.1.6 excel Reading for xls/xlsx/xlsb/ods files ========================= ================== =============== ============================================================= @@ -340,7 +340,7 @@ Installable with ``pip install "pandas[postgresql, mysql, sql-other]"``. ========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -SQLAlchemy 2.0.0 postgresql, SQL support for databases other than sqlite +SQLAlchemy 2.0.0 postgresql, SQL support for databases other than sqlite mysql, sql-other psycopg2 2.9.6 postgresql PostgreSQL engine for sqlalchemy @@ -358,8 +358,8 @@ Dependency Minimum Version pip extra Notes PyTables 3.8.0 hdf5 HDF5-based reading / writing blosc 1.21.3 hdf5 Compression for HDF5; only available on ``conda`` zlib hdf5 Compression for HDF5 -fastparquet 2022.12.0 - Parquet reading / writing (pyarrow is default) -pyarrow 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing +fastparquet 2022.12.0 - Parquet reading / writing (pyarrow is default) +pyarrow 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing pyreadstat 1.2.0 spss SPSS files (.sav) reading odfpy 1.4.1 excel Open document format (.odf, .ods, .odt) reading / writing ========================= ================== ================ ============================================================= From 257f8e738a71c8527a36be69df9142f1d8fa8700 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Oct 2023 21:15:57 -0700 Subject: [PATCH 15/19] xfail test --- pandas/tests/io/pytables/test_file_handling.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py index 1517bdc96f709..0307c3a2fd3ef 100644 --- a/pandas/tests/io/pytables/test_file_handling.py +++ b/pandas/tests/io/pytables/test_file_handling.py @@ -236,8 +236,12 @@ def test_complibs_default_settings_override(tmp_path, setup_path): # with xfail, would sometimes raise UnicodeDecodeError # invalid state byte ) -def test_complibs(tmp_path, lvl, lib): +def test_complibs(tmp_path, lvl, lib, request): # GH14478 + if not PY311 and is_platform_linux() and lib == "blosc2": + request.applymarker( + pytest.mark.xfail(reason=f"Fails for {lib} on Linux and PY > 3.11") + ) df = DataFrame( np.ones((30, 4)), columns=list("ABCD"), index=np.arange(30).astype(np.str_) ) From 2fcde874cced7e87c15214100171894ce59750f6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 16 Oct 2023 11:11:50 -0700 Subject: [PATCH 16/19] Fix table aligning --- doc/source/getting_started/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 5e84270aba533..2af66da6b8baf 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -265,7 +265,7 @@ Installable with ``pip install "pandas[computation]"``. 
========================= ================== =============== ============================================================= Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= -SciPy 1.10.0 computation Miscellaneous statistical functions +SciPy 1.10.0 computation Miscellaneous statistical functions xarray 2022.12.0 computation pandas-like API for N-dimensional data ========================= ================== =============== ============================================================= From fc095c45e088139ff20f76ddab15e5309d3fdb1b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 16 Oct 2023 22:17:25 -0700 Subject: [PATCH 17/19] Update pandas/tests/io/pytables/test_file_handling.py --- pandas/tests/io/pytables/test_file_handling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py index 0307c3a2fd3ef..d7ac718747cc6 100644 --- a/pandas/tests/io/pytables/test_file_handling.py +++ b/pandas/tests/io/pytables/test_file_handling.py @@ -238,7 +238,7 @@ def test_complibs_default_settings_override(tmp_path, setup_path): ) def test_complibs(tmp_path, lvl, lib, request): # GH14478 - if not PY311 and is_platform_linux() and lib == "blosc2": + if PY311 and is_platform_linux() and lib == "blosc2": request.applymarker( pytest.mark.xfail(reason=f"Fails for {lib} on Linux and PY > 3.11") ) From 450d6a3aa7c428a51f322ab5780cd9c66a7d2976 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 17 Oct 2023 08:50:20 -0700 Subject: [PATCH 18/19] Update pandas/tests/io/pytables/test_file_handling.py --- pandas/tests/io/pytables/test_file_handling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py index d7ac718747cc6..cb44512d4506c 100644 --- a/pandas/tests/io/pytables/test_file_handling.py +++ b/pandas/tests/io/pytables/test_file_handling.py @@ -238,7 +238,7 @@ def test_complibs_default_settings_override(tmp_path, setup_path): ) def test_complibs(tmp_path, lvl, lib, request): # GH14478 - if PY311 and is_platform_linux() and lib == "blosc2": + if PY311 and is_platform_linux() and lib == "blosc2" and lvl != 0: request.applymarker( pytest.mark.xfail(reason=f"Fails for {lib} on Linux and PY > 3.11") ) From fa9e93b03f33dbe515eacf2f2cd5909c004811aa Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 22 Oct 2023 13:57:38 -0700 Subject: [PATCH 19/19] Use importorskip --- pandas/tests/groupby/test_groupby.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0a5a2ed99caae..9485a84a9d5a8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -5,7 +5,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 from pandas.errors import ( PerformanceWarning, SpecificationError, @@ -2542,14 +2541,13 @@ def test_groupby_column_index_name_lost(func): "infer_string", [ False, - pytest.param( - True, - marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"), - ), + True, ], ) def test_groupby_duplicate_columns(infer_string): # GH: 31735 + if infer_string: + 
pytest.importorskip("pyarrow") df = DataFrame( {"A": ["f", "e", "g", "h"], "B": ["a", "b", "c", "d"], "C": [1, 2, 3, 4]} ).astype(object)