diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 59512ddc91a8a..5072ff4723be1 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -30,7 +30,7 @@ jobs: env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml, actions-313.yaml] # Prevent the include jobs from overriding other jobs pattern: [""] - pandas_future_infer_string: ["0"] + pandas_future_infer_string: ["1"] include: - name: "Downstream Compat" env_file: actions-311-downstream_compat.yaml @@ -45,6 +45,10 @@ jobs: env_file: actions-313-freethreading.yaml pattern: "not slow and not network and not single_cpu" platform: ubuntu-24.04 + - name: "Without PyArrow" + env_file: actions-312.yaml + pattern: "not slow and not network and not single_cpu" + platform: ubuntu-24.04 - name: "Locale: it_IT" env_file: actions-311.yaml pattern: "not slow and not network and not single_cpu" @@ -67,13 +71,9 @@ jobs: # It will be temporarily activated during tests with locale.setlocale extra_loc: "zh_CN" platform: ubuntu-24.04 - - name: "Future infer strings" + - name: "Past no infer strings" env_file: actions-312.yaml - pandas_future_infer_string: "1" - platform: ubuntu-24.04 - - name: "Future infer strings (without pyarrow)" - env_file: actions-311.yaml - pandas_future_infer_string: "1" + pandas_future_infer_string: "0" platform: ubuntu-24.04 - name: "Pypy" env_file: actions-pypy-39.yaml @@ -88,7 +88,6 @@ jobs: - name: "Pyarrow Nightly" env_file: actions-311-pyarrownightly.yaml pattern: "not slow and not network and not single_cpu" - pandas_future_infer_string: "1" platform: ubuntu-24.04 fail-fast: false name: ${{ matrix.name || format('{0} {1}', matrix.platform, matrix.env_file) }} @@ -103,7 +102,7 @@ jobs: PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} # Clipboard tests QT_QPA_PLATFORM: offscreen - REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }} + REMOVE_PYARROW: ${{ matrix.name == 'Without PyArrow' && '1' || '0' }} concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}-${{ matrix.platform }} diff --git a/ci/deps/actions-310-minimum_versions.yaml b/ci/deps/actions-310-minimum_versions.yaml index 9f12fe941d488..a9ea6a639043b 100644 --- a/ci/deps/actions-310-minimum_versions.yaml +++ b/ci/deps/actions-310-minimum_versions.yaml @@ -41,7 +41,7 @@ dependencies: - qtpy=2.3.0 - openpyxl=3.1.2 - psycopg2=2.9.6 - - pyarrow=10.0.1 + - pyarrow=12.0.1 - pyiceberg=0.7.1 - pymysql=1.1.0 - pyqt=5.15.9 diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 66d49475bf34b..4904140f2e70b 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -39,7 +39,7 @@ dependencies: - qtpy>=2.3.0 - openpyxl>=3.1.2 - psycopg2>=2.9.6 - - pyarrow>=10.0.1 + - pyarrow>=12.0.1 - pyiceberg>=0.7.1 - pymysql>=1.1.0 - pyqt>=5.15.9 diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index 100a250f0bf01..1c6bece3374b5 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -40,7 +40,7 @@ dependencies: - qtpy>=2.3.0 - openpyxl>=3.1.2 - psycopg2>=2.9.6 - - pyarrow>=10.0.1 + - pyarrow>=12.0.1 - pyiceberg>=0.7.1 - pymysql>=1.1.0 - pyqt>=5.15.9 diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 9669c1e29a435..deb646a7ba86a 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -40,7 +40,7 @@ dependencies: - pyqt>=5.15.9 - openpyxl>=3.1.2 - psycopg2>=2.9.6 - - pyarrow>=10.0.1 + - pyarrow>=12.0.1 - pyiceberg>=0.7.1 - pymysql>=1.1.0 - pyreadstat>=1.2.6 diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml index 61f1d602bb241..97b582b80fb8f 100644 --- a/ci/deps/actions-312.yaml +++ b/ci/deps/actions-312.yaml @@ -40,7 +40,7 @@ dependencies: - pyqt>=5.15.9 - openpyxl>=3.1.2 - psycopg2>=2.9.6 - - pyarrow>=10.0.1 + - pyarrow>=12.0.1 - pyiceberg>=0.7.1 - pymysql>=1.1.0 - pyreadstat>=1.2.6 diff --git a/ci/deps/actions-313.yaml b/ci/deps/actions-313.yaml index 11f4428be27e5..4bc363dc4a27e 100644 --- a/ci/deps/actions-313.yaml +++ b/ci/deps/actions-313.yaml @@ -41,7 +41,7 @@ dependencies: - pyqt>=5.15.9 - openpyxl>=3.1.2 - psycopg2>=2.9.6 - - pyarrow>=10.0.1 + - pyarrow>=12.0.1 - pymysql>=1.1.0 - pyreadstat>=1.2.6 - pytables>=3.8.0 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 1589fea5f8953..ed0c8bd05098d 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -307,7 +307,7 @@ Dependency Minimum Version pip ex `PyTables `__ 3.8.0 hdf5 HDF5-based reading / writing `zlib `__ hdf5 Compression for HDF5 `fastparquet `__ 2024.2.0 - Parquet reading / writing (pyarrow is default) -`pyarrow `__ 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing +`pyarrow `__ 12.0.1 parquet, feather Parquet, ORC, and feather reading / writing `PyIceberg `__ 0.7.1 iceberg Apache Iceberg reading / writing `pyreadstat `__ 1.2.6 spss SPSS files (.sav) reading `odfpy `__ 1.4.1 excel Open document format (.odf, .ods, .odt) reading / writing diff --git a/environment.yml b/environment.yml index 74186bd2581c4..80a1b720ae2ad 100644 --- a/environment.yml +++ b/environment.yml @@ -43,7 +43,7 @@ dependencies: - openpyxl>=3.1.2 - odfpy>=1.4.1 - psycopg2>=2.9.6 - - pyarrow>=10.0.1 + - pyarrow>=12.0.1 - pyiceberg>=0.7.1 - pymysql>=1.1.0 - pyreadstat>=1.2.6 diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index f01dfab0de829..068219443799d 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -38,7 +38,7 @@ "openpyxl": "3.1.2", "psycopg2": "2.9.6", # (dt dec pq3 ext lo64) "pymysql": "1.1.0", - "pyarrow": "10.0.1", + "pyarrow": "12.0.1", "pyiceberg": "0.7.1", "pyreadstat": "1.2.6", "pytest": "7.3.2", diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 20fe8cbab1c9f..bf7e8fb02b58e 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -880,7 +880,7 @@ def register_converter_cb(key: str) -> None: with cf.config_prefix("future"): cf.register_option( "infer_string", - True if os.environ.get("PANDAS_FUTURE_INFER_STRING", "0") == "1" else False, + False if os.environ.get("PANDAS_FUTURE_INFER_STRING", "1") == "0" else True, "Whether to infer sequence of str objects as pyarrow string " "dtype, which will be the default in pandas 3.0 " "(at which point this option will be deprecated).", diff --git a/pyproject.toml b/pyproject.toml index b17a1eacfa717..7582e2bce3879 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,15 +59,15 @@ matplotlib = "pandas:plotting._matplotlib" [project.optional-dependencies] test = ['hypothesis>=6.84.0', 'pytest>=7.3.2', 'pytest-xdist>=3.4.0'] -pyarrow = ['pyarrow>=10.0.1'] +pyarrow = ['pyarrow>=12.0.1'] performance = ['bottleneck>=1.3.6', 'numba>=0.59.0', 'numexpr>=2.9.0'] computation = ['scipy>=1.12.0', 'xarray>=2024.1.1'] fss = ['fsspec>=2023.12.2'] aws = ['s3fs>=2023.12.2'] gcp = ['gcsfs>=2023.12.2'] excel = ['odfpy>=1.4.1', 'openpyxl>=3.1.2', 'python-calamine>=0.1.7', 'pyxlsb>=1.0.10', 'xlrd>=2.0.1', 'xlsxwriter>=3.2.0'] -parquet = ['pyarrow>=10.0.1'] -feather = ['pyarrow>=10.0.1'] +parquet = ['pyarrow>=12.0.1'] +feather = ['pyarrow>=12.0.1'] iceberg = ['pyiceberg>=0.7.1'] hdf5 = ['tables>=3.8.0'] spss = ['pyreadstat>=1.2.6'] @@ -98,7 +98,7 @@ all = ['adbc-driver-postgresql>=0.10.0', 'odfpy>=1.4.1', 'openpyxl>=3.1.2', 'psycopg2>=2.9.6', - 'pyarrow>=10.0.1', + 'pyarrow>=12.0.1', 'pyiceberg>=0.7.1', 'pymysql>=1.1.0', 'PyQt5>=5.15.9', diff --git a/requirements-dev.txt b/requirements-dev.txt index 6515797bc3b9d..b4e977e1b0b1b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -32,7 +32,7 @@ numexpr>=2.9.0 openpyxl>=3.1.2 odfpy>=1.4.1 psycopg2-binary>=2.9.6 -pyarrow>=10.0.1 +pyarrow>=12.0.1 pyiceberg>=0.7.1 pymysql>=1.1.0 pyreadstat>=1.2.6