diff --git a/.azure-pipelines/azure-pipelines-linux.yml b/.azure-pipelines/azure-pipelines-linux.yml
index 51fbe8529..6cd2103f4 100755
--- a/.azure-pipelines/azure-pipelines-linux.yml
+++ b/.azure-pipelines/azure-pipelines-linux.yml
@@ -16,10 +16,18 @@ jobs:
CONFIG: linux_64_cuda_compiler_versionNone
UPLOAD_PACKAGES: 'True'
DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cos7-x86_64
+ linux_aarch64_cuda_compiler_version11.2:
+ CONFIG: linux_aarch64_cuda_compiler_version11.2
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2
linux_aarch64_cuda_compiler_versionNone:
CONFIG: linux_aarch64_cuda_compiler_versionNone
UPLOAD_PACKAGES: 'True'
DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cos7-x86_64
+ linux_ppc64le_cuda_compiler_version11.2:
+ CONFIG: linux_ppc64le_cuda_compiler_version11.2
+ UPLOAD_PACKAGES: 'True'
+ DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2
linux_ppc64le_cuda_compiler_versionNone:
CONFIG: linux_ppc64le_cuda_compiler_versionNone
UPLOAD_PACKAGES: 'True'
diff --git a/.ci_support/linux_64_cuda_compiler_version10.2.yaml b/.ci_support/linux_64_cuda_compiler_version10.2.yaml
index 5d80a17c4..62efe90e2 100644
--- a/.ci_support/linux_64_cuda_compiler_version10.2.yaml
+++ b/.ci_support/linux_64_cuda_compiler_version10.2.yaml
@@ -45,8 +45,8 @@ lz4_c:
numpy:
- '1.21'
- '1.23'
-- '1.20'
-- '1.20'
+- '1.21'
+- '1.21'
openssl:
- '3'
orc:
diff --git a/.ci_support/linux_64_cuda_compiler_versionNone.yaml b/.ci_support/linux_64_cuda_compiler_versionNone.yaml
index 39b25b446..91549a610 100644
--- a/.ci_support/linux_64_cuda_compiler_versionNone.yaml
+++ b/.ci_support/linux_64_cuda_compiler_versionNone.yaml
@@ -45,8 +45,8 @@ lz4_c:
numpy:
- '1.21'
- '1.23'
-- '1.20'
-- '1.20'
+- '1.21'
+- '1.21'
openssl:
- '3'
orc:
diff --git a/.ci_support/linux_aarch64_cuda_compiler_version11.2.yaml b/.ci_support/linux_aarch64_cuda_compiler_version11.2.yaml
new file mode 100644
index 000000000..dceed4989
--- /dev/null
+++ b/.ci_support/linux_aarch64_cuda_compiler_version11.2.yaml
@@ -0,0 +1,88 @@
+BUILD:
+- aarch64-conda_cos7-linux-gnu
+aws_crt_cpp:
+- 0.19.8
+aws_sdk_cpp:
+- 1.10.57
+bzip2:
+- '1'
+c_ares:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '10'
+cdt_arch:
+- aarch64
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.2'
+cuda_compiler_version_min:
+- '11.2'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '10'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.2
+gflags:
+- '2.2'
+glog:
+- '0.6'
+google_cloud_cpp:
+- 2.8.0
+libabseil:
+- '20230125'
+libgrpc:
+- '1.52'
+libprotobuf:
+- '3.21'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.21'
+- '1.23'
+- '1.21'
+- '1.21'
+openssl:
+- '3'
+orc:
+- 1.8.3
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.8.* *_cpython
+- 3.9.* *_cpython
+re2:
+- 2023.02.02
+snappy:
+- '1'
+target_platform:
+- linux-aarch64
+thrift_cpp:
+- 0.18.1
+ucx:
+- 1.14.0
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+ - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/.ci_support/linux_aarch64_cuda_compiler_versionNone.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNone.yaml
index af0fc2dcd..9bff007d8 100644
--- a/.ci_support/linux_aarch64_cuda_compiler_versionNone.yaml
+++ b/.ci_support/linux_aarch64_cuda_compiler_versionNone.yaml
@@ -20,8 +20,12 @@ channel_sources:
- conda-forge
channel_targets:
- conda-forge main
+cuda_compiler:
+- nvcc
cuda_compiler_version:
- None
+cuda_compiler_version_min:
+- '11.2'
cxx_compiler:
- gxx
cxx_compiler_version:
@@ -45,8 +49,8 @@ lz4_c:
numpy:
- '1.21'
- '1.23'
-- '1.20'
-- '1.20'
+- '1.21'
+- '1.21'
openssl:
- '3'
orc:
diff --git a/.ci_support/linux_ppc64le_cuda_compiler_version11.2.yaml b/.ci_support/linux_ppc64le_cuda_compiler_version11.2.yaml
new file mode 100644
index 000000000..d8ad133cb
--- /dev/null
+++ b/.ci_support/linux_ppc64le_cuda_compiler_version11.2.yaml
@@ -0,0 +1,84 @@
+aws_crt_cpp:
+- 0.19.8
+aws_sdk_cpp:
+- 1.10.57
+bzip2:
+- '1'
+c_ares:
+- '1'
+c_compiler:
+- gcc
+c_compiler_version:
+- '10'
+cdt_name:
+- cos7
+channel_sources:
+- conda-forge
+channel_targets:
+- conda-forge main
+cuda_compiler:
+- nvcc
+cuda_compiler_version:
+- '11.2'
+cuda_compiler_version_min:
+- '11.2'
+cxx_compiler:
+- gxx
+cxx_compiler_version:
+- '10'
+docker_image:
+- quay.io/condaforge/linux-anvil-cuda:11.2
+gflags:
+- '2.2'
+glog:
+- '0.6'
+google_cloud_cpp:
+- 2.8.0
+libabseil:
+- '20230125'
+libgrpc:
+- '1.52'
+libprotobuf:
+- '3.21'
+lz4_c:
+- 1.9.3
+numpy:
+- '1.21'
+- '1.23'
+- '1.21'
+- '1.21'
+openssl:
+- '3'
+orc:
+- 1.8.3
+pin_run_as_build:
+ python:
+ min_pin: x.x
+ max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.8.* *_cpython
+- 3.9.* *_cpython
+re2:
+- 2023.02.02
+snappy:
+- '1'
+target_platform:
+- linux-ppc64le
+thrift_cpp:
+- 0.18.1
+ucx:
+- 1.14.0
+zip_keys:
+- - c_compiler_version
+ - cxx_compiler_version
+ - cuda_compiler_version
+ - cdt_name
+ - docker_image
+- - python
+ - numpy
+zlib:
+- '1.2'
+zstd:
+- '1.5'
diff --git a/.ci_support/linux_ppc64le_cuda_compiler_versionNone.yaml b/.ci_support/linux_ppc64le_cuda_compiler_versionNone.yaml
index 83a1f7f74..6126222b5 100644
--- a/.ci_support/linux_ppc64le_cuda_compiler_versionNone.yaml
+++ b/.ci_support/linux_ppc64le_cuda_compiler_versionNone.yaml
@@ -16,8 +16,12 @@ channel_sources:
- conda-forge
channel_targets:
- conda-forge main
+cuda_compiler:
+- nvcc
cuda_compiler_version:
- None
+cuda_compiler_version_min:
+- '11.2'
cxx_compiler:
- gxx
cxx_compiler_version:
@@ -41,8 +45,8 @@ lz4_c:
numpy:
- '1.21'
- '1.23'
-- '1.20'
-- '1.20'
+- '1.21'
+- '1.21'
openssl:
- '3'
orc:
diff --git a/.ci_support/migrations/libabseil20230125.yaml b/.ci_support/migrations/libabseil20230125.yaml
deleted file mode 100644
index 2dea689fc..000000000
--- a/.ci_support/migrations/libabseil20230125.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-__migrator:
- build_number: 1
- kind: version
- migration_number: 1
-libabseil:
-- '20230125'
-migrator_ts: 1676218104.8853533
diff --git a/.ci_support/migrations/libthrift0181.yaml b/.ci_support/migrations/libthrift0181.yaml
deleted file mode 100644
index fa3b1772e..000000000
--- a/.ci_support/migrations/libthrift0181.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-__migrator:
- build_number: 1
- kind: version
- migration_number: 1
-libthrift:
-- 0.18.1
-thrift_cpp:
-- 0.18.1
-migrator_ts: 1678838391.6043901
diff --git a/.ci_support/osx_64_.yaml b/.ci_support/osx_64_.yaml
index 0cf990cc1..b4a0efcd6 100644
--- a/.ci_support/osx_64_.yaml
+++ b/.ci_support/osx_64_.yaml
@@ -11,7 +11,7 @@ c_ares:
c_compiler:
- clang
c_compiler_version:
-- '14'
+- '15'
channel_sources:
- conda-forge
channel_targets:
@@ -21,7 +21,7 @@ cuda_compiler_version:
cxx_compiler:
- clangxx
cxx_compiler_version:
-- '14'
+- '15'
gflags:
- '2.2'
glog:
@@ -41,8 +41,8 @@ macos_machine:
numpy:
- '1.21'
- '1.23'
-- '1.20'
-- '1.20'
+- '1.21'
+- '1.21'
openssl:
- '3'
orc:
diff --git a/.ci_support/osx_arm64_.yaml b/.ci_support/osx_arm64_.yaml
index 3faa6278e..b6c0ae50d 100644
--- a/.ci_support/osx_arm64_.yaml
+++ b/.ci_support/osx_arm64_.yaml
@@ -11,7 +11,7 @@ c_ares:
c_compiler:
- clang
c_compiler_version:
-- '14'
+- '15'
channel_sources:
- conda-forge
channel_targets:
@@ -21,7 +21,7 @@ cuda_compiler_version:
cxx_compiler:
- clangxx
cxx_compiler_version:
-- '14'
+- '15'
gflags:
- '2.2'
glog:
@@ -41,8 +41,8 @@ macos_machine:
numpy:
- '1.21'
- '1.23'
-- '1.20'
-- '1.20'
+- '1.21'
+- '1.21'
openssl:
- '3'
orc:
diff --git a/.ci_support/win_64_cuda_compiler_version10.2.yaml b/.ci_support/win_64_cuda_compiler_version10.2.yaml
index 6ea00e3bd..408b53a07 100644
--- a/.ci_support/win_64_cuda_compiler_version10.2.yaml
+++ b/.ci_support/win_64_cuda_compiler_version10.2.yaml
@@ -29,7 +29,7 @@ libabseil:
libcrc32c:
- '1.1'
libcurl:
-- '7'
+- '8'
libgrpc:
- '1.52'
libprotobuf:
@@ -39,8 +39,8 @@ lz4_c:
numpy:
- '1.21'
- '1.23'
-- '1.20'
-- '1.20'
+- '1.21'
+- '1.21'
openssl:
- '3'
orc:
diff --git a/.ci_support/win_64_cuda_compiler_versionNone.yaml b/.ci_support/win_64_cuda_compiler_versionNone.yaml
index 183356662..f406d107b 100644
--- a/.ci_support/win_64_cuda_compiler_versionNone.yaml
+++ b/.ci_support/win_64_cuda_compiler_versionNone.yaml
@@ -29,7 +29,7 @@ libabseil:
libcrc32c:
- '1.1'
libcurl:
-- '7'
+- '8'
libgrpc:
- '1.52'
libprotobuf:
@@ -39,8 +39,8 @@ lz4_c:
numpy:
- '1.21'
- '1.23'
-- '1.20'
-- '1.20'
+- '1.21'
+- '1.21'
openssl:
- '3'
orc:
diff --git a/README.md b/README.md
index b79b78269..1b36c4bbd 100644
--- a/README.md
+++ b/README.md
@@ -75,6 +75,13 @@ Current build status
+
linux_ppc64le_cuda_compiler_versionNone |
diff --git a/recipe/build-arrow.sh b/recipe/build-arrow.sh
index ba1ffa7da..55b3c78d1 100644
--- a/recipe/build-arrow.sh
+++ b/recipe/build-arrow.sh
@@ -42,13 +42,19 @@ then
return 1
fi
fi
- EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=ON -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME} -DCMAKE_LIBRARY_PATH=${CUDA_HOME}/lib64/stubs"
+ EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=ON -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME} -DCMAKE_LIBRARY_PATH=${CONDA_BUILD_SYSROOT}/lib"
else
EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=OFF"
fi
-if [[ "${target_platform}" == "osx-arm64" ]]; then
- EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCLANG_EXECUTABLE=${BUILD_PREFIX}/bin/clang -DLLVM_LINK_EXECUTABLE=${BUILD_PREFIX}/bin/llvm-link"
+if [[ "${build_platform}" != "${target_platform}" ]]; then
+ # point to a usable protoc/grpc_cpp_plugin if we're cross-compiling
+ EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc"
+ if [[ ! -f ${BUILD_PREFIX}/bin/${CONDA_TOOLCHAIN_HOST}-clang ]]; then
+ ln -sf ${BUILD_PREFIX}/bin/clang ${BUILD_PREFIX}/bin/${CONDA_TOOLCHAIN_HOST}-clang
+ fi
+ EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCLANG_EXECUTABLE=${BUILD_PREFIX}/bin/${CONDA_TOOLCHAIN_HOST}-clang"
+ EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DLLVM_LINK_EXECUTABLE=${BUILD_PREFIX}/bin/llvm-link"
sed -ie "s;protoc-gen-grpc.*$;protoc-gen-grpc=${BUILD_PREFIX}/bin/grpc_cpp_plugin\";g" ../src/arrow/flight/CMakeLists.txt
sed -ie 's;"--with-jemalloc-prefix\=je_arrow_";"--with-jemalloc-prefix\=je_arrow_" "--with-lg-page\=14";g' ../cmake_modules/ThirdpartyToolchain.cmake
fi
@@ -64,11 +70,6 @@ if [[ "${target_platform}" == "linux-aarch64" ]] || [[ "${target_platform}" == "
export CMAKE_BUILD_PARALLEL_LEVEL=3
fi
-# point to a usable protoc if we're running on a different architecture than the target
-if [[ "${build_platform}" != "${target_platform}" ]]; then
- EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc"
-fi
-
# reusable variable for dependencies we cannot yet unvendor
export READ_RECIPE_META_YAML_WHY_NOT=OFF
diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
new file mode 100644
index 000000000..dd8dd01f1
--- /dev/null
+++ b/recipe/conda_build_config.yaml
@@ -0,0 +1,4 @@
+c_compiler_version: # [osx]
+ - 15 # [osx]
+cxx_compiler_version: # [osx]
+ - 15 # [osx]
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 09f77fb72..423527421 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -3,7 +3,7 @@
{% set build_ext_version = "4.0.0" %}
{% set build_ext = "cuda" if cuda_enabled else "cpu" %}
{% set proc_build_number = "0" %}
-{% set llvm_version = "14" %}
+{% set llvm_version = "15" %}
# see https://github.com/apache/arrow/blob/apache-arrow-10.0.1/cpp/CMakeLists.txt#L88-L90
{% set so_version = (version.split(".")[0] | int * 100 + version.split(".")[1] | int) ~ "." ~ version.split(".")[2] ~ ".0" %}
@@ -24,19 +24,20 @@ source:
- patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch
# backport apache/arrow#34019 to disable useless pkgconfig search that takes ~15min
- patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch
+ # backport apache/arrow#34498, #34878 & #34881 for pandas 2.0 compatibility
+ - patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch
+ - patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch
+ - patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch
# testing-submodule not part of release tarball
- git_url: https://github.com/apache/arrow-testing.git
git_rev: 00c483283433b4c02cb811f260dbe35414c806a4
folder: testing
build:
- number: 13
+ number: 14
# for cuda support, building with one version is enough to be compatible with
# all later versions, since arrow is only using libcuda, and not libcudart.
skip: true # [cuda_compiler_version not in ("None", cuda_compiler_version_min)]
- # temporary: skip CUDA on aarch/ppc until cross-compilation works, see
- # https://github.com/conda-forge/conda-forge-ci-setup-feedstock/pull/210
- skip: true # [(aarch64 or ppc64le) and (cuda_compiler_version != "None")]
run_exports:
- {{ pin_subpackage("libarrow", max_pin="x.x.x") }}
@@ -88,12 +89,13 @@ outputs:
- {{ compiler("c") }}
- {{ compiler("cxx") }}
- {{ compiler("cuda") }} # [cuda_compiler_version != "None"]
- - clangdev {{ llvm_version }} # [osx and arm64]
- - llvmdev {{ llvm_version }} # [osx and arm64]
- - gnuconfig # [osx and arm64]
# needs to run protoc & grpc_cpp_plugin
- libgrpc # [build_platform != target_platform]
- libprotobuf # [build_platform != target_platform]
+ # needed for gandiva
+ - clangdev {{ llvm_version }} # [build_platform != target_platform]
+ - llvmdev {{ llvm_version }} # [build_platform != target_platform]
+ - gnuconfig # [build_platform != target_platform]
- cmake
- ninja
# necessary for vendored jemalloc
diff --git a/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch b/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch
index b87b72104..246e10899 100644
--- a/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch
+++ b/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch
@@ -1,7 +1,7 @@
From 7f1350d76bf71a8123cf78abe18ddb5876a7ca80 Mon Sep 17 00:00:00 2001
From: "H. Vetinari"
Date: Thu, 26 Jan 2023 12:37:02 +1100
-Subject: [PATCH 1/2] don't bake non-relocatable CMAKE_INSTALL_FULL_LIBDIR into
+Subject: [PATCH 1/5] don't bake non-relocatable CMAKE_INSTALL_FULL_LIBDIR into
gdb-integration
---
diff --git a/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch b/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch
index 324fb2259..5a9c5a33b 100644
--- a/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch
+++ b/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch
@@ -1,7 +1,7 @@
From 179a3caa661b7a93f6a136ea44f6f53671611533 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Sat, 4 Feb 2023 22:08:54 +0900
-Subject: [PATCH 2/2] GH-33882: [C++] Don't find .pc files with
+Subject: [PATCH 2/5] GH-33882: [C++] Don't find .pc files with
ARROW_BUILD_STATIC=OFF (#34019)
Because they are needless and `pkg-config grpc++` is slow.
diff --git a/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch b/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch
new file mode 100644
index 000000000..21211d851
--- /dev/null
+++ b/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch
@@ -0,0 +1,130 @@
+From da0b59a5d55032fb94618d776578b156a244ab2b Mon Sep 17 00:00:00 2001
+From: Alenka Frim
+Date: Fri, 10 Mar 2023 17:36:41 +0100
+Subject: [PATCH 3/5] GH-34404: [Python] Failing tests because pandas.Index can
+ now store all numeric dtypes (not only 64bit versions) (#34498)
+
+### Rationale for this change
+Several failing tests in the nightly build (https://github.com/ursacomputing/crossbow/actions/runs/4277727973/jobs/7446784501)
+
+### What changes are included in this PR?
+Due to a change in supported dtypes for Index in pandas, the tests expecting `int64` and not `int32` are failing with the dev version of pandas. The failing tests are updated to match the new pandas behaviour.
+* Closes: #34404
+
+Authored-by: Alenka Frim
+Signed-off-by: Joris Van den Bossche
+---
+ python/pyarrow/tests/parquet/test_dataset.py | 11 +++++-
+ python/pyarrow/tests/test_compute.py | 40 +++++++++++++-------
+ python/pyarrow/tests/test_pandas.py | 4 +-
+ 3 files changed, 38 insertions(+), 17 deletions(-)
+
+diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
+index 1bfde4e17..fd24f1642 100644
+--- a/python/pyarrow/tests/parquet/test_dataset.py
++++ b/python/pyarrow/tests/parquet/test_dataset.py
+@@ -735,8 +735,15 @@ def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True):
+ .reset_index(drop=True)
+ .reindex(columns=result_df.columns))
+
+- expected_df['foo'] = pd.Categorical(df['foo'], categories=foo_keys)
+- expected_df['bar'] = pd.Categorical(df['bar'], categories=bar_keys)
++ if use_legacy_dataset or Version(pd.__version__) < Version("2.0.0"):
++ expected_df['foo'] = pd.Categorical(df['foo'], categories=foo_keys)
++ expected_df['bar'] = pd.Categorical(df['bar'], categories=bar_keys)
++ else:
++ # With pandas 2.0.0 Index can store all numeric dtypes (not just
++ # int64/uint64/float64). Using astype() to create a categorical
++ # column preserves original dtype (int32)
++ expected_df['foo'] = expected_df['foo'].astype("category")
++ expected_df['bar'] = expected_df['bar'].astype("category")
+
+ assert (result_df.columns == ['index', 'values', 'foo', 'bar']).all()
+
+diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
+index 996509999..81c0a4e8b 100644
+--- a/python/pyarrow/tests/test_compute.py
++++ b/python/pyarrow/tests/test_compute.py
+@@ -1916,22 +1916,36 @@ def _check_datetime_components(timestamps, timezone=None):
+ [iso_year, iso_week, iso_day],
+ fields=iso_calendar_fields)
+
+- assert pc.year(tsa).equals(pa.array(ts.dt.year))
++ # Casting is required because pandas with 2.0.0 various numeric
++ # date/time attributes have dtype int32 (previously int64)
++ year = ts.dt.year.astype("int64")
++ month = ts.dt.month.astype("int64")
++ day = ts.dt.day.astype("int64")
++ dayofweek = ts.dt.dayofweek.astype("int64")
++ dayofyear = ts.dt.dayofyear.astype("int64")
++ quarter = ts.dt.quarter.astype("int64")
++ hour = ts.dt.hour.astype("int64")
++ minute = ts.dt.minute.astype("int64")
++ second = ts.dt.second.values.astype("int64")
++ microsecond = ts.dt.microsecond.astype("int64")
++ nanosecond = ts.dt.nanosecond.astype("int64")
++
++ assert pc.year(tsa).equals(pa.array(year))
+ assert pc.is_leap_year(tsa).equals(pa.array(ts.dt.is_leap_year))
+- assert pc.month(tsa).equals(pa.array(ts.dt.month))
+- assert pc.day(tsa).equals(pa.array(ts.dt.day))
+- assert pc.day_of_week(tsa).equals(pa.array(ts.dt.dayofweek))
+- assert pc.day_of_year(tsa).equals(pa.array(ts.dt.dayofyear))
++ assert pc.month(tsa).equals(pa.array(month))
++ assert pc.day(tsa).equals(pa.array(day))
++ assert pc.day_of_week(tsa).equals(pa.array(dayofweek))
++ assert pc.day_of_year(tsa).equals(pa.array(dayofyear))
+ assert pc.iso_year(tsa).equals(pa.array(iso_year))
+ assert pc.iso_week(tsa).equals(pa.array(iso_week))
+ assert pc.iso_calendar(tsa).equals(iso_calendar)
+- assert pc.quarter(tsa).equals(pa.array(ts.dt.quarter))
+- assert pc.hour(tsa).equals(pa.array(ts.dt.hour))
+- assert pc.minute(tsa).equals(pa.array(ts.dt.minute))
+- assert pc.second(tsa).equals(pa.array(ts.dt.second.values))
+- assert pc.millisecond(tsa).equals(pa.array(ts.dt.microsecond // 10 ** 3))
+- assert pc.microsecond(tsa).equals(pa.array(ts.dt.microsecond % 10 ** 3))
+- assert pc.nanosecond(tsa).equals(pa.array(ts.dt.nanosecond))
++ assert pc.quarter(tsa).equals(pa.array(quarter))
++ assert pc.hour(tsa).equals(pa.array(hour))
++ assert pc.minute(tsa).equals(pa.array(minute))
++ assert pc.second(tsa).equals(pa.array(second))
++ assert pc.millisecond(tsa).equals(pa.array(microsecond // 10 ** 3))
++ assert pc.microsecond(tsa).equals(pa.array(microsecond % 10 ** 3))
++ assert pc.nanosecond(tsa).equals(pa.array(nanosecond))
+ assert pc.subsecond(tsa).equals(pa.array(subseconds))
+
+ if ts.dt.tz:
+@@ -1945,7 +1959,7 @@ def _check_datetime_components(timestamps, timezone=None):
+ day_of_week_options = pc.DayOfWeekOptions(
+ count_from_zero=False, week_start=1)
+ assert pc.day_of_week(tsa, options=day_of_week_options).equals(
+- pa.array(ts.dt.dayofweek + 1))
++ pa.array(dayofweek + 1))
+
+ week_options = pc.WeekOptions(
+ week_starts_monday=True, count_from_zero=False,
+diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
+index 4d0ddf875..ddec0f0de 100644
+--- a/python/pyarrow/tests/test_pandas.py
++++ b/python/pyarrow/tests/test_pandas.py
+@@ -3250,7 +3250,7 @@ def test_table_from_pandas_schema_index_columns():
+ schema = pa.schema([
+ ('a', pa.int64()),
+ ('b', pa.float64()),
+- ('index', pa.int32()),
++ ('index', pa.int64()),
+ ])
+
+ # schema includes index with name not in dataframe
+@@ -3283,7 +3283,7 @@ def test_table_from_pandas_schema_index_columns():
+
+ # schema has different order (index column not at the end)
+ schema = pa.schema([
+- ('index', pa.int32()),
++ ('index', pa.int64()),
+ ('a', pa.int64()),
+ ('b', pa.float64()),
+ ])
+--
+2.38.1.windows.1
+
diff --git a/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch b/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch
new file mode 100644
index 000000000..24048ad3e
--- /dev/null
+++ b/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch
@@ -0,0 +1,130 @@
+From 041263a350d8c7e00c665480370e41c2031df1b5 Mon Sep 17 00:00:00 2001
+From: Joris Van den Bossche
+Date: Tue, 4 Apr 2023 16:43:26 +0200
+Subject: [PATCH 4/5] GH-15070: [Python][CI] Compatibility with pandas 2.0
+ (#34878)
+
+### What changes are included in this PR?
+
+- The issue with numpy 1.25 in the assert equal helper was fixed in pandas 1.5.3 -> removing the skip (in theory can still run into this error when using an older pandas version with the latest numpy, but that's not something you should do)
+- Casting tz-aware strings to datetime64[ns] was not fixed in pandas (https://github.com/pandas-dev/pandas/issues/50140) -> updating our implementation to work around it
+- Casting to numpy string dtype (https://github.com/pandas-dev/pandas/issues/50127) is not yet fixed -> updating the skip
+
+### Are there any user-facing changes?
+
+No
+* Closes: #15070
+
+Authored-by: Joris Van den Bossche
+Signed-off-by: Joris Van den Bossche
+---
+ python/pyarrow/pandas_compat.py | 3 +--
+ python/pyarrow/tests/test_pandas.py | 34 ++++++-----------------------
+ 2 files changed, 8 insertions(+), 29 deletions(-)
+
+diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
+index d624459ca..b0ab28bf1 100644
+--- a/python/pyarrow/pandas_compat.py
++++ b/python/pyarrow/pandas_compat.py
+@@ -1148,8 +1148,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
+ if pandas_dtype == "datetimetz":
+ tz = pa.lib.string_to_tzinfo(
+ column_indexes[0]['metadata']['timezone'])
+- dt = level.astype(numpy_dtype)
+- level = dt.tz_localize('utc').tz_convert(tz)
++ level = pd.to_datetime(level, utc=True).tz_convert(tz)
+ elif level.dtype != dtype:
+ level = level.astype(dtype)
+ # ARROW-9096: if original DataFrame was upcast we keep that
+diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
+index ddec0f0de..e8ae7f1f5 100644
+--- a/python/pyarrow/tests/test_pandas.py
++++ b/python/pyarrow/tests/test_pandas.py
+@@ -187,17 +187,12 @@ class TestConvertMetadata:
+ _check_pandas_roundtrip(df, preserve_index=True)
+
+ def test_column_index_names_with_tz(self):
+- if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"):
+- # TODO: regression in pandas, should be fixed before final 2.0.0
+- # https://github.com/pandas-dev/pandas/issues/50140
+- pytest.skip("Regression in pandas 2.0.0.dev")
+ # ARROW-13756
+ # Bug if index is timezone aware DataTimeIndex
+
+ df = pd.DataFrame(
+ np.random.randn(5, 3),
+- columns=pd.date_range(
+- "2021-01-01", "2021-01-3", freq="D", tz="CET")
++ columns=pd.date_range("2021-01-01", periods=3, freq="50D", tz="CET")
+ )
+ _check_pandas_roundtrip(df, preserve_index=True)
+
+@@ -453,11 +448,11 @@ class TestConvertMetadata:
+ preserve_index=True)
+
+ def test_binary_column_name(self):
+- if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"):
+- # TODO: regression in pandas, should be fixed before final 2.0.0
++ if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"):
++ # TODO: regression in pandas, hopefully fixed in next version
+ # https://issues.apache.org/jira/browse/ARROW-18394
+ # https://github.com/pandas-dev/pandas/issues/50127
+- pytest.skip("Regression in pandas 2.0.0.dev")
++ pytest.skip("Regression in pandas 2.0.0")
+ column_data = ['い']
+ key = 'あ'.encode()
+ data = {key: column_data}
+@@ -2064,11 +2059,6 @@ class TestConvertListTypes:
+ assert result3.equals(expected3)
+
+ def test_infer_lists(self):
+- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and
+- (Version(pd.__version__) < Version("2.0.0"))):
+- # TODO: regression in pandas with numpy 1.25dev
+- # https://github.com/pandas-dev/pandas/issues/50360
+- pytest.skip("Regression in pandas with numpy 1.25")
+ data = OrderedDict([
+ ('nan_ints', [[None, 1], [2, 3]]),
+ ('ints', [[0, 1], [2, 3]]),
+@@ -2118,11 +2108,6 @@ class TestConvertListTypes:
+ _check_pandas_roundtrip(df, expected_schema=expected_schema)
+
+ def test_to_list_of_structs_pandas(self):
+- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and
+- (Version(pd.__version__) < Version("2.0.0"))):
+- # TODO: regression in pandas with numpy 1.25dev
+- # https://github.com/pandas-dev/pandas/issues/50360
+- pytest.skip("Regression in pandas with numpy 1.25")
+ ints = pa.array([1, 2, 3], pa.int32())
+ strings = pa.array([['a', 'b'], ['c', 'd'], ['e', 'f']],
+ pa.list_(pa.string()))
+@@ -2192,11 +2177,6 @@ class TestConvertListTypes:
+ assert result.equals(expected)
+
+ def test_nested_large_list(self):
+- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and
+- (Version(pd.__version__) < Version("2.0.0"))):
+- # TODO: regression in pandas with numpy 1.25dev
+- # https://github.com/pandas-dev/pandas/issues/50360
+- pytest.skip("Regression in pandas with numpy 1.25")
+ s = (pa.array([[[1, 2, 3], [4]], None],
+ type=pa.large_list(pa.large_list(pa.int64())))
+ .to_pandas())
+@@ -2950,11 +2930,11 @@ def _fully_loaded_dataframe_example():
+
+ @pytest.mark.parametrize('columns', ([b'foo'], ['foo']))
+ def test_roundtrip_with_bytes_unicode(columns):
+- if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"):
+- # TODO: regression in pandas, should be fixed before final 2.0.0
++ if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"):
++ # TODO: regression in pandas, hopefully fixed in next version
+ # https://issues.apache.org/jira/browse/ARROW-18394
+ # https://github.com/pandas-dev/pandas/issues/50127
+- pytest.skip("Regression in pandas 2.0.0.dev")
++ pytest.skip("Regression in pandas 2.0.0")
+
+ df = pd.DataFrame(columns=columns)
+ table1 = pa.Table.from_pandas(df)
+--
+2.38.1.windows.1
+
diff --git a/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch b/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch
new file mode 100644
index 000000000..151d32ded
--- /dev/null
+++ b/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch
@@ -0,0 +1,69 @@
+From 7cef42a8ce057fba2008042de9c95fc8c2b15f90 Mon Sep 17 00:00:00 2001
+From: Joris Van den Bossche
+Date: Tue, 4 Apr 2023 20:38:25 +0200
+Subject: [PATCH 5/5] GH-34880: [Python][CI] Fix Windows tests failing with
+ latest pandas 2.0 (#34881)
+
+* Closes: #34880
+
+Authored-by: Joris Van den Bossche
+Signed-off-by: Jacob Wujciak-Jens
+---
+ ci/appveyor-cpp-setup.bat | 3 ++-
+ python/pyarrow/tests/parquet/test_pandas.py | 4 +++-
+ python/pyarrow/tests/test_pandas.py | 3 ++-
+ 3 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat
+index 64f930a16..73def17e5 100644
+--- a/ci/appveyor-cpp-setup.bat
++++ b/ci/appveyor-cpp-setup.bat
+@@ -64,7 +64,7 @@ if "%ARROW_BUILD_GANDIVA%" == "ON" (
+ @rem Install pre-built "toolchain" packages for faster builds
+ set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt
+ @rem Arrow conda environment
+-mamba create -n arrow -q -y -c conda-forge ^
++mamba create -n arrow -y -c conda-forge ^
+ --file=ci\conda_env_python.txt ^
+ %CONDA_PACKAGES% ^
+ "ccache" ^
+@@ -75,6 +75,7 @@ mamba create -n arrow -q -y -c conda-forge ^
+ "fsspec" ^
+ "python=%PYTHON%" ^
+ || exit /B
++conda list -n arrow
+
+ @rem
+ @rem Configure compiler
+diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py
+index b6250e8fc..c27b16de7 100644
+--- a/python/pyarrow/tests/parquet/test_pandas.py
++++ b/python/pyarrow/tests/parquet/test_pandas.py
+@@ -643,7 +643,9 @@ def test_dataset_read_pandas_common_metadata(
+ paths = []
+ for i in range(nfiles):
+ df = _test_dataframe(size, seed=i)
+- df.index = pd.Index(np.arange(i * size, (i + 1) * size), name='index')
++ df.index = pd.Index(
++ np.arange(i * size, (i + 1) * size, dtype="int64"), name='index'
++ )
+
+ path = dirpath / '{}.parquet'.format(i)
+
+diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
+index e8ae7f1f5..45a462a7b 100644
+--- a/python/pyarrow/tests/test_pandas.py
++++ b/python/pyarrow/tests/test_pandas.py
+@@ -2570,7 +2570,8 @@ class TestZeroCopyConversion:
+ def test_zero_copy_dictionaries(self):
+ arr = pa.DictionaryArray.from_arrays(
+ np.array([0, 0]),
+- np.array([5]))
++ np.array([5], dtype="int64"),
++ )
+
+ result = arr.to_pandas(zero_copy_only=True)
+ values = pd.Categorical([5, 5])
+--
+2.38.1.windows.1
+
|