diff --git a/.azure-pipelines/azure-pipelines-linux.yml b/.azure-pipelines/azure-pipelines-linux.yml index 51fbe8529..6cd2103f4 100755 --- a/.azure-pipelines/azure-pipelines-linux.yml +++ b/.azure-pipelines/azure-pipelines-linux.yml @@ -16,10 +16,18 @@ jobs: CONFIG: linux_64_cuda_compiler_versionNone UPLOAD_PACKAGES: 'True' DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cos7-x86_64 + linux_aarch64_cuda_compiler_version11.2: + CONFIG: linux_aarch64_cuda_compiler_version11.2 + UPLOAD_PACKAGES: 'True' + DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2 linux_aarch64_cuda_compiler_versionNone: CONFIG: linux_aarch64_cuda_compiler_versionNone UPLOAD_PACKAGES: 'True' DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cos7-x86_64 + linux_ppc64le_cuda_compiler_version11.2: + CONFIG: linux_ppc64le_cuda_compiler_version11.2 + UPLOAD_PACKAGES: 'True' + DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2 linux_ppc64le_cuda_compiler_versionNone: CONFIG: linux_ppc64le_cuda_compiler_versionNone UPLOAD_PACKAGES: 'True' diff --git a/.ci_support/linux_64_cuda_compiler_version10.2.yaml b/.ci_support/linux_64_cuda_compiler_version10.2.yaml index 5d80a17c4..62efe90e2 100644 --- a/.ci_support/linux_64_cuda_compiler_version10.2.yaml +++ b/.ci_support/linux_64_cuda_compiler_version10.2.yaml @@ -45,8 +45,8 @@ lz4_c: numpy: - '1.21' - '1.23' -- '1.20' -- '1.20' +- '1.21' +- '1.21' openssl: - '3' orc: diff --git a/.ci_support/linux_64_cuda_compiler_versionNone.yaml b/.ci_support/linux_64_cuda_compiler_versionNone.yaml index 39b25b446..91549a610 100644 --- a/.ci_support/linux_64_cuda_compiler_versionNone.yaml +++ b/.ci_support/linux_64_cuda_compiler_versionNone.yaml @@ -45,8 +45,8 @@ lz4_c: numpy: - '1.21' - '1.23' -- '1.20' -- '1.20' +- '1.21' +- '1.21' openssl: - '3' orc: diff --git a/.ci_support/linux_aarch64_cuda_compiler_version11.2.yaml b/.ci_support/linux_aarch64_cuda_compiler_version11.2.yaml new file mode 100644 index 000000000..dceed4989 --- /dev/null +++ b/.ci_support/linux_aarch64_cuda_compiler_version11.2.yaml @@ -0,0 +1,88 @@ +BUILD: +- aarch64-conda_cos7-linux-gnu +aws_crt_cpp: +- 0.19.8 +aws_sdk_cpp: +- 1.10.57 +bzip2: +- '1' +c_ares: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '10' +cdt_arch: +- aarch64 +cdt_name: +- cos7 +channel_sources: +- conda-forge +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- '11.2' +cuda_compiler_version_min: +- '11.2' +cxx_compiler: +- gxx +cxx_compiler_version: +- '10' +docker_image: +- quay.io/condaforge/linux-anvil-cuda:11.2 +gflags: +- '2.2' +glog: +- '0.6' +google_cloud_cpp: +- 2.8.0 +libabseil: +- '20230125' +libgrpc: +- '1.52' +libprotobuf: +- '3.21' +lz4_c: +- 1.9.3 +numpy: +- '1.21' +- '1.23' +- '1.21' +- '1.21' +openssl: +- '3' +orc: +- 1.8.3 +pin_run_as_build: + python: + min_pin: x.x + max_pin: x.x +python: +- 3.10.* *_cpython +- 3.11.* *_cpython +- 3.8.* *_cpython +- 3.9.* *_cpython +re2: +- 2023.02.02 +snappy: +- '1' +target_platform: +- linux-aarch64 +thrift_cpp: +- 0.18.1 +ucx: +- 1.14.0 +zip_keys: +- - c_compiler_version + - cxx_compiler_version + - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/.ci_support/linux_aarch64_cuda_compiler_versionNone.yaml b/.ci_support/linux_aarch64_cuda_compiler_versionNone.yaml index af0fc2dcd..9bff007d8 100644 --- a/.ci_support/linux_aarch64_cuda_compiler_versionNone.yaml +++ b/.ci_support/linux_aarch64_cuda_compiler_versionNone.yaml @@ -20,8 +20,12 @@ channel_sources: - conda-forge channel_targets: - 
conda-forge main +cuda_compiler: +- nvcc cuda_compiler_version: - None +cuda_compiler_version_min: +- '11.2' cxx_compiler: - gxx cxx_compiler_version: @@ -45,8 +49,8 @@ lz4_c: numpy: - '1.21' - '1.23' -- '1.20' -- '1.20' +- '1.21' +- '1.21' openssl: - '3' orc: diff --git a/.ci_support/linux_ppc64le_cuda_compiler_version11.2.yaml b/.ci_support/linux_ppc64le_cuda_compiler_version11.2.yaml new file mode 100644 index 000000000..d8ad133cb --- /dev/null +++ b/.ci_support/linux_ppc64le_cuda_compiler_version11.2.yaml @@ -0,0 +1,84 @@ +aws_crt_cpp: +- 0.19.8 +aws_sdk_cpp: +- 1.10.57 +bzip2: +- '1' +c_ares: +- '1' +c_compiler: +- gcc +c_compiler_version: +- '10' +cdt_name: +- cos7 +channel_sources: +- conda-forge +channel_targets: +- conda-forge main +cuda_compiler: +- nvcc +cuda_compiler_version: +- '11.2' +cuda_compiler_version_min: +- '11.2' +cxx_compiler: +- gxx +cxx_compiler_version: +- '10' +docker_image: +- quay.io/condaforge/linux-anvil-cuda:11.2 +gflags: +- '2.2' +glog: +- '0.6' +google_cloud_cpp: +- 2.8.0 +libabseil: +- '20230125' +libgrpc: +- '1.52' +libprotobuf: +- '3.21' +lz4_c: +- 1.9.3 +numpy: +- '1.21' +- '1.23' +- '1.21' +- '1.21' +openssl: +- '3' +orc: +- 1.8.3 +pin_run_as_build: + python: + min_pin: x.x + max_pin: x.x +python: +- 3.10.* *_cpython +- 3.11.* *_cpython +- 3.8.* *_cpython +- 3.9.* *_cpython +re2: +- 2023.02.02 +snappy: +- '1' +target_platform: +- linux-ppc64le +thrift_cpp: +- 0.18.1 +ucx: +- 1.14.0 +zip_keys: +- - c_compiler_version + - cxx_compiler_version + - cuda_compiler_version + - cdt_name + - docker_image +- - python + - numpy +zlib: +- '1.2' +zstd: +- '1.5' diff --git a/.ci_support/linux_ppc64le_cuda_compiler_versionNone.yaml b/.ci_support/linux_ppc64le_cuda_compiler_versionNone.yaml index 83a1f7f74..6126222b5 100644 --- a/.ci_support/linux_ppc64le_cuda_compiler_versionNone.yaml +++ b/.ci_support/linux_ppc64le_cuda_compiler_versionNone.yaml @@ -16,8 +16,12 @@ channel_sources: - conda-forge channel_targets: - conda-forge main +cuda_compiler: +- nvcc cuda_compiler_version: - None +cuda_compiler_version_min: +- '11.2' cxx_compiler: - gxx cxx_compiler_version: @@ -41,8 +45,8 @@ lz4_c: numpy: - '1.21' - '1.23' -- '1.20' -- '1.20' +- '1.21' +- '1.21' openssl: - '3' orc: diff --git a/.ci_support/migrations/libabseil20230125.yaml b/.ci_support/migrations/libabseil20230125.yaml deleted file mode 100644 index 2dea689fc..000000000 --- a/.ci_support/migrations/libabseil20230125.yaml +++ /dev/null @@ -1,7 +0,0 @@ -__migrator: - build_number: 1 - kind: version - migration_number: 1 -libabseil: -- '20230125' -migrator_ts: 1676218104.8853533 diff --git a/.ci_support/migrations/libthrift0181.yaml b/.ci_support/migrations/libthrift0181.yaml deleted file mode 100644 index fa3b1772e..000000000 --- a/.ci_support/migrations/libthrift0181.yaml +++ /dev/null @@ -1,9 +0,0 @@ -__migrator: - build_number: 1 - kind: version - migration_number: 1 -libthrift: -- 0.18.1 -thrift_cpp: -- 0.18.1 -migrator_ts: 1678838391.6043901 diff --git a/.ci_support/osx_64_.yaml b/.ci_support/osx_64_.yaml index 0cf990cc1..b4a0efcd6 100644 --- a/.ci_support/osx_64_.yaml +++ b/.ci_support/osx_64_.yaml @@ -11,7 +11,7 @@ c_ares: c_compiler: - clang c_compiler_version: -- '14' +- '15' channel_sources: - conda-forge channel_targets: @@ -21,7 +21,7 @@ cuda_compiler_version: cxx_compiler: - clangxx cxx_compiler_version: -- '14' +- '15' gflags: - '2.2' glog: @@ -41,8 +41,8 @@ macos_machine: numpy: - '1.21' - '1.23' -- '1.20' -- '1.20' +- '1.21' +- '1.21' openssl: - '3' orc: diff --git a/.ci_support/osx_arm64_.yaml 
b/.ci_support/osx_arm64_.yaml index 3faa6278e..b6c0ae50d 100644 --- a/.ci_support/osx_arm64_.yaml +++ b/.ci_support/osx_arm64_.yaml @@ -11,7 +11,7 @@ c_ares: c_compiler: - clang c_compiler_version: -- '14' +- '15' channel_sources: - conda-forge channel_targets: @@ -21,7 +21,7 @@ cuda_compiler_version: cxx_compiler: - clangxx cxx_compiler_version: -- '14' +- '15' gflags: - '2.2' glog: @@ -41,8 +41,8 @@ macos_machine: numpy: - '1.21' - '1.23' -- '1.20' -- '1.20' +- '1.21' +- '1.21' openssl: - '3' orc: diff --git a/.ci_support/win_64_cuda_compiler_version10.2.yaml b/.ci_support/win_64_cuda_compiler_version10.2.yaml index 6ea00e3bd..408b53a07 100644 --- a/.ci_support/win_64_cuda_compiler_version10.2.yaml +++ b/.ci_support/win_64_cuda_compiler_version10.2.yaml @@ -29,7 +29,7 @@ libabseil: libcrc32c: - '1.1' libcurl: -- '7' +- '8' libgrpc: - '1.52' libprotobuf: @@ -39,8 +39,8 @@ lz4_c: numpy: - '1.21' - '1.23' -- '1.20' -- '1.20' +- '1.21' +- '1.21' openssl: - '3' orc: diff --git a/.ci_support/win_64_cuda_compiler_versionNone.yaml b/.ci_support/win_64_cuda_compiler_versionNone.yaml index 183356662..f406d107b 100644 --- a/.ci_support/win_64_cuda_compiler_versionNone.yaml +++ b/.ci_support/win_64_cuda_compiler_versionNone.yaml @@ -29,7 +29,7 @@ libabseil: libcrc32c: - '1.1' libcurl: -- '7' +- '8' libgrpc: - '1.52' libprotobuf: @@ -39,8 +39,8 @@ lz4_c: numpy: - '1.21' - '1.23' -- '1.20' -- '1.20' +- '1.21' +- '1.21' openssl: - '3' orc: diff --git a/README.md b/README.md index b79b78269..1b36c4bbd 100644 --- a/README.md +++ b/README.md @@ -75,6 +75,13 @@ Current build status variant + + linux_aarch64_cuda_compiler_version11.2 + + + variant + + linux_aarch64_cuda_compiler_versionNone @@ -82,6 +89,13 @@ Current build status variant + + linux_ppc64le_cuda_compiler_version11.2 + + + variant + + linux_ppc64le_cuda_compiler_versionNone diff --git a/recipe/build-arrow.sh b/recipe/build-arrow.sh index ba1ffa7da..55b3c78d1 100644 --- a/recipe/build-arrow.sh +++ b/recipe/build-arrow.sh @@ -42,13 +42,19 @@ then return 1 fi fi - EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=ON -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME} -DCMAKE_LIBRARY_PATH=${CUDA_HOME}/lib64/stubs" + EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=ON -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME} -DCMAKE_LIBRARY_PATH=${CONDA_BUILD_SYSROOT}/lib" else EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_CUDA=OFF" fi -if [[ "${target_platform}" == "osx-arm64" ]]; then - EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCLANG_EXECUTABLE=${BUILD_PREFIX}/bin/clang -DLLVM_LINK_EXECUTABLE=${BUILD_PREFIX}/bin/llvm-link" +if [[ "${build_platform}" != "${target_platform}" ]]; then + # point to a usable protoc/grpc_cpp_plugin if we're cross-compiling + EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc" + if [[ ! 
-f ${BUILD_PREFIX}/bin/${CONDA_TOOLCHAIN_HOST}-clang ]]; then + ln -sf ${BUILD_PREFIX}/bin/clang ${BUILD_PREFIX}/bin/${CONDA_TOOLCHAIN_HOST}-clang + fi + EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCLANG_EXECUTABLE=${BUILD_PREFIX}/bin/${CONDA_TOOLCHAIN_HOST}-clang" + EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DLLVM_LINK_EXECUTABLE=${BUILD_PREFIX}/bin/llvm-link" sed -ie "s;protoc-gen-grpc.*$;protoc-gen-grpc=${BUILD_PREFIX}/bin/grpc_cpp_plugin\";g" ../src/arrow/flight/CMakeLists.txt sed -ie 's;"--with-jemalloc-prefix\=je_arrow_";"--with-jemalloc-prefix\=je_arrow_" "--with-lg-page\=14";g' ../cmake_modules/ThirdpartyToolchain.cmake fi @@ -64,11 +70,6 @@ if [[ "${target_platform}" == "linux-aarch64" ]] || [[ "${target_platform}" == " export CMAKE_BUILD_PARALLEL_LEVEL=3 fi -# point to a usable protoc if we're running on a different architecture than the target -if [[ "${build_platform}" != "${target_platform}" ]]; then - EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc" -fi - # reusable variable for dependencies we cannot yet unvendor export READ_RECIPE_META_YAML_WHY_NOT=OFF diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml new file mode 100644 index 000000000..dd8dd01f1 --- /dev/null +++ b/recipe/conda_build_config.yaml @@ -0,0 +1,4 @@ +c_compiler_version: # [osx] + - 15 # [osx] +cxx_compiler_version: # [osx] + - 15 # [osx] diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 09f77fb72..423527421 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -3,7 +3,7 @@ {% set build_ext_version = "4.0.0" %} {% set build_ext = "cuda" if cuda_enabled else "cpu" %} {% set proc_build_number = "0" %} -{% set llvm_version = "14" %} +{% set llvm_version = "15" %} # see https://github.com/apache/arrow/blob/apache-arrow-10.0.1/cpp/CMakeLists.txt#L88-L90 {% set so_version = (version.split(".")[0] | int * 100 + version.split(".")[1] | int) ~ "." ~ version.split(".")[2] ~ ".0" %} @@ -24,19 +24,20 @@ source: - patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch # backport apache/arrow#34019 to disable useless pkgconfig search that takes ~15min - patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch + # backport apache/arrow#34498, 34878 & #34881 for pandas 2.0 compatibility + - patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch + - patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch + - patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch # testing-submodule not part of release tarball - git_url: https://github.com/apache/arrow-testing.git git_rev: 00c483283433b4c02cb811f260dbe35414c806a4 folder: testing build: - number: 13 + number: 14 # for cuda support, building with one version is enough to be compatible with # all later versions, since arrow is only using libcuda, and not libcudart. 
skip: true # [cuda_compiler_version not in ("None", cuda_compiler_version_min)] - # temporary: skip CUDA on aarch/ppc until cross-compilation works, see - # https://github.com/conda-forge/conda-forge-ci-setup-feedstock/pull/210 - skip: true # [(aarch64 or ppc64le) and (cuda_compiler_version != "None")] run_exports: - {{ pin_subpackage("libarrow", max_pin="x.x.x") }} @@ -88,12 +89,13 @@ outputs: - {{ compiler("c") }} - {{ compiler("cxx") }} - {{ compiler("cuda") }} # [cuda_compiler_version != "None"] - - clangdev {{ llvm_version }} # [osx and arm64] - - llvmdev {{ llvm_version }} # [osx and arm64] - - gnuconfig # [osx and arm64] # needs to run protoc & grpc_cpp_plugin - libgrpc # [build_platform != target_platform] - libprotobuf # [build_platform != target_platform] + # needed for gandiva + - clangdev {{ llvm_version }} # [build_platform != target_platform] + - llvmdev {{ llvm_version }} # [build_platform != target_platform] + - gnuconfig # [build_platform != target_platform] - cmake - ninja # necessary for vendored jemalloc diff --git a/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch b/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch index b87b72104..246e10899 100644 --- a/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch +++ b/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch @@ -1,7 +1,7 @@ From 7f1350d76bf71a8123cf78abe18ddb5876a7ca80 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 26 Jan 2023 12:37:02 +1100 -Subject: [PATCH 1/2] don't bake non-relocatable CMAKE_INSTALL_FULL_LIBDIR into +Subject: [PATCH 1/5] don't bake non-relocatable CMAKE_INSTALL_FULL_LIBDIR into gdb-integration --- diff --git a/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch b/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch index 324fb2259..5a9c5a33b 100644 --- a/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch +++ b/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch @@ -1,7 +1,7 @@ From 179a3caa661b7a93f6a136ea44f6f53671611533 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 4 Feb 2023 22:08:54 +0900 -Subject: [PATCH 2/2] GH-33882: [C++] Don't find .pc files with +Subject: [PATCH 2/5] GH-33882: [C++] Don't find .pc files with ARROW_BUILD_STATIC=OFF (#34019) Because they are needless and `pkg-config grpc++` is slow. diff --git a/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch b/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch new file mode 100644 index 000000000..21211d851 --- /dev/null +++ b/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch @@ -0,0 +1,130 @@ +From da0b59a5d55032fb94618d776578b156a244ab2b Mon Sep 17 00:00:00 2001 +From: Alenka Frim +Date: Fri, 10 Mar 2023 17:36:41 +0100 +Subject: [PATCH 3/5] GH-34404: [Python] Failing tests because pandas.Index can + now store all numeric dtypes (not only 64bit versions) (#34498) + +### Rationale for this change +Several failing tests in the nightly build (https://github.com/ursacomputing/crossbow/actions/runs/4277727973/jobs/7446784501) + +### What changes are included in this PR? +Due to change in supported dtypes for Index in pandas, the tests expecting `int64`and not `int32` are failing with dev version of pandas. The failing tests are updated to match the new pandas behaviour. 
+* Closes: #34404 + +Authored-by: Alenka Frim +Signed-off-by: Joris Van den Bossche +--- + python/pyarrow/tests/parquet/test_dataset.py | 11 +++++- + python/pyarrow/tests/test_compute.py | 40 +++++++++++++------- + python/pyarrow/tests/test_pandas.py | 4 +- + 3 files changed, 38 insertions(+), 17 deletions(-) + +diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py +index 1bfde4e17..fd24f1642 100644 +--- a/python/pyarrow/tests/parquet/test_dataset.py ++++ b/python/pyarrow/tests/parquet/test_dataset.py +@@ -735,8 +735,15 @@ def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True): + .reset_index(drop=True) + .reindex(columns=result_df.columns)) + +- expected_df['foo'] = pd.Categorical(df['foo'], categories=foo_keys) +- expected_df['bar'] = pd.Categorical(df['bar'], categories=bar_keys) ++ if use_legacy_dataset or Version(pd.__version__) < Version("2.0.0"): ++ expected_df['foo'] = pd.Categorical(df['foo'], categories=foo_keys) ++ expected_df['bar'] = pd.Categorical(df['bar'], categories=bar_keys) ++ else: ++ # With pandas 2.0.0 Index can store all numeric dtypes (not just ++ # int64/uint64/float64). Using astype() to create a categorical ++ # column preserves original dtype (int32) ++ expected_df['foo'] = expected_df['foo'].astype("category") ++ expected_df['bar'] = expected_df['bar'].astype("category") + + assert (result_df.columns == ['index', 'values', 'foo', 'bar']).all() + +diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py +index 996509999..81c0a4e8b 100644 +--- a/python/pyarrow/tests/test_compute.py ++++ b/python/pyarrow/tests/test_compute.py +@@ -1916,22 +1916,36 @@ def _check_datetime_components(timestamps, timezone=None): + [iso_year, iso_week, iso_day], + fields=iso_calendar_fields) + +- assert pc.year(tsa).equals(pa.array(ts.dt.year)) ++ # Casting is required because pandas with 2.0.0 various numeric ++ # date/time attributes have dtype int32 (previously int64) ++ year = ts.dt.year.astype("int64") ++ month = ts.dt.month.astype("int64") ++ day = ts.dt.day.astype("int64") ++ dayofweek = ts.dt.dayofweek.astype("int64") ++ dayofyear = ts.dt.dayofyear.astype("int64") ++ quarter = ts.dt.quarter.astype("int64") ++ hour = ts.dt.hour.astype("int64") ++ minute = ts.dt.minute.astype("int64") ++ second = ts.dt.second.values.astype("int64") ++ microsecond = ts.dt.microsecond.astype("int64") ++ nanosecond = ts.dt.nanosecond.astype("int64") ++ ++ assert pc.year(tsa).equals(pa.array(year)) + assert pc.is_leap_year(tsa).equals(pa.array(ts.dt.is_leap_year)) +- assert pc.month(tsa).equals(pa.array(ts.dt.month)) +- assert pc.day(tsa).equals(pa.array(ts.dt.day)) +- assert pc.day_of_week(tsa).equals(pa.array(ts.dt.dayofweek)) +- assert pc.day_of_year(tsa).equals(pa.array(ts.dt.dayofyear)) ++ assert pc.month(tsa).equals(pa.array(month)) ++ assert pc.day(tsa).equals(pa.array(day)) ++ assert pc.day_of_week(tsa).equals(pa.array(dayofweek)) ++ assert pc.day_of_year(tsa).equals(pa.array(dayofyear)) + assert pc.iso_year(tsa).equals(pa.array(iso_year)) + assert pc.iso_week(tsa).equals(pa.array(iso_week)) + assert pc.iso_calendar(tsa).equals(iso_calendar) +- assert pc.quarter(tsa).equals(pa.array(ts.dt.quarter)) +- assert pc.hour(tsa).equals(pa.array(ts.dt.hour)) +- assert pc.minute(tsa).equals(pa.array(ts.dt.minute)) +- assert pc.second(tsa).equals(pa.array(ts.dt.second.values)) +- assert pc.millisecond(tsa).equals(pa.array(ts.dt.microsecond // 10 ** 3)) +- assert 
pc.microsecond(tsa).equals(pa.array(ts.dt.microsecond % 10 ** 3)) +- assert pc.nanosecond(tsa).equals(pa.array(ts.dt.nanosecond)) ++ assert pc.quarter(tsa).equals(pa.array(quarter)) ++ assert pc.hour(tsa).equals(pa.array(hour)) ++ assert pc.minute(tsa).equals(pa.array(minute)) ++ assert pc.second(tsa).equals(pa.array(second)) ++ assert pc.millisecond(tsa).equals(pa.array(microsecond // 10 ** 3)) ++ assert pc.microsecond(tsa).equals(pa.array(microsecond % 10 ** 3)) ++ assert pc.nanosecond(tsa).equals(pa.array(nanosecond)) + assert pc.subsecond(tsa).equals(pa.array(subseconds)) + + if ts.dt.tz: +@@ -1945,7 +1959,7 @@ def _check_datetime_components(timestamps, timezone=None): + day_of_week_options = pc.DayOfWeekOptions( + count_from_zero=False, week_start=1) + assert pc.day_of_week(tsa, options=day_of_week_options).equals( +- pa.array(ts.dt.dayofweek + 1)) ++ pa.array(dayofweek + 1)) + + week_options = pc.WeekOptions( + week_starts_monday=True, count_from_zero=False, +diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py +index 4d0ddf875..ddec0f0de 100644 +--- a/python/pyarrow/tests/test_pandas.py ++++ b/python/pyarrow/tests/test_pandas.py +@@ -3250,7 +3250,7 @@ def test_table_from_pandas_schema_index_columns(): + schema = pa.schema([ + ('a', pa.int64()), + ('b', pa.float64()), +- ('index', pa.int32()), ++ ('index', pa.int64()), + ]) + + # schema includes index with name not in dataframe +@@ -3283,7 +3283,7 @@ def test_table_from_pandas_schema_index_columns(): + + # schema has different order (index column not at the end) + schema = pa.schema([ +- ('index', pa.int32()), ++ ('index', pa.int64()), + ('a', pa.int64()), + ('b', pa.float64()), + ]) +-- +2.38.1.windows.1 + diff --git a/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch b/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch new file mode 100644 index 000000000..24048ad3e --- /dev/null +++ b/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch @@ -0,0 +1,130 @@ +From 041263a350d8c7e00c665480370e41c2031df1b5 Mon Sep 17 00:00:00 2001 +From: Joris Van den Bossche +Date: Tue, 4 Apr 2023 16:43:26 +0200 +Subject: [PATCH 4/5] GH-15070: [Python][CI] Compatibility with pandas 2.0 + (#34878) + +### What changes are included in this PR? + +- The issue with numpy 1.25 in the assert equal helper was fixed in pandas 1.5.3 -> removing the skip (in theory can still run into this error when using an older pandas version with the latest numpy, but that's not something you should do) +- Casting tz-aware strings to datetime64[ns] was not fixed in pandas (https://github.com/pandas-dev/pandas/issues/50140) -> updating our implementation to work around it +- Casting to numpy string dtype (https://github.com/pandas-dev/pandas/issues/50127) is not yet fixed -> updating the skip + +### Are there any user-facing changes? 
+ +No +* Closes: #15070 + +Authored-by: Joris Van den Bossche +Signed-off-by: Joris Van den Bossche +--- + python/pyarrow/pandas_compat.py | 3 +-- + python/pyarrow/tests/test_pandas.py | 34 ++++++----------------------- + 2 files changed, 8 insertions(+), 29 deletions(-) + +diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py +index d624459ca..b0ab28bf1 100644 +--- a/python/pyarrow/pandas_compat.py ++++ b/python/pyarrow/pandas_compat.py +@@ -1148,8 +1148,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes): + if pandas_dtype == "datetimetz": + tz = pa.lib.string_to_tzinfo( + column_indexes[0]['metadata']['timezone']) +- dt = level.astype(numpy_dtype) +- level = dt.tz_localize('utc').tz_convert(tz) ++ level = pd.to_datetime(level, utc=True).tz_convert(tz) + elif level.dtype != dtype: + level = level.astype(dtype) + # ARROW-9096: if original DataFrame was upcast we keep that +diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py +index ddec0f0de..e8ae7f1f5 100644 +--- a/python/pyarrow/tests/test_pandas.py ++++ b/python/pyarrow/tests/test_pandas.py +@@ -187,17 +187,12 @@ class TestConvertMetadata: + _check_pandas_roundtrip(df, preserve_index=True) + + def test_column_index_names_with_tz(self): +- if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"): +- # TODO: regression in pandas, should be fixed before final 2.0.0 +- # https://github.com/pandas-dev/pandas/issues/50140 +- pytest.skip("Regression in pandas 2.0.0.dev") + # ARROW-13756 + # Bug if index is timezone aware DataTimeIndex + + df = pd.DataFrame( + np.random.randn(5, 3), +- columns=pd.date_range( +- "2021-01-01", "2021-01-3", freq="D", tz="CET") ++ columns=pd.date_range("2021-01-01", periods=3, freq="50D", tz="CET") + ) + _check_pandas_roundtrip(df, preserve_index=True) + +@@ -453,11 +448,11 @@ class TestConvertMetadata: + preserve_index=True) + + def test_binary_column_name(self): +- if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"): +- # TODO: regression in pandas, should be fixed before final 2.0.0 ++ if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"): ++ # TODO: regression in pandas, hopefully fixed in next version + # https://issues.apache.org/jira/browse/ARROW-18394 + # https://github.com/pandas-dev/pandas/issues/50127 +- pytest.skip("Regression in pandas 2.0.0.dev") ++ pytest.skip("Regression in pandas 2.0.0") + column_data = ['い'] + key = 'あ'.encode() + data = {key: column_data} +@@ -2064,11 +2059,6 @@ class TestConvertListTypes: + assert result3.equals(expected3) + + def test_infer_lists(self): +- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and +- (Version(pd.__version__) < Version("2.0.0"))): +- # TODO: regression in pandas with numpy 1.25dev +- # https://github.com/pandas-dev/pandas/issues/50360 +- pytest.skip("Regression in pandas with numpy 1.25") + data = OrderedDict([ + ('nan_ints', [[None, 1], [2, 3]]), + ('ints', [[0, 1], [2, 3]]), +@@ -2118,11 +2108,6 @@ class TestConvertListTypes: + _check_pandas_roundtrip(df, expected_schema=expected_schema) + + def test_to_list_of_structs_pandas(self): +- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and +- (Version(pd.__version__) < Version("2.0.0"))): +- # TODO: regression in pandas with numpy 1.25dev +- # https://github.com/pandas-dev/pandas/issues/50360 +- pytest.skip("Regression in pandas with numpy 1.25") + ints = pa.array([1, 2, 3], pa.int32()) + strings = pa.array([['a', 'b'], ['c', 'd'], ['e', 'f']], + 
pa.list_(pa.string())) +@@ -2192,11 +2177,6 @@ class TestConvertListTypes: + assert result.equals(expected) + + def test_nested_large_list(self): +- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and +- (Version(pd.__version__) < Version("2.0.0"))): +- # TODO: regression in pandas with numpy 1.25dev +- # https://github.com/pandas-dev/pandas/issues/50360 +- pytest.skip("Regression in pandas with numpy 1.25") + s = (pa.array([[[1, 2, 3], [4]], None], + type=pa.large_list(pa.large_list(pa.int64()))) + .to_pandas()) +@@ -2950,11 +2930,11 @@ def _fully_loaded_dataframe_example(): + + @pytest.mark.parametrize('columns', ([b'foo'], ['foo'])) + def test_roundtrip_with_bytes_unicode(columns): +- if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"): +- # TODO: regression in pandas, should be fixed before final 2.0.0 ++ if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"): ++ # TODO: regression in pandas, hopefully fixed in next version + # https://issues.apache.org/jira/browse/ARROW-18394 + # https://github.com/pandas-dev/pandas/issues/50127 +- pytest.skip("Regression in pandas 2.0.0.dev") ++ pytest.skip("Regression in pandas 2.0.0") + + df = pd.DataFrame(columns=columns) + table1 = pa.Table.from_pandas(df) +-- +2.38.1.windows.1 + diff --git a/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch b/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch new file mode 100644 index 000000000..151d32ded --- /dev/null +++ b/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch @@ -0,0 +1,69 @@ +From 7cef42a8ce057fba2008042de9c95fc8c2b15f90 Mon Sep 17 00:00:00 2001 +From: Joris Van den Bossche +Date: Tue, 4 Apr 2023 20:38:25 +0200 +Subject: [PATCH 5/5] GH-34880: [Python][CI] Fix Windows tests failing with + latest pandas 2.0 (#34881) + +* Closes: #34880 + +Authored-by: Joris Van den Bossche +Signed-off-by: Jacob Wujciak-Jens +--- + ci/appveyor-cpp-setup.bat | 3 ++- + python/pyarrow/tests/parquet/test_pandas.py | 4 +++- + python/pyarrow/tests/test_pandas.py | 3 ++- + 3 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat +index 64f930a16..73def17e5 100644 +--- a/ci/appveyor-cpp-setup.bat ++++ b/ci/appveyor-cpp-setup.bat +@@ -64,7 +64,7 @@ if "%ARROW_BUILD_GANDIVA%" == "ON" ( + @rem Install pre-built "toolchain" packages for faster builds + set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt + @rem Arrow conda environment +-mamba create -n arrow -q -y -c conda-forge ^ ++mamba create -n arrow -y -c conda-forge ^ + --file=ci\conda_env_python.txt ^ + %CONDA_PACKAGES% ^ + "ccache" ^ +@@ -75,6 +75,7 @@ mamba create -n arrow -q -y -c conda-forge ^ + "fsspec" ^ + "python=%PYTHON%" ^ + || exit /B ++conda list -n arrow + + @rem + @rem Configure compiler +diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py +index b6250e8fc..c27b16de7 100644 +--- a/python/pyarrow/tests/parquet/test_pandas.py ++++ b/python/pyarrow/tests/parquet/test_pandas.py +@@ -643,7 +643,9 @@ def test_dataset_read_pandas_common_metadata( + paths = [] + for i in range(nfiles): + df = _test_dataframe(size, seed=i) +- df.index = pd.Index(np.arange(i * size, (i + 1) * size), name='index') ++ df.index = pd.Index( ++ np.arange(i * size, (i + 1) * size, dtype="int64"), name='index' ++ ) + + path = dirpath / '{}.parquet'.format(i) + +diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py 
+index e8ae7f1f5..45a462a7b 100644 +--- a/python/pyarrow/tests/test_pandas.py ++++ b/python/pyarrow/tests/test_pandas.py +@@ -2570,7 +2570,8 @@ class TestZeroCopyConversion: + def test_zero_copy_dictionaries(self): + arr = pa.DictionaryArray.from_arrays( + np.array([0, 0]), +- np.array([5])) ++ np.array([5], dtype="int64"), ++ ) + + result = arr.to_pandas(zero_copy_only=True) + values = pd.Categorical([5, 5]) +-- +2.38.1.windows.1 +
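
Note on the backported pandas-2.0 patches (0003-0005), for reviewers who want to reproduce the behaviour changes locally. The driver for patch 0003 is that pandas 2.0 lets Index hold non-64-bit numeric dtypes instead of upcasting them to 64-bit. A minimal sketch (not part of the diff; assumes numpy and a pandas 2.x install):

import numpy as np
import pandas as pd

idx = pd.Index(np.arange(3, dtype="int32"))
# pandas >= 2.0 keeps int32 here; pandas 1.x upcasts to an int64 Index
print(idx.dtype)

This is also why the partition test in patch 0003 switches to astype("category"), which preserves the original int32 column dtype, instead of rebuilding the column with pd.Categorical.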
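
The test_compute.py hunk in patch 0003 reacts to another pandas 2.0 change: the numeric .dt accessors (year, month, day, ...) now return int32 instead of int64, while pyarrow's temporal kernels still produce int64. A rough illustration of the cast the patched test applies, assuming pandas 2.x and pyarrow are importable:

import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc

ts = pd.Series(pd.date_range("2021-01-01", periods=3, freq="D"))
tsa = pa.array(ts)

# ts.dt.year is int32 under pandas 2.0, so cast before comparing with the
# int64 result of pc.year(), as the backported test now does
assert pc.year(tsa).equals(pa.array(ts.dt.year.astype("int64")))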
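
Patch 0004 changes pandas_compat._reconstruct_columns_from_metadata because casting tz-aware strings to datetime64[ns] via astype is rejected by pandas 2.0 (pandas-dev/pandas#50140); the replacement path parses with pd.to_datetime instead. A small sketch of the new pattern, with the timezone "CET" chosen only for illustration:

import pandas as pd

level = pd.Index(["2021-01-01 00:00:00+00:00", "2021-01-02 00:00:00+00:00"])

# old path (level.astype("datetime64[ns]").tz_localize("utc")...) no longer
# works on tz-aware strings with pandas 2.0; parse explicitly instead
restored = pd.to_datetime(level, utc=True).tz_convert("CET")
print(restored.dtype)  # datetime64[ns, CET]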
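
Finally, the test_pandas.py hunk in patch 0005 pins the dictionary values to int64 because numpy's default integer is 32-bit on Windows (a C long), which made the dictionary-to-categorical round trip compare int32 categories against int64 ones under pandas 2.0. A sketch mirroring the patched call, assuming pyarrow is importable:

import numpy as np
import pyarrow as pa

# np.array([5]) defaults to int32 on Windows; pinning dtype keeps the
# round-tripped categories int64 on every platform
arr = pa.DictionaryArray.from_arrays(
    np.array([0, 0]),
    np.array([5], dtype="int64"),
)
print(arr.to_pandas(zero_copy_only=True))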