diff --git a/recipe/activate.sh b/recipe/activate.sh index be3073de1..f76d48083 100755 --- a/recipe/activate.sh +++ b/recipe/activate.sh @@ -10,7 +10,7 @@ # where the GDB wrappers get installed GDB_PREFIX="$CONDA_PREFIX/share/gdb/auto-load" -# this needs to be in sync with the respective patch +# this needs to be in sync with ARROW_GDB_INSTALL_DIR in build-arrow.sh PLACEHOLDER="replace_this_section_with_absolute_slashed_path_to_CONDA_PREFIX" # the paths here are intentionally stacked, see #935, resp. # https://github.com/apache/arrow/blob/master/docs/source/cpp/gdb.rst#manual-loading diff --git a/recipe/build-arrow.sh b/recipe/build-arrow.sh index 55b3c78d1..ea98d3a60 100644 --- a/recipe/build-arrow.sh +++ b/recipe/build-arrow.sh @@ -75,6 +75,7 @@ export READ_RECIPE_META_YAML_WHY_NOT=OFF # for available switches see # https://github.com/apache/arrow/blame/apache-arrow-11.0.0/cpp/cmake_modules/DefineOptions.cmake +# placeholder in ARROW_GDB_INSTALL_DIR must match what's used for replacement in activate.sh cmake -GNinja \ -DARROW_BOOST_USE_SHARED=ON \ -DARROW_BUILD_BENCHMARKS=OFF \ @@ -93,6 +94,7 @@ cmake -GNinja \ -DARROW_GANDIVA=ON \ -DARROW_GANDIVA_PC_CXX_FLAGS="${ARROW_GANDIVA_PC_CXX_FLAGS}" \ -DARROW_GCS=ON \ + -DARROW_GDB_INSTALL_DIR=replace_this_section_with_absolute_slashed_path_to_CONDA_PREFIX/lib \ -DARROW_HDFS=ON \ -DARROW_JEMALLOC=ON \ -DARROW_JSON=ON \ @@ -100,7 +102,6 @@ cmake -GNinja \ -DARROW_ORC=ON \ -DARROW_PACKAGE_PREFIX=$PREFIX \ -DARROW_PARQUET=ON \ - -DARROW_PLASMA=ON \ -DARROW_S3=ON \ -DARROW_SIMD_LEVEL=NONE \ -DARROW_SUBSTRAIT=ON \ diff --git a/recipe/build-pyarrow.sh b/recipe/build-pyarrow.sh index b50ccbb4a..14c67ede6 100644 --- a/recipe/build-pyarrow.sh +++ b/recipe/build-pyarrow.sh @@ -6,7 +6,6 @@ export ARROW_HOME=$PREFIX export PARQUET_HOME=$PREFIX export SETUPTOOLS_SCM_PRETEND_VERSION=$PKG_VERSION export PYARROW_BUILD_TYPE=release -export PYARROW_BUNDLE_ARROW_CPP_HEADERS=0 export PYARROW_WITH_DATASET=1 export PYARROW_WITH_FLIGHT=1 export
PYARROW_WITH_GANDIVA=1 @@ -15,7 +14,6 @@ export PYARROW_WITH_HDFS=1 export PYARROW_WITH_ORC=1 export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PARQUET_ENCRYPTION=1 -export PYARROW_WITH_PLASMA=1 export PYARROW_WITH_S3=1 export PYARROW_WITH_SUBSTRAIT=1 export PYARROW_CMAKE_GENERATOR=Ninja diff --git a/recipe/meta.yaml b/recipe/meta.yaml index ec2482954..83630e918 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "11.0.0" %} +{% set version = "12.0.0" %} {% set cuda_enabled = cuda_compiler_version != "None" %} {% set build_ext_version = "4.0.0" %} {% set build_ext = "cuda" if cuda_enabled else "cpu" %} @@ -14,30 +14,14 @@ package: source: - url: https://dist.apache.org/repos/dist/release/arrow/arrow-{{ version }}/apache-arrow-{{ version }}.tar.gz - sha256: 2dd8f0ea0848a58785628ee3a57675548d509e17213a2f5d72b0d900b43f5430 - patches: - # gdb-integration needs full path to shared library, see - # https://github.com/apache/arrow/blob/master/docs/source/cpp/gdb.rst#manual-loading - # however, baking the installation-env into the path for the gdb-file cannot work - # with the usual relocation treatement and leads to other issues, see #935; - # replace with placeholder that allows interested users to fix the file path - - patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch - # backport apache/arrow#34019 to disable useless pkgconfig search that takes ~15min - - patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch - # backport apache/arrow#34498, 34878, #34881 & #35031 for pandas 2.0 compatibility - - patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch - - patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch - - patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch - - patches/0006-GH-15070-Python-CI-Update-pandas-test-for-empty-colu.patch - # backport apache/arrow#34747 for compatibility with grpc 1.54 - - 
patches/0007-GH-34743-Python-Relax-condition-in-flaky-Flight-test.patch + sha256: ddd8347882775e53af7d0965a1902b7d8fcd0a030fd14f783d4f85e821352d52 # testing-submodule not part of release tarball - git_url: https://github.com/apache/arrow-testing.git - git_rev: 00c483283433b4c02cb811f260dbe35414c806a4 + git_rev: 47f7b56b25683202c1fd957668e13f2abafc0f12 folder: testing build: - number: 18 + number: 0 # for cuda support, building with one version is enough to be compatible with # all later versions, since arrow is only using libcuda, and not libcudart. skip: true # [cuda_compiler_version not in ("None", cuda_compiler_version_min)] @@ -85,8 +69,8 @@ outputs: - cudatoolkit track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }} missing_dso_whitelist: - - "*/libcuda.so.*" # [linux] - - "*/nvcuda.dll" # [win] + - '*/libcuda.so.*' # [linux] + - '*/nvcuda.dll' # [win] requirements: build: - {{ compiler("c") }} @@ -158,7 +142,6 @@ outputs: "arrow/api.h", "arrow/flight/types.h", "arrow/flight/sql/api.h", "gandiva/engine.h", "parquet/api/reader.h" ] %} - {% set headers = headers + ["plasma/client.h"] %} # [unix] {% for each_header in headers %} # headers - test -f $PREFIX/include/{{ each_header }} || (echo "{{ each_header }} not found" && exit 1) # [unix] @@ -169,7 +152,6 @@ outputs: "arrow", "arrow_dataset", "arrow_flight", "arrow_flight_sql", "arrow_substrait", "gandiva", "parquet" ] %} - {% set libs = libs + ["plasma"] %} # [unix] {% for each_lib in libs %} # shared - test -f $PREFIX/lib/lib{{ each_lib }}.so # [linux] @@ -230,8 +212,8 @@ outputs: - {{ SP_DIR }}/pyarrow missing_dso_whitelist: # not actually missing, but installed into SP_DIR, see tests - - "*/arrow_python.dll" # [win] - - "*/arrow_python_flight.dll" # [win] + - '*/arrow_python.dll' # [win] + - '*/arrow_python_flight.dll' # [win] requirements: build: - {{ compiler("c") }} @@ -274,7 +256,6 @@ outputs: - pyarrow.gandiva - pyarrow.orc # [unix] - pyarrow.parquet - - pyarrow.plasma # [unix] - pyarrow.fs - 
pyarrow._s3fs - pyarrow._hdfs @@ -349,9 +330,11 @@ outputs: test: requires: + # test_cpp_extension_in_python requires a compiler + - {{ compiler("cxx") }} # [linux] - pytest - pytest-lazy-fixture - - backports.zoneinfo # [py<39] + - backports.zoneinfo # [py<39] - cffi - cloudpickle - cython @@ -384,6 +367,8 @@ outputs: # skip tests that raise SIGINT and crash the test suite {% set tests_to_skip = tests_to_skip + " or (test_csv and test_cancellation)" %} # [linux] {% set tests_to_skip = tests_to_skip + " or (test_flight and test_interrupt)" %} # [linux] + # cannot pass -D_LIBCPP_DISABLE_AVAILABILITY to test suite for our older macos sdk + {% set tests_to_skip = tests_to_skip + " or test_cpp_extension_in_python" %} # [osx] # skip tests that make invalid(-for-conda) assumptions about the compilers setup {% set tests_to_skip = tests_to_skip + " or test_cython_api" %} # [unix] {% set tests_to_skip = tests_to_skip + " or test_visit_strings" %} # [unix] @@ -391,10 +376,9 @@ outputs: {% set tests_to_skip = tests_to_skip + " or test_debug_memory_pool_disabled" %} # [aarch64 or ppc64le] {% set tests_to_skip = tests_to_skip + " or test_env_var_io_thread_count" %} # [aarch64 or ppc64le] # vvvvvvv TESTS THAT SHOULDN'T HAVE TO BE SKIPPED vvvvvvv + {% set tests_to_skip = tests_to_skip + " or test_extension_to_pandas_storage_type" %} # segfaults on OSX: to investigate ASAP {% set tests_to_skip = tests_to_skip + " or test_flight" %} # [osx] - # failing on linux with "OSError: Could not connect to socket /tmp/[...]/plasma.sock" - {% set tests_to_skip = tests_to_skip + " or test_plasma" %} # [linux] # gandiva tests are segfaulting on ppc {% set tests_to_skip = tests_to_skip + " or test_gandiva" %} # [ppc64le] # test failures on ppc @@ -405,6 +389,7 @@ outputs: {% set tests_to_skip = tests_to_skip + " or (test_memory and test_env_var)" %} # [unix] # test is broken; header is in $PREFIX, not $SP_DIR {% set tests_to_skip = tests_to_skip + " or (test_misc and test_get_include)" %} # 
[unix] + # ^^^^^^^ TESTS THAT SHOULDN'T HAVE TO BE SKIPPED ^^^^^^^ - pytest -v -rfEs -k "not ({{ tests_to_skip }})" about: diff --git a/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch b/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch deleted file mode 100644 index d7ea301f6..000000000 --- a/recipe/patches/0001-don-t-bake-non-relocatable-CMAKE_INSTALL_FULL_LIBDIR.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 7f1350d76bf71a8123cf78abe18ddb5876a7ca80 Mon Sep 17 00:00:00 2001 -From: "H. Vetinari" -Date: Thu, 26 Jan 2023 12:37:02 +1100 -Subject: [PATCH 1/7] don't bake non-relocatable CMAKE_INSTALL_FULL_LIBDIR into - gdb-integration - ---- - cpp/src/arrow/CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt -index 90ab1e6ac..c1f51e0da 100644 ---- a/cpp/src/arrow/CMakeLists.txt -+++ b/cpp/src/arrow/CMakeLists.txt -@@ -644,7 +644,7 @@ if(ARROW_BUILD_SHARED AND NOT WIN32) - endif() - else() - set(ARROW_GDB_AUTO_LOAD_LIBARROW_GDB_DIR -- "${ARROW_GDB_AUTO_LOAD_DIR}/${CMAKE_INSTALL_FULL_LIBDIR}") -+ "${ARROW_GDB_AUTO_LOAD_DIR}/replace_this_section_with_absolute_slashed_path_to_CONDA_PREFIX/lib/") - set(ARROW_GDB_AUTO_LOAD_LIBARROW_GDB_INSTALL TRUE) - endif() - if(ARROW_GDB_AUTO_LOAD_LIBARROW_GDB_INSTALL) --- -2.40.1.windows.1 - diff --git a/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch b/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch deleted file mode 100644 index 50ce6d75a..000000000 --- a/recipe/patches/0002-GH-33882-C-Don-t-find-.pc-files-with-ARROW_BUILD_STA.patch +++ /dev/null @@ -1,161 +0,0 @@ -From 179a3caa661b7a93f6a136ea44f6f53671611533 Mon Sep 17 00:00:00 2001 -From: Sutou Kouhei -Date: Sat, 4 Feb 2023 22:08:54 +0900 -Subject: [PATCH 2/7] GH-33882: [C++] Don't find .pc files with - ARROW_BUILD_STATIC=OFF (#34019) - -Because they are needless and 
`pkg-config grpc++` is slow. - -Don't find .pc files with `ARROW_BUILD_STATIC=OFF`. - -Yes. - -No. -* Closes: #33882 - -Authored-by: Sutou Kouhei -Signed-off-by: Sutou Kouhei ---- - cpp/cmake_modules/ThirdpartyToolchain.cmake | 93 +++++++++++---------- - 1 file changed, 47 insertions(+), 46 deletions(-) - -diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake -index 3eda538fb..5c800502d 100644 ---- a/cpp/cmake_modules/ThirdpartyToolchain.cmake -+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake -@@ -287,22 +287,24 @@ macro(resolve_dependency DEPENDENCY_NAME) - if(${DEPENDENCY_NAME}_SOURCE STREQUAL "SYSTEM" AND ARG_IS_RUNTIME_DEPENDENCY) - provide_find_module(${PACKAGE_NAME} "Arrow") - list(APPEND ARROW_SYSTEM_DEPENDENCIES ${PACKAGE_NAME}) -- find_package(PkgConfig QUIET) -- foreach(ARG_PC_PACKAGE_NAME ${ARG_PC_PACKAGE_NAMES}) -- pkg_check_modules(${ARG_PC_PACKAGE_NAME}_PC -- ${ARG_PC_PACKAGE_NAME} -- NO_CMAKE_PATH -- NO_CMAKE_ENVIRONMENT_PATH -- QUIET) -- if(${${ARG_PC_PACKAGE_NAME}_PC_FOUND}) -- message(STATUS "Using pkg-config package for ${ARG_PC_PACKAGE_NAME} for static link" -- ) -- string(APPEND ARROW_PC_REQUIRES_PRIVATE " ${ARG_PC_PACKAGE_NAME}") -- else() -- message(STATUS "pkg-config package for ${ARG_PC_PACKAGE_NAME} for static link isn't found" -- ) -- endif() -- endforeach() -+ if(ARROW_BUILD_STATIC) -+ find_package(PkgConfig QUIET) -+ foreach(ARG_PC_PACKAGE_NAME ${ARG_PC_PACKAGE_NAMES}) -+ pkg_check_modules(${ARG_PC_PACKAGE_NAME}_PC -+ ${ARG_PC_PACKAGE_NAME} -+ NO_CMAKE_PATH -+ NO_CMAKE_ENVIRONMENT_PATH -+ QUIET) -+ if(${${ARG_PC_PACKAGE_NAME}_PC_FOUND}) -+ message(STATUS "Using pkg-config package for ${ARG_PC_PACKAGE_NAME} for static link" -+ ) -+ string(APPEND ARROW_PC_REQUIRES_PRIVATE " ${ARG_PC_PACKAGE_NAME}") -+ else() -+ message(STATUS "pkg-config package for ${ARG_PC_PACKAGE_NAME} for static link isn't found" -+ ) -+ endif() -+ endforeach() -+ endif() - endif() - endmacro() - -@@ -1189,21 +1191,12 
@@ if(ARROW_WITH_SNAPPY) - TRUE - PC_PACKAGE_NAMES - snappy) -- if(${Snappy_SOURCE} STREQUAL "SYSTEM" AND NOT snappy_PC_FOUND) -+ if(${Snappy_SOURCE} STREQUAL "SYSTEM" -+ AND NOT snappy_PC_FOUND -+ AND ARROW_BUILD_STATIC) - get_target_property(SNAPPY_TYPE ${Snappy_TARGET} TYPE) - if(NOT SNAPPY_TYPE STREQUAL "INTERFACE_LIBRARY") -- get_target_property(SNAPPY_LIB ${Snappy_TARGET} -- IMPORTED_LOCATION_${UPPERCASE_BUILD_TYPE}) -- if(NOT SNAPPY_LIB) -- get_target_property(SNAPPY_LIB ${Snappy_TARGET} IMPORTED_LOCATION_RELEASE) -- endif() -- if(NOT SNAPPY_LIB) -- get_target_property(SNAPPY_LIB ${Snappy_TARGET} IMPORTED_LOCATION_NOCONFIG) -- endif() -- if(NOT SNAPPY_LIB) -- get_target_property(SNAPPY_LIB ${Snappy_TARGET} IMPORTED_LOCATION) -- endif() -- string(APPEND ARROW_PC_LIBS_PRIVATE " ${SNAPPY_LIB}") -+ string(APPEND ARROW_PC_LIBS_PRIVATE " $") - endif() - endif() - endif() -@@ -2504,17 +2497,10 @@ if(ARROW_WITH_RE2) - # include -std=c++11. It's not compatible with C source and C++ - # source not uses C++ 11. 
- resolve_dependency(re2 HAVE_ALT TRUE) -- if(${re2_SOURCE} STREQUAL "SYSTEM") -+ if(${re2_SOURCE} STREQUAL "SYSTEM" AND ARROW_BUILD_STATIC) - get_target_property(RE2_TYPE re2::re2 TYPE) - if(NOT RE2_TYPE STREQUAL "INTERFACE_LIBRARY") -- get_target_property(RE2_LIB re2::re2 IMPORTED_LOCATION_${UPPERCASE_BUILD_TYPE}) -- if(NOT RE2_LIB) -- get_target_property(RE2_LIB re2::re2 IMPORTED_LOCATION_RELEASE) -- endif() -- if(NOT RE2_LIB) -- get_target_property(RE2_LIB re2::re2 IMPORTED_LOCATION) -- endif() -- string(APPEND ARROW_PC_LIBS_PRIVATE " ${RE2_LIB}") -+ string(APPEND ARROW_PC_LIBS_PRIVATE " $") - endif() - endif() - add_definitions(-DARROW_WITH_RE2) -@@ -2580,6 +2566,19 @@ if(ARROW_WITH_BZ2) - PROPERTIES IMPORTED_LOCATION "${BZIP2_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") - endif() -+ -+ if(${BZip2_SOURCE} STREQUAL "SYSTEM" -+ AND NOT bzip2_PC_FOUND -+ AND ARROW_BUILD_STATIC) -+ get_target_property(BZIP2_TYPE BZip2::BZip2 TYPE) -+ if(BZIP2_TYPE STREQUAL "INTERFACE_LIBRARY") -+ # Conan -+ string(APPEND ARROW_PC_LIBS_PRIVATE -+ " $>") -+ else() -+ string(APPEND ARROW_PC_LIBS_PRIVATE " $") -+ endif() -+ endif() - endif() - - macro(build_utf8proc) -@@ -4765,15 +4764,17 @@ if(ARROW_S3) - message(STATUS "Found AWS SDK headers: ${AWSSDK_INCLUDE_DIR}") - message(STATUS "Found AWS SDK libraries: ${AWSSDK_LINK_LIBRARIES}") - -- if(${AWSSDK_SOURCE} STREQUAL "SYSTEM") -- foreach(AWSSDK_LINK_LIBRARY ${AWSSDK_LINK_LIBRARIES}) -- string(APPEND ARROW_PC_LIBS_PRIVATE " $") -- endforeach() -- endif() -- if(UNIX) -- string(APPEND ARROW_PC_REQUIRES_PRIVATE " libcurl") -+ if(ARROW_BUILD_STATIC) -+ if(${AWSSDK_SOURCE} STREQUAL "SYSTEM") -+ foreach(AWSSDK_LINK_LIBRARY ${AWSSDK_LINK_LIBRARIES}) -+ string(APPEND ARROW_PC_LIBS_PRIVATE " $") -+ endforeach() -+ endif() -+ if(UNIX) -+ string(APPEND ARROW_PC_REQUIRES_PRIVATE " libcurl") -+ endif() -+ string(APPEND ARROW_PC_REQUIRES_PRIVATE " openssl") - endif() -- string(APPEND ARROW_PC_REQUIRES_PRIVATE " openssl") - 
- if(APPLE) - # CoreFoundation's path is hardcoded in the CMake files provided by --- -2.40.1.windows.1 - diff --git a/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch b/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch deleted file mode 100644 index c76357700..000000000 --- a/recipe/patches/0003-GH-34404-Python-Failing-tests-because-pandas.Index-c.patch +++ /dev/null @@ -1,130 +0,0 @@ -From da0b59a5d55032fb94618d776578b156a244ab2b Mon Sep 17 00:00:00 2001 -From: Alenka Frim -Date: Fri, 10 Mar 2023 17:36:41 +0100 -Subject: [PATCH 3/7] GH-34404: [Python] Failing tests because pandas.Index can - now store all numeric dtypes (not only 64bit versions) (#34498) - -### Rationale for this change -Several failing tests in the nightly build (https://github.com/ursacomputing/crossbow/actions/runs/4277727973/jobs/7446784501) - -### What changes are included in this PR? -Due to change in supported dtypes for Index in pandas, the tests expecting `int64`and not `int32` are failing with dev version of pandas. The failing tests are updated to match the new pandas behaviour. 
-* Closes: #34404 - -Authored-by: Alenka Frim -Signed-off-by: Joris Van den Bossche ---- - python/pyarrow/tests/parquet/test_dataset.py | 11 +++++- - python/pyarrow/tests/test_compute.py | 40 +++++++++++++------- - python/pyarrow/tests/test_pandas.py | 4 +- - 3 files changed, 38 insertions(+), 17 deletions(-) - -diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py -index 1bfde4e17..fd24f1642 100644 ---- a/python/pyarrow/tests/parquet/test_dataset.py -+++ b/python/pyarrow/tests/parquet/test_dataset.py -@@ -735,8 +735,15 @@ def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True): - .reset_index(drop=True) - .reindex(columns=result_df.columns)) - -- expected_df['foo'] = pd.Categorical(df['foo'], categories=foo_keys) -- expected_df['bar'] = pd.Categorical(df['bar'], categories=bar_keys) -+ if use_legacy_dataset or Version(pd.__version__) < Version("2.0.0"): -+ expected_df['foo'] = pd.Categorical(df['foo'], categories=foo_keys) -+ expected_df['bar'] = pd.Categorical(df['bar'], categories=bar_keys) -+ else: -+ # With pandas 2.0.0 Index can store all numeric dtypes (not just -+ # int64/uint64/float64). 
Using astype() to create a categorical -+ # column preserves original dtype (int32) -+ expected_df['foo'] = expected_df['foo'].astype("category") -+ expected_df['bar'] = expected_df['bar'].astype("category") - - assert (result_df.columns == ['index', 'values', 'foo', 'bar']).all() - -diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py -index 996509999..81c0a4e8b 100644 ---- a/python/pyarrow/tests/test_compute.py -+++ b/python/pyarrow/tests/test_compute.py -@@ -1916,22 +1916,36 @@ def _check_datetime_components(timestamps, timezone=None): - [iso_year, iso_week, iso_day], - fields=iso_calendar_fields) - -- assert pc.year(tsa).equals(pa.array(ts.dt.year)) -+ # Casting is required because pandas with 2.0.0 various numeric -+ # date/time attributes have dtype int32 (previously int64) -+ year = ts.dt.year.astype("int64") -+ month = ts.dt.month.astype("int64") -+ day = ts.dt.day.astype("int64") -+ dayofweek = ts.dt.dayofweek.astype("int64") -+ dayofyear = ts.dt.dayofyear.astype("int64") -+ quarter = ts.dt.quarter.astype("int64") -+ hour = ts.dt.hour.astype("int64") -+ minute = ts.dt.minute.astype("int64") -+ second = ts.dt.second.values.astype("int64") -+ microsecond = ts.dt.microsecond.astype("int64") -+ nanosecond = ts.dt.nanosecond.astype("int64") -+ -+ assert pc.year(tsa).equals(pa.array(year)) - assert pc.is_leap_year(tsa).equals(pa.array(ts.dt.is_leap_year)) -- assert pc.month(tsa).equals(pa.array(ts.dt.month)) -- assert pc.day(tsa).equals(pa.array(ts.dt.day)) -- assert pc.day_of_week(tsa).equals(pa.array(ts.dt.dayofweek)) -- assert pc.day_of_year(tsa).equals(pa.array(ts.dt.dayofyear)) -+ assert pc.month(tsa).equals(pa.array(month)) -+ assert pc.day(tsa).equals(pa.array(day)) -+ assert pc.day_of_week(tsa).equals(pa.array(dayofweek)) -+ assert pc.day_of_year(tsa).equals(pa.array(dayofyear)) - assert pc.iso_year(tsa).equals(pa.array(iso_year)) - assert pc.iso_week(tsa).equals(pa.array(iso_week)) - assert 
pc.iso_calendar(tsa).equals(iso_calendar) -- assert pc.quarter(tsa).equals(pa.array(ts.dt.quarter)) -- assert pc.hour(tsa).equals(pa.array(ts.dt.hour)) -- assert pc.minute(tsa).equals(pa.array(ts.dt.minute)) -- assert pc.second(tsa).equals(pa.array(ts.dt.second.values)) -- assert pc.millisecond(tsa).equals(pa.array(ts.dt.microsecond // 10 ** 3)) -- assert pc.microsecond(tsa).equals(pa.array(ts.dt.microsecond % 10 ** 3)) -- assert pc.nanosecond(tsa).equals(pa.array(ts.dt.nanosecond)) -+ assert pc.quarter(tsa).equals(pa.array(quarter)) -+ assert pc.hour(tsa).equals(pa.array(hour)) -+ assert pc.minute(tsa).equals(pa.array(minute)) -+ assert pc.second(tsa).equals(pa.array(second)) -+ assert pc.millisecond(tsa).equals(pa.array(microsecond // 10 ** 3)) -+ assert pc.microsecond(tsa).equals(pa.array(microsecond % 10 ** 3)) -+ assert pc.nanosecond(tsa).equals(pa.array(nanosecond)) - assert pc.subsecond(tsa).equals(pa.array(subseconds)) - - if ts.dt.tz: -@@ -1945,7 +1959,7 @@ def _check_datetime_components(timestamps, timezone=None): - day_of_week_options = pc.DayOfWeekOptions( - count_from_zero=False, week_start=1) - assert pc.day_of_week(tsa, options=day_of_week_options).equals( -- pa.array(ts.dt.dayofweek + 1)) -+ pa.array(dayofweek + 1)) - - week_options = pc.WeekOptions( - week_starts_monday=True, count_from_zero=False, -diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py -index 4d0ddf875..ddec0f0de 100644 ---- a/python/pyarrow/tests/test_pandas.py -+++ b/python/pyarrow/tests/test_pandas.py -@@ -3250,7 +3250,7 @@ def test_table_from_pandas_schema_index_columns(): - schema = pa.schema([ - ('a', pa.int64()), - ('b', pa.float64()), -- ('index', pa.int32()), -+ ('index', pa.int64()), - ]) - - # schema includes index with name not in dataframe -@@ -3283,7 +3283,7 @@ def test_table_from_pandas_schema_index_columns(): - - # schema has different order (index column not at the end) - schema = pa.schema([ -- ('index', pa.int32()), -+ ('index', 
pa.int64()), - ('a', pa.int64()), - ('b', pa.float64()), - ]) --- -2.40.1.windows.1 - diff --git a/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch b/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch deleted file mode 100644 index 9c037cd91..000000000 --- a/recipe/patches/0004-GH-15070-Python-CI-Compatibility-with-pandas-2.0-348.patch +++ /dev/null @@ -1,130 +0,0 @@ -From 041263a350d8c7e00c665480370e41c2031df1b5 Mon Sep 17 00:00:00 2001 -From: Joris Van den Bossche -Date: Tue, 4 Apr 2023 16:43:26 +0200 -Subject: [PATCH 4/7] GH-15070: [Python][CI] Compatibility with pandas 2.0 - (#34878) - -### What changes are included in this PR? - -- The issue with numpy 1.25 in the assert equal helper was fixed in pandas 1.5.3 -> removing the skip (in theory can still run into this error when using an older pandas version with the latest numpy, but that's not something you should do) -- Casting tz-aware strings to datetime64[ns] was not fixed in pandas (https://github.com/pandas-dev/pandas/issues/50140) -> updating our implementation to work around it -- Casting to numpy string dtype (https://github.com/pandas-dev/pandas/issues/50127) is not yet fixed -> updating the skip - -### Are there any user-facing changes? 
- -No -* Closes: #15070 - -Authored-by: Joris Van den Bossche -Signed-off-by: Joris Van den Bossche ---- - python/pyarrow/pandas_compat.py | 3 +-- - python/pyarrow/tests/test_pandas.py | 34 ++++++----------------------- - 2 files changed, 8 insertions(+), 29 deletions(-) - -diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py -index d624459ca..b0ab28bf1 100644 ---- a/python/pyarrow/pandas_compat.py -+++ b/python/pyarrow/pandas_compat.py -@@ -1148,8 +1148,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes): - if pandas_dtype == "datetimetz": - tz = pa.lib.string_to_tzinfo( - column_indexes[0]['metadata']['timezone']) -- dt = level.astype(numpy_dtype) -- level = dt.tz_localize('utc').tz_convert(tz) -+ level = pd.to_datetime(level, utc=True).tz_convert(tz) - elif level.dtype != dtype: - level = level.astype(dtype) - # ARROW-9096: if original DataFrame was upcast we keep that -diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py -index ddec0f0de..e8ae7f1f5 100644 ---- a/python/pyarrow/tests/test_pandas.py -+++ b/python/pyarrow/tests/test_pandas.py -@@ -187,17 +187,12 @@ class TestConvertMetadata: - _check_pandas_roundtrip(df, preserve_index=True) - - def test_column_index_names_with_tz(self): -- if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"): -- # TODO: regression in pandas, should be fixed before final 2.0.0 -- # https://github.com/pandas-dev/pandas/issues/50140 -- pytest.skip("Regression in pandas 2.0.0.dev") - # ARROW-13756 - # Bug if index is timezone aware DataTimeIndex - - df = pd.DataFrame( - np.random.randn(5, 3), -- columns=pd.date_range( -- "2021-01-01", "2021-01-3", freq="D", tz="CET") -+ columns=pd.date_range("2021-01-01", periods=3, freq="50D", tz="CET") - ) - _check_pandas_roundtrip(df, preserve_index=True) - -@@ -453,11 +448,11 @@ class TestConvertMetadata: - preserve_index=True) - - def test_binary_column_name(self): -- if Version("2.0.0.dev0") <= 
Version(pd.__version__) < Version("2.0.0"): -- # TODO: regression in pandas, should be fixed before final 2.0.0 -+ if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"): -+ # TODO: regression in pandas, hopefully fixed in next version - # https://issues.apache.org/jira/browse/ARROW-18394 - # https://github.com/pandas-dev/pandas/issues/50127 -- pytest.skip("Regression in pandas 2.0.0.dev") -+ pytest.skip("Regression in pandas 2.0.0") - column_data = ['い'] - key = 'あ'.encode() - data = {key: column_data} -@@ -2064,11 +2059,6 @@ class TestConvertListTypes: - assert result3.equals(expected3) - - def test_infer_lists(self): -- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and -- (Version(pd.__version__) < Version("2.0.0"))): -- # TODO: regression in pandas with numpy 1.25dev -- # https://github.com/pandas-dev/pandas/issues/50360 -- pytest.skip("Regression in pandas with numpy 1.25") - data = OrderedDict([ - ('nan_ints', [[None, 1], [2, 3]]), - ('ints', [[0, 1], [2, 3]]), -@@ -2118,11 +2108,6 @@ class TestConvertListTypes: - _check_pandas_roundtrip(df, expected_schema=expected_schema) - - def test_to_list_of_structs_pandas(self): -- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and -- (Version(pd.__version__) < Version("2.0.0"))): -- # TODO: regression in pandas with numpy 1.25dev -- # https://github.com/pandas-dev/pandas/issues/50360 -- pytest.skip("Regression in pandas with numpy 1.25") - ints = pa.array([1, 2, 3], pa.int32()) - strings = pa.array([['a', 'b'], ['c', 'd'], ['e', 'f']], - pa.list_(pa.string())) -@@ -2192,11 +2177,6 @@ class TestConvertListTypes: - assert result.equals(expected) - - def test_nested_large_list(self): -- if ((Version(np.__version__) >= Version("1.25.0.dev0")) and -- (Version(pd.__version__) < Version("2.0.0"))): -- # TODO: regression in pandas with numpy 1.25dev -- # https://github.com/pandas-dev/pandas/issues/50360 -- pytest.skip("Regression in pandas with numpy 1.25") - s = (pa.array([[[1, 2, 3], [4]], 
None], - type=pa.large_list(pa.large_list(pa.int64()))) - .to_pandas()) -@@ -2950,11 +2930,11 @@ def _fully_loaded_dataframe_example(): - - @pytest.mark.parametrize('columns', ([b'foo'], ['foo'])) - def test_roundtrip_with_bytes_unicode(columns): -- if Version("2.0.0.dev0") <= Version(pd.__version__) < Version("2.0.0"): -- # TODO: regression in pandas, should be fixed before final 2.0.0 -+ if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"): -+ # TODO: regression in pandas, hopefully fixed in next version - # https://issues.apache.org/jira/browse/ARROW-18394 - # https://github.com/pandas-dev/pandas/issues/50127 -- pytest.skip("Regression in pandas 2.0.0.dev") -+ pytest.skip("Regression in pandas 2.0.0") - - df = pd.DataFrame(columns=columns) - table1 = pa.Table.from_pandas(df) --- -2.40.1.windows.1 - diff --git a/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch b/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch deleted file mode 100644 index 10ed6fd00..000000000 --- a/recipe/patches/0005-GH-34880-Python-CI-Fix-Windows-tests-failing-with-la.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 7cef42a8ce057fba2008042de9c95fc8c2b15f90 Mon Sep 17 00:00:00 2001 -From: Joris Van den Bossche -Date: Tue, 4 Apr 2023 20:38:25 +0200 -Subject: [PATCH 5/7] GH-34880: [Python][CI] Fix Windows tests failing with - latest pandas 2.0 (#34881) - -* Closes: #34880 - -Authored-by: Joris Van den Bossche -Signed-off-by: Jacob Wujciak-Jens ---- - ci/appveyor-cpp-setup.bat | 3 ++- - python/pyarrow/tests/parquet/test_pandas.py | 4 +++- - python/pyarrow/tests/test_pandas.py | 3 ++- - 3 files changed, 7 insertions(+), 3 deletions(-) - -diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat -index 64f930a16..73def17e5 100644 ---- a/ci/appveyor-cpp-setup.bat -+++ b/ci/appveyor-cpp-setup.bat -@@ -64,7 +64,7 @@ if "%ARROW_BUILD_GANDIVA%" == "ON" ( - @rem Install pre-built "toolchain" packages for faster builds - set 
CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt - @rem Arrow conda environment --mamba create -n arrow -q -y -c conda-forge ^ -+mamba create -n arrow -y -c conda-forge ^ - --file=ci\conda_env_python.txt ^ - %CONDA_PACKAGES% ^ - "ccache" ^ -@@ -75,6 +75,7 @@ mamba create -n arrow -q -y -c conda-forge ^ - "fsspec" ^ - "python=%PYTHON%" ^ - || exit /B -+conda list -n arrow - - @rem - @rem Configure compiler -diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py -index b6250e8fc..c27b16de7 100644 ---- a/python/pyarrow/tests/parquet/test_pandas.py -+++ b/python/pyarrow/tests/parquet/test_pandas.py -@@ -643,7 +643,9 @@ def test_dataset_read_pandas_common_metadata( - paths = [] - for i in range(nfiles): - df = _test_dataframe(size, seed=i) -- df.index = pd.Index(np.arange(i * size, (i + 1) * size), name='index') -+ df.index = pd.Index( -+ np.arange(i * size, (i + 1) * size, dtype="int64"), name='index' -+ ) - - path = dirpath / '{}.parquet'.format(i) - -diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py -index e8ae7f1f5..45a462a7b 100644 ---- a/python/pyarrow/tests/test_pandas.py -+++ b/python/pyarrow/tests/test_pandas.py -@@ -2570,7 +2570,8 @@ class TestZeroCopyConversion: - def test_zero_copy_dictionaries(self): - arr = pa.DictionaryArray.from_arrays( - np.array([0, 0]), -- np.array([5])) -+ np.array([5], dtype="int64"), -+ ) - - result = arr.to_pandas(zero_copy_only=True) - values = pd.Categorical([5, 5]) --- -2.40.1.windows.1 - diff --git a/recipe/patches/0006-GH-15070-Python-CI-Update-pandas-test-for-empty-colu.patch b/recipe/patches/0006-GH-15070-Python-CI-Update-pandas-test-for-empty-colu.patch deleted file mode 100644 index 11828f7f2..000000000 --- a/recipe/patches/0006-GH-15070-Python-CI-Update-pandas-test-for-empty-colu.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 2184cfcf8a3e7ae6d5bd6ded01c9879ea3d8aa8e Mon Sep 17 00:00:00 2001 -From: Joris Van den Bossche -Date: Wed, 
12 Apr 2023 10:32:03 +0200 -Subject: [PATCH 6/7] GH-15070: [Python][CI] Update pandas test for empty - columns dtype change in pandas 2.0.1 (#35031) - -### Rationale for this change - -Pandas changed the default dtype of the columns object for an empty DataFrame from object dtype to integer RangeIndex (see https://github.com/pandas-dev/pandas/issues/52404). This updates our tests to pass with that change. - -* Closes: #15070 - -Authored-by: Joris Van den Bossche -Signed-off-by: Sutou Kouhei ---- - python/pyarrow/tests/test_pandas.py | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py -index 45a462a7b..b472db614 100644 ---- a/python/pyarrow/tests/test_pandas.py -+++ b/python/pyarrow/tests/test_pandas.py -@@ -2829,8 +2829,12 @@ class TestConvertMisc: - - def test_table_batch_empty_dataframe(self): - df = pd.DataFrame({}) -- _check_pandas_roundtrip(df) -- _check_pandas_roundtrip(df, as_batch=True) -+ _check_pandas_roundtrip(df, preserve_index=None) -+ _check_pandas_roundtrip(df, preserve_index=None, as_batch=True) -+ -+ expected = pd.DataFrame(columns=pd.Index([])) -+ _check_pandas_roundtrip(df, expected, preserve_index=False) -+ _check_pandas_roundtrip(df, expected, preserve_index=False, as_batch=True) - - df2 = pd.DataFrame({}, index=[0, 1, 2]) - _check_pandas_roundtrip(df2, preserve_index=True) --- -2.40.1.windows.1 - diff --git a/recipe/patches/0007-GH-34743-Python-Relax-condition-in-flaky-Flight-test.patch b/recipe/patches/0007-GH-34743-Python-Relax-condition-in-flaky-Flight-test.patch deleted file mode 100644 index d42c1182f..000000000 --- a/recipe/patches/0007-GH-34743-Python-Relax-condition-in-flaky-Flight-test.patch +++ /dev/null @@ -1,43 +0,0 @@ -From cff944edcee6ef9353ab1c8acee193556cc40572 Mon Sep 17 00:00:00 2001 -From: David Li -Date: Fri, 31 Mar 2023 11:57:35 -0400 -Subject: [PATCH 7/7] GH-34743: [Python] Relax condition in flaky Flight test - (#34747) - 
-### Rationale for this change -This test is consistently flaky on AMD64 macOS. - -### What changes are included in this PR? - -Relax the test condition a bit. -### Are these changes tested? - -This is a change to a test. - -### Are there any user-facing changes? - -No. -* Closes: #34743 - -Authored-by: David Li -Signed-off-by: Will Jones ---- - python/pyarrow/tests/test_flight.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py -index 28ace4f93..f6c0ab978 100644 ---- a/python/pyarrow/tests/test_flight.py -+++ b/python/pyarrow/tests/test_flight.py -@@ -1972,7 +1972,7 @@ def test_generic_options(): - client = flight.connect(('localhost', s.port), - tls_root_certs=certs["root_cert"], - generic_options=options) -- with pytest.raises(pa.ArrowInvalid): -+ with pytest.raises((pa.ArrowInvalid, flight.FlightCancelledError)): - client.do_get(flight.Ticket(b'ints')) - client.close() - --- -2.40.1.windows.1 -