From f9a6edac9f175de3ad993887470dd1dff4f151c1 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 13 Dec 2024 16:36:02 +0100 Subject: [PATCH] GH-45006: [CI][Python] Fix test_memory failures (#45007) `test_memory.py` has started failing on some builds after https://github.com/apache/arrow/pull/44951 was merged * GitHub Issue: #45006 Lead-authored-by: Antoine Pitrou Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- ci/docker/conda-cpp.dockerfile | 3 +- ci/docker/debian-12-cpp.dockerfile | 1 + ci/docker/fedora-39-cpp.dockerfile | 1 + ci/docker/ubuntu-20.04-cpp.dockerfile | 1 + ci/docker/ubuntu-22.04-cpp.dockerfile | 1 + ci/docker/ubuntu-24.04-cpp.dockerfile | 1 + ci/scripts/cpp_build.sh | 4 +-- docker-compose.yml | 2 ++ python/pyarrow/tests/test_memory.py | 44 ++++++++++++--------------- 9 files changed, 30 insertions(+), 28 deletions(-) diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index f0084894e19dc..6d4be52baec05 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -48,7 +48,7 @@ ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3 COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_gcs_testbench.sh default -# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to +# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to # be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH @@ -68,6 +68,7 @@ ENV ARROW_ACERO=ON \ ARROW_GANDIVA=ON \ ARROW_GCS=ON \ ARROW_HOME=$CONDA_PREFIX \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index 354e7829cc41f..f486d07ff8894 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -124,6 +124,7 @@ ENV ARROW_ACERO=ON \ ARROW_GANDIVA=ON \ ARROW_GCS=ON \ ARROW_HOME=/usr/local \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile index 52e879aba4ebf..6c5edd444e253 100644 --- a/ci/docker/fedora-39-cpp.dockerfile +++ b/ci/docker/fedora-39-cpp.dockerfile @@ -87,6 +87,7 @@ ENV ARROW_ACERO=ON \ ARROW_GANDIVA=ON \ ARROW_GCS=ON \ ARROW_HOME=/usr/local \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index ec8c9840cf0a7..8dc778d544a6d 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -161,6 +161,7 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 78a44b0119e6c..2e4d658bf9549 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -205,6 +205,7 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 8cb7f9d5f614e..53113bccfe4fa 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -190,6 +190,7 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index e70f5da85ae2e..c1e7adf6a05e0 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -171,10 +171,10 @@ else -DARROW_GCS=${ARROW_GCS:-OFF} \ -DARROW_HDFS=${ARROW_HDFS:-ON} \ -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ - -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC:-OFF} \ -DARROW_JSON=${ARROW_JSON:-ON} \ -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ - -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ + -DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \ -DARROW_ORC=${ARROW_ORC:-OFF} \ -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ diff --git a/docker-compose.yml b/docker-compose.yml index 4911a30752a10..7aabbb43b491a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -639,6 +639,7 @@ services: ARROW_FLIGHT_SQL: "OFF" ARROW_FUZZING: "ON" # Check fuzz regressions ARROW_JEMALLOC: "OFF" + ARROW_MIMALLOC: "OFF" ARROW_ORC: "OFF" ARROW_S3: "OFF" ARROW_USE_ASAN: "ON" @@ -677,6 +678,7 @@ services: ARROW_FLIGHT: "OFF" ARROW_FLIGHT_SQL: "OFF" ARROW_JEMALLOC: "OFF" + ARROW_MIMALLOC: "OFF" ARROW_ORC: "OFF" ARROW_USE_TSAN: "ON" command: *cpp-command diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py index b1eef176665af..6ed999db42cee 100644 --- a/python/pyarrow/tests/test_memory.py +++ b/python/pyarrow/tests/test_memory.py @@ -17,7 +17,6 @@ import contextlib import os -import platform import signal import subprocess import sys @@ -30,15 +29,19 @@ pytestmark = pytest.mark.processes possible_backends = ["system", "jemalloc", "mimalloc"] +# Backends which are expected to be present in all builds of PyArrow, +# except if the user manually recompiled Arrow C++. +mandatory_backends = ["system", "mimalloc"] -should_have_jemalloc = (sys.platform == "linux" and platform.machine() == 'x86_64') -should_have_mimalloc = sys.platform == "win32" + +def backend_factory(backend_name): + return getattr(pa, f"{backend_name}_memory_pool") def supported_factories(): yield pa.default_memory_pool - for backend in pa.supported_memory_backends(): - yield getattr(pa, f"{backend}_memory_pool") + for backend_name in pa.supported_memory_backends(): + yield backend_factory(backend_name) @contextlib.contextmanager @@ -149,17 +152,12 @@ def check_env_var(name, expected, *, expect_warning=False): def test_env_var(): - check_env_var("system", ["system"]) - if should_have_jemalloc: - check_env_var("jemalloc", ["jemalloc"]) - if should_have_mimalloc: - check_env_var("mimalloc", ["mimalloc"]) + for backend_name in mandatory_backends: + check_env_var(backend_name, [backend_name]) check_env_var("nonexistent", possible_backends, expect_warning=True) -def test_specific_memory_pools(): - specific_pools = set() - +def test_memory_pool_factories(): def check(factory, name, *, can_fail=False): if can_fail: try: @@ -169,23 +167,16 @@ def check(factory, name, *, can_fail=False): else: pool = factory() assert pool.backend_name == name - specific_pools.add(pool) - check(pa.system_memory_pool, "system") - check(pa.jemalloc_memory_pool, "jemalloc", - can_fail=not should_have_jemalloc) - check(pa.mimalloc_memory_pool, "mimalloc", - can_fail=not should_have_mimalloc) + for backend_name in possible_backends: + check(backend_factory(backend_name), backend_name, + can_fail=backend_name not in mandatory_backends) def test_supported_memory_backends(): backends = pa.supported_memory_backends() - - assert "system" in backends - if should_have_jemalloc: - assert "jemalloc" in backends - if should_have_mimalloc: - assert "mimalloc" in backends + assert set(backends) >= set(mandatory_backends) + assert set(backends) <= set(possible_backends) def run_debug_memory_pool(pool_factory, env_value): @@ -246,6 +237,9 @@ def test_debug_memory_pool_warn(pool_factory): def check_debug_memory_pool_disabled(pool_factory, env_value, msg): + if sys.maxsize < 2**32: + # GH-45011: mimalloc may print warnings in this test on 32-bit Linux, ignore. + pytest.skip("Test may fail on 32-bit platforms") res = run_debug_memory_pool(pool_factory.__name__, env_value) # The subprocess either returned successfully or was killed by a signal # (due to writing out of bounds), depending on the underlying allocator.