From 36fe1daaca6322224ce81dca6a671d434e0106de Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Tue, 16 Jul 2024 14:06:47 +0200
Subject: [PATCH] GH-43254: [C++] Always prefer mimalloc to jemalloc (#40875)

### Rationale for this change

As discussed [on the mailing-list](https://lists.apache.org/thread/dts9ggvkthczfpmd25wrz449mxod76o2), this PR switches the default memory pool to mimalloc for all platforms. This should have several desirable effects:

* less variability between platforms
* mimalloc generally has a nicer, more consistent API and is easier to work with (in particular, jemalloc's configuration scheme is slightly abstruse)
* potentially better performance, or at least not significantly worse, than the status quo

### Are these changes tested?

Yes, by existing CI configurations.

### Are there any user-facing changes?

Behavior should not change. Performance characteristics of some user workloads might improve or regress, but this is something we cannot predict in advance.

* GitHub Issue: #43254

Lead-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
---
 cpp/src/arrow/memory_pool.cc              | 18 ++++++++----------
 dev/archery/archery/benchmark/runner.py   |  2 ++
 dev/tasks/linux-packages/github.linux.yml |  2 +-
 docs/source/cpp/memory.rst                |  6 +++---
 docs/source/python/memory.rst             |  8 ++++----
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc
index 2f8ce3a6fa8c7..1e855311a98ed 100644
--- a/cpp/src/arrow/memory_pool.cc
+++ b/cpp/src/arrow/memory_pool.cc
@@ -85,19 +85,17 @@ struct SupportedBackend {
 
 const std::vector<SupportedBackend>& SupportedBackends() {
   static std::vector<SupportedBackend> backends = {
-  // ARROW-12316: Apple => mimalloc first, then jemalloc
-  //              non-Apple => jemalloc first, then mimalloc
-#if defined(ARROW_JEMALLOC) && !defined(__APPLE__)
-    {"jemalloc", MemoryPoolBackend::Jemalloc},
-#endif
+  // mimalloc is our preferred allocator for several reasons:
+  // 1) it has good performance
+  // 2) it is well-supported on all our main platforms (Linux, macOS, Windows)
+  // 3) it is easy to configure and has a consistent API.
 #ifdef ARROW_MIMALLOC
-    {"mimalloc", MemoryPoolBackend::Mimalloc},
+      {"mimalloc", MemoryPoolBackend::Mimalloc},
 #endif
-#if defined(ARROW_JEMALLOC) && defined(__APPLE__)
-    {"jemalloc", MemoryPoolBackend::Jemalloc},
+#ifdef ARROW_JEMALLOC
+      {"jemalloc", MemoryPoolBackend::Jemalloc},
 #endif
-    {"system", MemoryPoolBackend::System}
-  };
+      {"system", MemoryPoolBackend::System}};
   return backends;
 }
 
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index a91989fb95257..9ebb9226e3743 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -123,6 +123,8 @@ def default_configuration(**kwargs):
             with_csv=True,
             with_dataset=True,
             with_json=True,
+            with_jemalloc=True,
+            with_mimalloc=True,
             with_parquet=True,
             with_python=False,
             with_brotli=True,
diff --git a/dev/tasks/linux-packages/github.linux.yml b/dev/tasks/linux-packages/github.linux.yml
index 9e24835b8b627..891682c4358d8 100644
--- a/dev/tasks/linux-packages/github.linux.yml
+++ b/dev/tasks/linux-packages/github.linux.yml
@@ -64,7 +64,7 @@ jobs:
         run: |
           set -e
           pushd arrow/dev/tasks/linux-packages
-          rake version:update
+          rake version:update ARROW_RELEASE_TIME="$(date --iso-8601=seconds)"
           rake docker:pull || :
           rake --trace {{ task_namespace }}:build BUILD_DIR=build
           popd
diff --git a/docs/source/cpp/memory.rst b/docs/source/cpp/memory.rst
index 33907b5580f61..032b7d1ac90f1 100644
--- a/docs/source/cpp/memory.rst
+++ b/docs/source/cpp/memory.rst
@@ -139,9 +139,9 @@ Default Memory Pool
 
 The default memory pool depends on how Arrow C++ was compiled:
 
-- if enabled at compile time, a `jemalloc <http://jemalloc.net/>`_ heap;
-- otherwise, if enabled at compile time, a
-  `mimalloc <https://github.com/microsoft/mimalloc>`_ heap;
+- if enabled at compile time, a `mimalloc <https://github.com/microsoft/mimalloc>`_
+  heap;
+- otherwise, if enabled at compile time, a `jemalloc <http://jemalloc.net/>`_ heap;
 - otherwise, the C library ``malloc`` heap.
 
 Overriding the Default Memory Pool
diff --git a/docs/source/python/memory.rst b/docs/source/python/memory.rst
index 7b49d48ab20fa..029d30cc1b693 100644
--- a/docs/source/python/memory.rst
+++ b/docs/source/python/memory.rst
@@ -110,12 +110,12 @@ the buffer is garbage-collected, all of the memory is freed:
    pa.total_allocated_bytes()
 
 Besides the default built-in memory pool, there may be additional memory pools
-to choose (such as `mimalloc <https://github.com/microsoft/mimalloc>`_)
-from depending on how Arrow was built.  One can get the backend
-name for a memory pool::
+to choose from (such as `jemalloc <http://jemalloc.net/>`_)
+depending on how Arrow was built.  One can get the backend name for a memory
+pool::
 
    >>> pa.default_memory_pool().backend_name
-   'jemalloc'
+   'mimalloc'
 
 .. seealso::
    :ref:`API documentation for memory pools <api.memory_pool>`.