From 2876cefe35f2ebaf9545424928bc1a69feb12af2 Mon Sep 17 00:00:00 2001 From: Tobias Zagorni Date: Wed, 23 Mar 2022 20:16:07 +0100 Subject: [PATCH 1/4] fix signed overflow in CopyStrideNatural --- cpp/src/arrow/python/numpy_to_arrow.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index a180e3a675ad0..7e0518b73a5d2 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -404,8 +404,9 @@ class NumPyStridedConverter { ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(sizeof(T) * length_, pool_)); const int64_t stride = PyArray_STRIDES(arr)[0]; - if (stride % sizeof(T) == 0) { - const int64_t stride_elements = stride / sizeof(T); + const int64_t element_size = sizeof(T); + if (stride % element_size == 0) { + const int64_t stride_elements = stride / element_size; CopyStridedNatural(reinterpret_cast(PyArray_DATA(arr)), length_, stride_elements, reinterpret_cast(buffer_->mutable_data())); } else { From ffafd8cb0205b4746354a00731cb4936d9a51cc0 Mon Sep 17 00:00:00 2001 From: Tobias Zagorni Date: Thu, 24 Mar 2022 14:42:56 +0100 Subject: [PATCH 2/4] add comment about ARROW-16013 --- cpp/src/arrow/python/numpy_to_arrow.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index 7e0518b73a5d2..272ea83a4fe76 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -404,6 +404,10 @@ class NumPyStridedConverter { ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(sizeof(T) * length_, pool_)); const int64_t stride = PyArray_STRIDES(arr)[0]; + // ARROW-16013: covert sizeof(T) to signed int64 first, otherwise dividing by it would + // do an unsigned division. This cannot be caught by tests without ubsan, since common + // signed overflow behavoir and the fact that the sizeof(T) is currently always a + // power of two here cause CopyStridedNatural to still produce correct results const int64_t element_size = sizeof(T); if (stride % element_size == 0) { const int64_t stride_elements = stride / element_size; From 3c1591daded9708191f6178375b81b9703aa37b2 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 28 Mar 2022 15:36:39 +0200 Subject: [PATCH 3/4] Update cpp/src/arrow/python/numpy_to_arrow.cc --- cpp/src/arrow/python/numpy_to_arrow.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index 272ea83a4fe76..7f6daba09da56 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -404,9 +404,9 @@ class NumPyStridedConverter { ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(sizeof(T) * length_, pool_)); const int64_t stride = PyArray_STRIDES(arr)[0]; - // ARROW-16013: covert sizeof(T) to signed int64 first, otherwise dividing by it would + // ARROW-16013: convert sizeof(T) to signed int64 first, otherwise dividing by it would // do an unsigned division. This cannot be caught by tests without ubsan, since common - // signed overflow behavoir and the fact that the sizeof(T) is currently always a + // signed overflow behavior and the fact that the sizeof(T) is currently always a // power of two here cause CopyStridedNatural to still produce correct results const int64_t element_size = sizeof(T); if (stride % element_size == 0) { From 2c2e8b9b4d931cfa317f312ba7b6037747f7aab0 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 28 Mar 2022 17:38:51 +0200 Subject: [PATCH 4/4] Fix lint --- cpp/src/arrow/python/numpy_to_arrow.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index 7f6daba09da56..2727ce32f4494 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -404,10 +404,10 @@ class NumPyStridedConverter { ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(sizeof(T) * length_, pool_)); const int64_t stride = PyArray_STRIDES(arr)[0]; - // ARROW-16013: convert sizeof(T) to signed int64 first, otherwise dividing by it would - // do an unsigned division. This cannot be caught by tests without ubsan, since common - // signed overflow behavior and the fact that the sizeof(T) is currently always a - // power of two here cause CopyStridedNatural to still produce correct results + // ARROW-16013: convert sizeof(T) to signed int64 first, otherwise dividing by it + // would do an unsigned division. This cannot be caught by tests without ubsan, since + // common signed overflow behavior and the fact that the sizeof(T) is currently always + // a power of two here cause CopyStridedNatural to still produce correct results const int64_t element_size = sizeof(T); if (stride % element_size == 0) { const int64_t stride_elements = stride / element_size;