From 7270da4cf840f0d401891690e97e29855f7c37de Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 5 Jan 2025 15:14:02 -0600 Subject: [PATCH 1/9] update --- dev/benchmarks/c/array_view_benchmark.cc | 38 ++++++++--------------- dev/benchmarks/c/benchmark_util.hpp | 39 ++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 25 deletions(-) create mode 100644 dev/benchmarks/c/benchmark_util.hpp diff --git a/dev/benchmarks/c/array_view_benchmark.cc b/dev/benchmarks/c/array_view_benchmark.cc index dbd849d..ae00396 100644 --- a/dev/benchmarks/c/array_view_benchmark.cc +++ b/dev/benchmarks/c/array_view_benchmark.cc @@ -15,19 +15,14 @@ // specific language governing permissions and limitations // under the License. -#include #include -#include #include #include "geoarrow.h" #include "benchmark_lib.h" - -// The length of most arrays used in these benchmarks. Just big enough so -// that the benchmark takes a non-trivial amount of time to run. -static const int64_t kNumItemsPrettyBig = 100000000; +#include "benchmark_util.hpp" enum CoordAccessStrategy { GENERIC, OPTIMIZED, LOOP_THEN_IF }; @@ -39,8 +34,8 @@ static void CoordViewLoop(benchmark::State& state) { struct GeoArrowArrayView view; GeoArrowArrayViewInitFromType(&view, type); - // Generate a circle with n_coords points - int64_t n_coords = kNumItemsPrettyBig; + // Memory for circle with n points + uint32_t n_coords = geoarrow::benchmark_util::kNumCoordsPrettyBig; int n_dims = view.coords.n_values; std::vector coords(n_coords * n_dims); double angle_inc_radians = M_PI / 100; @@ -51,26 +46,19 @@ static void CoordViewLoop(benchmark::State& state) { for (int i = 0; i < n_dims; i++) { view.coords.values[i] = coords.data() + (i * n_coords); } - - for (int64_t i = 0; i < n_coords; i++) { - coords[i] = cos(angle) * radius; - coords[n_coords + i] = sin(angle) * radius; - angle += angle_inc_radians; - } } else { for (int i = 0; i < n_dims; i++) { view.coords.values[i] = coords.data() + i; } - - for (int64_t i = 0; i < n_coords; i++) { - coords[n_dims * i] = cos(angle) * radius; - coords[n_dims * i + 1] = sin(angle) * radius; - angle += angle_inc_radians; - } } + std::array bounds{}; + + geoarrow::benchmark_util::PointsOnCircle(n_coords, view.coords.coords_stride, + const_cast(view.coords.values[0]), + const_cast(view.coords.values[1])); + if (operation == BOUNDS) { - std::array bounds{}; switch (strategy) { case GENERIC: for (auto _ : state) { @@ -121,10 +109,10 @@ static void CoordViewLoop(benchmark::State& state) { } state.SetItemsProcessed(n_coords * state.iterations()); - // Check the result - // std::cout << bounds[0] << ", " << bounds[1] << ", " << bounds[2] << ", " << - // bounds[3] - // << std::endl; + // Check the result (centroid should more or less be 0, 0; bounds should be more or less + // -484..483 in both dimensions) + // std::cout << bounds[0] << ", " << bounds[1] << ", " << bounds[2] << ", " << bounds[3] + // << std::endl; } BENCHMARK(CoordViewLoop); diff --git a/dev/benchmarks/c/benchmark_util.hpp b/dev/benchmarks/c/benchmark_util.hpp new file mode 100644 index 0000000..34f6d14 --- /dev/null +++ b/dev/benchmarks/c/benchmark_util.hpp @@ -0,0 +1,39 @@ + +#include +#include +#include + +namespace geoarrow { + +namespace benchmark_util { + +static const int64_t kNumCoordsPrettyBig = 10000000; + +static inline void PointsOnCircle(uint32_t n, uint32_t stride, double* out_x, + double* out_y, double dangle_radians = M_PI / 100.0, + double radius = 483.0) { + double angle = 0; + + for (uint32_t i = 0; i < n; i++) { + *out_x = std::cos(angle) * radius; + *out_y = std::sin(angle) * radius; + angle += dangle_radians; + out_x += stride; + out_y += stride; + } +} + +static inline void FillRandom(uint32_t n, uint32_t stride, double* out, + uint32_t seed = 1234, double range_min = -1.0, + double range_max = 1.0) { + std::srand(seed); + double range = range_max - range_max; + for (uint32_t i = 0; i < n; i++) { + double value01 = static_cast(std::rand()) / static_cast(RAND_MAX); + *out = range_min + value01 * range; + out += stride; + } +} + +} // namespace benchmark_util +} // namespace geoarrow From f6fc959f19ce4afd81603f9f5771197a5179dc17 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 5 Jan 2025 15:40:46 -0600 Subject: [PATCH 2/9] add bench --- dev/benchmarks/CMakeLists.txt | 3 +- dev/benchmarks/c/array_view_benchmark.cc | 123 +++++++++--------- dev/benchmarks/c/benchmark_lib.cc | 152 ----------------------- dev/benchmarks/c/benchmark_lib.h | 26 ---- dev/benchmarks/c/benchmark_util.hpp | 3 +- 5 files changed, 59 insertions(+), 248 deletions(-) delete mode 100644 dev/benchmarks/c/benchmark_lib.cc delete mode 100644 dev/benchmarks/c/benchmark_lib.h diff --git a/dev/benchmarks/CMakeLists.txt b/dev/benchmarks/CMakeLists.txt index 587cdc5..1eb0ad6 100644 --- a/dev/benchmarks/CMakeLists.txt +++ b/dev/benchmarks/CMakeLists.txt @@ -58,7 +58,6 @@ if(IS_DIRECTORY "${GEOARROW_BENCHMARK_SOURCE_URL}") elseif(NOT "${GEOARROW_BENCHMARK_SOURCE_URL}" STREQUAL "") fetchcontent_declare(geoarrow URL "${GEOARROW_BENCHMARK_SOURCE_URL}") fetchcontent_makeavailable(geoarrow) - fetchcontent_makeavailable(geoarrow_ipc) endif() # Check that either the parent scope or this CMakeLists.txt defines a geoarrow target @@ -75,7 +74,7 @@ include(CTest) enable_testing() foreach(ITEM array_view) - add_executable(${ITEM}_benchmark "c/${ITEM}_benchmark.cc" c/benchmark_lib.cc) + add_executable(${ITEM}_benchmark "c/${ITEM}_benchmark.cc") target_link_libraries(${ITEM}_benchmark PRIVATE geoarrow benchmark::benchmark_main) add_test(NAME ${ITEM}_benchmark COMMAND ${ITEM}_benchmark --benchmark_out=${ITEM}_benchmark.json) diff --git a/dev/benchmarks/c/array_view_benchmark.cc b/dev/benchmarks/c/array_view_benchmark.cc index ae00396..7e52431 100644 --- a/dev/benchmarks/c/array_view_benchmark.cc +++ b/dev/benchmarks/c/array_view_benchmark.cc @@ -21,15 +21,53 @@ #include "geoarrow.h" -#include "benchmark_lib.h" #include "benchmark_util.hpp" -enum CoordAccessStrategy { GENERIC, OPTIMIZED, LOOP_THEN_IF }; +using geoarrow::benchmark_util::Operation; +using Operation::BOUNDS; +using Operation::CENTROID; + +// Slightly faster than std::min/std::max +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +// Calculates bounds using GEOARROW_COORD_VIEW_VALUE +std::array BoundsUsingCoordViewValue(struct GeoArrowCoordView* coords, + int64_t n_coords) { + double xmin = std::numeric_limits::infinity(); + double xmax = -std::numeric_limits::infinity(); + double ymin = std::numeric_limits::infinity(); + double ymax = -std::numeric_limits::infinity(); + + double x, y; + for (int64_t i = 0; i < n_coords; i++) { + x = GEOARROW_COORD_VIEW_VALUE(coords, i, 0); + y = GEOARROW_COORD_VIEW_VALUE(coords, i, 1); + xmin = MIN(xmin, x); + xmax = MAX(xmax, y); + ymin = MIN(ymin, x); + ymax = MAX(ymax, y); + } + + return {xmin, ymin, xmax, ymax}; +} + +// Calculates centroid using GEOARROW_COORD_VIEW_VALUE +std::array CentroidUsingCoordViewValue(struct GeoArrowCoordView* coords, + int64_t n_coords) { + double xsum = 0; + double ysum = 0; -enum Operation { BOUNDS, CENTROID }; + double x, y; + for (int64_t i = 0; i < n_coords; i++) { + xsum += GEOARROW_COORD_VIEW_VALUE(coords, i, 0); + ysum += GEOARROW_COORD_VIEW_VALUE(coords, i, 1); + } + + return {xsum / n_coords, ysum / n_coords}; +} -template +template static void CoordViewLoop(benchmark::State& state) { struct GeoArrowArrayView view; GeoArrowArrayViewInitFromType(&view, type); @@ -38,9 +76,6 @@ static void CoordViewLoop(benchmark::State& state) { uint32_t n_coords = geoarrow::benchmark_util::kNumCoordsPrettyBig; int n_dims = view.coords.n_values; std::vector coords(n_coords * n_dims); - double angle_inc_radians = M_PI / 100; - double radius = 483; - double angle = 0; if (view.schema_view.coord_type == GEOARROW_COORD_TYPE_SEPARATE) { for (int i = 0; i < n_dims; i++) { @@ -59,72 +94,26 @@ static void CoordViewLoop(benchmark::State& state) { const_cast(view.coords.values[1])); if (operation == BOUNDS) { - switch (strategy) { - case GENERIC: - for (auto _ : state) { - bounds = CalculateBoundsGeneric(&view.coords, n_coords); - benchmark::DoNotOptimize(bounds); - } - break; - case OPTIMIZED: - for (auto _ : state) { - bounds = CalculateBoundsOptimized(&view.coords, n_coords, - view.schema_view.coord_type); - benchmark::DoNotOptimize(bounds); - } - break; - case LOOP_THEN_IF: - for (auto _ : state) { - bounds = CalculateBoundsLoopThenIf(&view.coords, n_coords, - view.schema_view.coord_type); - benchmark::DoNotOptimize(bounds); - } - break; + for (auto _ : state) { + bounds = BoundsUsingCoordViewValue(&view.coords, n_coords); + benchmark::DoNotOptimize(bounds); } - } else if (operation == CENTROID) { std::array centroid{}; - switch (strategy) { - case GENERIC: - for (auto _ : state) { - centroid = CalculateCentroidGeneric(&view.coords, n_coords); - benchmark::DoNotOptimize(centroid); - } - break; - case OPTIMIZED: - for (auto _ : state) { - centroid = CalculateCentroidOptimized(&view.coords, n_coords, - view.schema_view.coord_type); - benchmark::DoNotOptimize(centroid); - } - break; - case LOOP_THEN_IF: - for (auto _ : state) { - centroid = CalculateCentroidLoopThenIf(&view.coords, n_coords, - view.schema_view.coord_type); - benchmark::DoNotOptimize(centroid); - } - break; + for (auto _ : state) { + centroid = CentroidUsingCoordViewValue(&view.coords, n_coords); + benchmark::DoNotOptimize(centroid); } } state.SetItemsProcessed(n_coords * state.iterations()); - // Check the result (centroid should more or less be 0, 0; bounds should be more or less - // -484..483 in both dimensions) - // std::cout << bounds[0] << ", " << bounds[1] << ", " << bounds[2] << ", " << bounds[3] + // Check the result (centroid should more or less be 0, 0; bounds should be more or + // less -484..483 in both dimensions) std::cout << bounds[0] << ", " << bounds[1] << + // ", " << bounds[2] << ", " << bounds[3] // << std::endl; } -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); - -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); diff --git a/dev/benchmarks/c/benchmark_lib.cc b/dev/benchmarks/c/benchmark_lib.cc deleted file mode 100644 index 9fa768e..0000000 --- a/dev/benchmarks/c/benchmark_lib.cc +++ /dev/null @@ -1,152 +0,0 @@ - -#include -#include - -#include "geoarrow.h" - -// Slightly faster than std::min/std::max -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) - -std::array CalculateBoundsGeneric(struct GeoArrowCoordView* coords, - int64_t n_coords) { - double xmin = std::numeric_limits::infinity(); - double xmax = -std::numeric_limits::infinity(); - double ymin = std::numeric_limits::infinity(); - double ymax = -std::numeric_limits::infinity(); - - double x, y; - for (int64_t i = 0; i < n_coords; i++) { - x = GEOARROW_COORD_VIEW_VALUE(coords, i, 0); - y = GEOARROW_COORD_VIEW_VALUE(coords, i, 1); - xmin = MIN(xmin, x); - xmax = MAX(xmax, y); - ymin = MIN(ymin, x); - ymax = MAX(ymax, y); - } - - return {xmin, xmax, ymin, ymax}; -} - -std::array CalculateBoundsOptimized(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type) { - double xmin = std::numeric_limits::infinity(); - double xmax = -std::numeric_limits::infinity(); - double ymin = std::numeric_limits::infinity(); - double ymax = -std::numeric_limits::infinity(); - - if (coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - const double* xs = coords->values[0]; - const double* ys = coords->values[1]; - for (int64_t i = 0; i < n_coords; i++) { - xmin = MIN(xmin, xs[i]); - xmax = MAX(xmax, xs[i]); - ymin = MIN(ymin, ys[i]); - ymax = MAX(ymax, ys[i]); - } - } else { - int n_dims = coords->n_values; - const double* xs = coords->values[0]; - const double* ys = xs + 1; - for (int64_t i = 0; i < n_coords; i++) { - int64_t offset = i * n_dims; - xmin = MIN(xmin, xs[offset]); - xmax = MAX(xmax, xs[offset]); - ymin = MIN(ymin, ys[offset]); - ymax = MAX(ymax, ys[offset]); - } - } - - return {xmin, xmax, ymin, ymax}; -} - -std::array CalculateBoundsLoopThenIf(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type) { - double xmin = std::numeric_limits::infinity(); - double xmax = -std::numeric_limits::infinity(); - double ymin = std::numeric_limits::infinity(); - double ymax = -std::numeric_limits::infinity(); - - double x, y; - int n_dims = coords->n_values; - for (int64_t i = 0; i < n_coords; i++) { - if (coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - x = coords->values[0][i]; - y = coords->values[1][i]; - } else { - x = coords->values[0][i * n_dims]; - y = coords->values[0][i * n_dims + 1]; - } - - xmin = MIN(xmin, x); - xmax = MAX(xmax, y); - ymin = MIN(ymin, x); - ymax = MAX(ymax, y); - } - - return {xmin, xmax, ymin, ymax}; -} - -std::array CalculateCentroidGeneric(struct GeoArrowCoordView* coords, - int64_t n_coords) { - double xsum = 0; - double ysum = 0; - - double x, y; - for (int64_t i = 0; i < n_coords; i++) { - xsum += GEOARROW_COORD_VIEW_VALUE(coords, i, 0); - ysum += GEOARROW_COORD_VIEW_VALUE(coords, i, 1); - } - - return {xsum / n_coords, ysum / n_coords}; -} - -std::array CalculateCentroidOptimized(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type) { - double xsum = 0; - double ysum = 0; - - if (coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - // This version exploits that we can do this one element at a time - const double* xs = coords->values[0]; - const double* ys = coords->values[1]; - for (int64_t i = 0; i < n_coords; i++) { - xsum += xs[i]; - ysum += ys[i]; - } - } else { - int n_dims = coords->n_values; - const double* xs = coords->values[0]; - const double* ys = xs + 1; - for (int64_t i = 0; i < n_coords; i++) { - int64_t offset = i * n_dims; - xsum += xs[offset]; - ysum += ys[offset]; - } - } - - return {xsum / n_coords, ysum / n_coords}; -} - -std::array CalculateCentroidLoopThenIf(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type) { - double xsum = 0; - double ysum = 0; - - int n_dims = coords->n_values; - for (int64_t i = 0; i < n_coords; i++) { - if (coord_type == GEOARROW_COORD_TYPE_SEPARATE) { - xsum += coords->values[0][i]; - ysum += coords->values[1][i]; - } else { - xsum += coords->values[0][i * n_dims]; - xsum += coords->values[0][i * n_dims + 1]; - } - } - - return {xsum / n_coords, ysum / n_coords}; -} diff --git a/dev/benchmarks/c/benchmark_lib.h b/dev/benchmarks/c/benchmark_lib.h deleted file mode 100644 index 29c6189..0000000 --- a/dev/benchmarks/c/benchmark_lib.h +++ /dev/null @@ -1,26 +0,0 @@ - -#include - -#include "geoarrow.h" - -std::array CalculateBoundsGeneric(struct GeoArrowCoordView* coords, - int64_t n_coords); - -std::array CalculateBoundsOptimized(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type); - -std::array CalculateBoundsLoopThenIf(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type); - -std::array CalculateCentroidGeneric(struct GeoArrowCoordView* coords, - int64_t n_coords); - -std::array CalculateCentroidOptimized(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type); - -std::array CalculateCentroidLoopThenIf(struct GeoArrowCoordView* coords, - int64_t n_coords, - enum GeoArrowCoordType coord_type); diff --git a/dev/benchmarks/c/benchmark_util.hpp b/dev/benchmarks/c/benchmark_util.hpp index 34f6d14..3ec6b27 100644 --- a/dev/benchmarks/c/benchmark_util.hpp +++ b/dev/benchmarks/c/benchmark_util.hpp @@ -1,5 +1,4 @@ -#include #include #include @@ -9,6 +8,8 @@ namespace benchmark_util { static const int64_t kNumCoordsPrettyBig = 10000000; +enum Operation { BOUNDS, CENTROID }; + static inline void PointsOnCircle(uint32_t n, uint32_t stride, double* out_x, double* out_y, double dangle_radians = M_PI / 100.0, double radius = 483.0) { From 5c254e29906478c286dcbc95e180945d1547033d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 5 Jan 2025 22:17:06 -0600 Subject: [PATCH 3/9] fix array view benchmarks --- dev/benchmarks/c/array_view_benchmark.cc | 113 ++++++++++++++++++----- 1 file changed, 88 insertions(+), 25 deletions(-) diff --git a/dev/benchmarks/c/array_view_benchmark.cc b/dev/benchmarks/c/array_view_benchmark.cc index 7e52431..a6d6bed 100644 --- a/dev/benchmarks/c/array_view_benchmark.cc +++ b/dev/benchmarks/c/array_view_benchmark.cc @@ -27,20 +27,21 @@ using geoarrow::benchmark_util::Operation; using Operation::BOUNDS; using Operation::CENTROID; +enum Strategy { COORD_VIEW_VALUE, POINTERS }; + // Slightly faster than std::min/std::max #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #define MAX(a, b) (((a) > (b)) ? (a) : (b)) // Calculates bounds using GEOARROW_COORD_VIEW_VALUE -std::array BoundsUsingCoordViewValue(struct GeoArrowCoordView* coords, - int64_t n_coords) { +std::array BoundsUsingCoordViewValue(struct GeoArrowCoordView* coords) { double xmin = std::numeric_limits::infinity(); double xmax = -std::numeric_limits::infinity(); double ymin = std::numeric_limits::infinity(); double ymax = -std::numeric_limits::infinity(); double x, y; - for (int64_t i = 0; i < n_coords; i++) { + for (int64_t i = 0; i < coords->n_coords; i++) { x = GEOARROW_COORD_VIEW_VALUE(coords, i, 0); y = GEOARROW_COORD_VIEW_VALUE(coords, i, 1); xmin = MIN(xmin, x); @@ -52,34 +53,77 @@ std::array BoundsUsingCoordViewValue(struct GeoArrowCoordView* coords return {xmin, ymin, xmax, ymax}; } +// Calculates total bounds using raw pointer iteration +std::array BoundsUsingPointers(struct GeoArrowCoordView* coords) { + double xmin = std::numeric_limits::infinity(); + double xmax = -std::numeric_limits::infinity(); + double ymin = std::numeric_limits::infinity(); + double ymax = -std::numeric_limits::infinity(); + + const double* x = coords->values[0]; + const double* y = coords->values[1]; + for (int64_t i = 0; i < coords->n_coords; i++) { + xmin = MIN(xmin, *x); + xmax = MAX(xmax, *x); + x += coords->coords_stride; + + ymin = MIN(xmin, *y); + ymax = MAX(xmax, *y); + y += coords->coords_stride; + } + + return {xmin, ymin, xmax, ymax}; +} + // Calculates centroid using GEOARROW_COORD_VIEW_VALUE -std::array CentroidUsingCoordViewValue(struct GeoArrowCoordView* coords, - int64_t n_coords) { +std::array CentroidUsingCoordViewValue(struct GeoArrowCoordView* coords) { double xsum = 0; double ysum = 0; - double x, y; - for (int64_t i = 0; i < n_coords; i++) { + for (int64_t i = 0; i < coords->n_coords; i++) { xsum += GEOARROW_COORD_VIEW_VALUE(coords, i, 0); ysum += GEOARROW_COORD_VIEW_VALUE(coords, i, 1); } - return {xsum / n_coords, ysum / n_coords}; + return {xsum / coords->n_coords, ysum / coords->n_coords}; +} + +// Calculates centroid using raw pointer iteration +std::array CentroidUsingPointers(struct GeoArrowCoordView* coords) { + double xsum = 0; + double ysum = 0; + + const double* x = coords->values[0]; + const double* y = coords->values[1]; + for (int64_t i = 0; i < coords->n_coords; i++) { + xsum += *x++; + ysum += *y++; + } + + return {xsum / coords->n_coords, ysum / coords->n_coords}; } -template +/// \brief Benchmark iteration over all coordinates in a GeoArrowCoordView +/// +/// The Operation here is a way to ensure that all coordinates are actually iterated over +/// and nothing is optimized out. The type is to check interleaved and separated +/// coordinates, and the strategy is to check GEOARROW_COORD_VIEW_VALUE() against raw +/// pointer iteration. Interestingly, this does not affect centroid calculations but does +/// affect bounds calculation on some architectures (possibly because raw pointer +/// iteration is autovectorized but the `* coord_stride` prevents that from occurring). +template static void CoordViewLoop(benchmark::State& state) { struct GeoArrowArrayView view; GeoArrowArrayViewInitFromType(&view, type); // Memory for circle with n points - uint32_t n_coords = geoarrow::benchmark_util::kNumCoordsPrettyBig; + view.coords.n_coords = geoarrow::benchmark_util::kNumCoordsPrettyBig; int n_dims = view.coords.n_values; - std::vector coords(n_coords * n_dims); + std::vector coords(view.coords.n_coords * n_dims); if (view.schema_view.coord_type == GEOARROW_COORD_TYPE_SEPARATE) { for (int i = 0; i < n_dims; i++) { - view.coords.values[i] = coords.data() + (i * n_coords); + view.coords.values[i] = coords.data() + (i * view.coords.n_coords); } } else { for (int i = 0; i < n_dims; i++) { @@ -88,32 +132,51 @@ static void CoordViewLoop(benchmark::State& state) { } std::array bounds{}; + std::array centroid{}; - geoarrow::benchmark_util::PointsOnCircle(n_coords, view.coords.coords_stride, + geoarrow::benchmark_util::PointsOnCircle(view.coords.n_coords, + view.coords.coords_stride, const_cast(view.coords.values[0]), const_cast(view.coords.values[1])); if (operation == BOUNDS) { - for (auto _ : state) { - bounds = BoundsUsingCoordViewValue(&view.coords, n_coords); - benchmark::DoNotOptimize(bounds); + if (strategy == COORD_VIEW_VALUE) { + for (auto _ : state) { + bounds = BoundsUsingCoordViewValue(&view.coords); + benchmark::DoNotOptimize(bounds); + } + } else if (strategy == POINTERS) { + for (auto _ : state) { + bounds = BoundsUsingPointers(&view.coords); + benchmark::DoNotOptimize(bounds); + } } } else if (operation == CENTROID) { - std::array centroid{}; - for (auto _ : state) { - centroid = CentroidUsingCoordViewValue(&view.coords, n_coords); - benchmark::DoNotOptimize(centroid); + if (strategy == COORD_VIEW_VALUE) { + for (auto _ : state) { + centroid = CentroidUsingCoordViewValue(&view.coords); + benchmark::DoNotOptimize(centroid); + } + } else if (strategy == POINTERS) { + for (auto _ : state) { + centroid = CentroidUsingPointers(&view.coords); + benchmark::DoNotOptimize(centroid); + } } } - state.SetItemsProcessed(n_coords * state.iterations()); + state.SetItemsProcessed(view.coords.n_coords * state.iterations()); // Check the result (centroid should more or less be 0, 0; bounds should be more or // less -484..483 in both dimensions) std::cout << bounds[0] << ", " << bounds[1] << // ", " << bounds[2] << ", " << bounds[3] // << std::endl; } -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); From 44cabe4e852ab31584ea6e94da86de1cad323735 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 5 Jan 2025 22:47:41 -0600 Subject: [PATCH 4/9] add coord sequence benchmarks --- dev/benchmarks/CMakeLists.txt | 4 +- dev/benchmarks/c/benchmark_util.hpp | 2 - ...w_benchmark.cc => coord_view_benchmark.cc} | 21 +-- .../c/hpp_coord_sequence_benchmark.cc | 159 ++++++++++++++++++ 4 files changed, 162 insertions(+), 24 deletions(-) rename dev/benchmarks/c/{array_view_benchmark.cc => coord_view_benchmark.cc} (86%) create mode 100644 dev/benchmarks/c/hpp_coord_sequence_benchmark.cc diff --git a/dev/benchmarks/CMakeLists.txt b/dev/benchmarks/CMakeLists.txt index 1eb0ad6..0c6c0b9 100644 --- a/dev/benchmarks/CMakeLists.txt +++ b/dev/benchmarks/CMakeLists.txt @@ -27,7 +27,7 @@ if(NOT DEFINED CMAKE_C_STANDARD) endif() if(NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) endif() @@ -73,7 +73,7 @@ endif() include(CTest) enable_testing() -foreach(ITEM array_view) +foreach(ITEM coord_view hpp_coord_sequence) add_executable(${ITEM}_benchmark "c/${ITEM}_benchmark.cc") target_link_libraries(${ITEM}_benchmark PRIVATE geoarrow benchmark::benchmark_main) add_test(NAME ${ITEM}_benchmark COMMAND ${ITEM}_benchmark diff --git a/dev/benchmarks/c/benchmark_util.hpp b/dev/benchmarks/c/benchmark_util.hpp index 3ec6b27..b335937 100644 --- a/dev/benchmarks/c/benchmark_util.hpp +++ b/dev/benchmarks/c/benchmark_util.hpp @@ -8,8 +8,6 @@ namespace benchmark_util { static const int64_t kNumCoordsPrettyBig = 10000000; -enum Operation { BOUNDS, CENTROID }; - static inline void PointsOnCircle(uint32_t n, uint32_t stride, double* out_x, double* out_y, double dangle_radians = M_PI / 100.0, double radius = 483.0) { diff --git a/dev/benchmarks/c/array_view_benchmark.cc b/dev/benchmarks/c/coord_view_benchmark.cc similarity index 86% rename from dev/benchmarks/c/array_view_benchmark.cc rename to dev/benchmarks/c/coord_view_benchmark.cc index a6d6bed..ecf2faa 100644 --- a/dev/benchmarks/c/array_view_benchmark.cc +++ b/dev/benchmarks/c/coord_view_benchmark.cc @@ -1,19 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. #include @@ -23,10 +7,7 @@ #include "benchmark_util.hpp" -using geoarrow::benchmark_util::Operation; -using Operation::BOUNDS; -using Operation::CENTROID; - +enum Operation { BOUNDS, CENTROID }; enum Strategy { COORD_VIEW_VALUE, POINTERS }; // Slightly faster than std::min/std::max diff --git a/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc new file mode 100644 index 0000000..f0c1ab3 --- /dev/null +++ b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc @@ -0,0 +1,159 @@ + +#include + +#include + +#include "hpp/array_util.hpp" + +#include "benchmark_util.hpp" + +using geoarrow::array_util::CoordSequence; +using geoarrow::array_util::XY; + +enum Operation { BOUNDS, CENTROID }; +enum Strategy { COORD_VIEW_VALUE, POINTERS }; + +// Slightly faster than std::min/std::max +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +// Calculates total bounds using the STL iterator +std::array BoundsUsingCoordIterator(const CoordSequence& seq) { + double xmin = std::numeric_limits::infinity(); + double xmax = -std::numeric_limits::infinity(); + double ymin = std::numeric_limits::infinity(); + double ymax = -std::numeric_limits::infinity(); + + for (const auto& coord : seq) { + xmin = MIN(xmin, coord.x()); + xmax = MAX(xmax, coord.x()); + ymin = MIN(ymin, coord.y()); + ymax = MAX(ymax, coord.y()); + } + + return {xmin, ymin, xmax, ymax}; +} + +// Calculates total bounds using raw pointer iteration +std::array BoundsUsingPointers(const CoordSequence& seq) { + double xmin = std::numeric_limits::infinity(); + double xmax = -std::numeric_limits::infinity(); + double ymin = std::numeric_limits::infinity(); + double ymax = -std::numeric_limits::infinity(); + + const double* x = seq.values[0]; + const double* y = seq.values[1]; + for (uint32_t i = 0; i < seq.size(); i++) { + xmin = MIN(xmin, *x); + xmax = MAX(xmax, *x); + x += seq.stride; + + ymin = MIN(xmin, *y); + ymax = MAX(xmax, *y); + y += seq.stride; + } + + return {xmin, ymin, xmax, ymax}; +} + +// Calculates centroid using the STL iterator +std::array CentroidUsingCoordIterator(const CoordSequence& seq) { + double xsum = 0; + double ysum = 0; + + for (const auto& coord : seq) { + xsum += coord.x(); + ysum += coord.y(); + } + + return {xsum / seq.size(), ysum / seq.size()}; +} + +// Calculates centroid using raw pointer iteration +std::array CentroidUsingPointers(const CoordSequence& seq) { + double xsum = 0; + double ysum = 0; + + const double* x = seq.values[0]; + const double* y = seq.values[1]; + for (uint32_t i = 0; i < seq.size(); i++) { + xsum += *x++; + ysum += *y++; + } + + return {xsum / seq.size(), ysum / seq.size()}; +} + +/// \brief Benchmark iteration over all coordinates in a CoordSequence +/// +/// The Operation here is a way to ensure that all coordinates are actually iterated over +/// and nothing is optimized out. The type is to check interleaved and separated +/// coordinates, and the strategy is to check STL iterators against raw +/// pointer iteration. +template +static void CoordViewLoop(benchmark::State& state) { + CoordSequence seq; + + // Memory for circle with n points + seq.offset = 0; + seq.length = geoarrow::benchmark_util::kNumCoordsPrettyBig; + std::vector coords(seq.size() * seq.coord_size); + + if (type == GEOARROW_TYPE_POINT) { + for (uint32_t i = 0; i < seq.coord_size; i++) { + seq.values[i] = coords.data() + (i * seq.size()); + } + } else { + for (uint32_t i = 0; i < seq.coord_size; i++) { + seq.values[i] = coords.data() + i; + } + } + + geoarrow::benchmark_util::PointsOnCircle(seq.size(), seq.stride, + const_cast(seq.values[0]), + const_cast(seq.values[1])); + + std::array bounds{}; + std::array centroid{}; + + if (operation == BOUNDS) { + if (strategy == COORD_VIEW_VALUE) { + for (auto _ : state) { + bounds = BoundsUsingCoordIterator(seq); + benchmark::DoNotOptimize(bounds); + } + } else if (strategy == POINTERS) { + for (auto _ : state) { + bounds = BoundsUsingPointers(seq); + benchmark::DoNotOptimize(bounds); + } + } + } else if (operation == CENTROID) { + if (strategy == COORD_VIEW_VALUE) { + for (auto _ : state) { + centroid = CentroidUsingCoordIterator(seq); + benchmark::DoNotOptimize(centroid); + } + } else if (strategy == POINTERS) { + for (auto _ : state) { + centroid = CentroidUsingPointers(seq); + benchmark::DoNotOptimize(centroid); + } + } + } + + state.SetItemsProcessed(seq.size() * state.iterations()); + // Check the result (centroid should more or less be 0, 0; bounds should be more or + // less -484..483 in both dimensions) std::cout << bounds[0] << ", " << bounds[1] << + // ", " << bounds[2] << ", " << bounds[3] + // << std::endl; +} + +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); +BENCHMARK(CoordViewLoop); From ff682965ad8314ff3b9eda264cf79a85081a564b Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 5 Jan 2025 23:20:44 -0600 Subject: [PATCH 5/9] fix stride --- .../c/hpp_coord_sequence_benchmark.cc | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc index f0c1ab3..14601ef 100644 --- a/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc +++ b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc @@ -11,7 +11,7 @@ using geoarrow::array_util::CoordSequence; using geoarrow::array_util::XY; enum Operation { BOUNDS, CENTROID }; -enum Strategy { COORD_VIEW_VALUE, POINTERS }; +enum Strategy { STL_ITERATOR, POINTERS }; // Slightly faster than std::min/std::max #define MIN(a, b) (((a) < (b)) ? (a) : (b)) @@ -24,7 +24,7 @@ std::array BoundsUsingCoordIterator(const CoordSequence& seq) { double ymin = std::numeric_limits::infinity(); double ymax = -std::numeric_limits::infinity(); - for (const auto& coord : seq) { + for (const XY coord : seq) { xmin = MIN(xmin, coord.x()); xmax = MAX(xmax, coord.x()); ymin = MIN(ymin, coord.y()); @@ -61,7 +61,7 @@ std::array CentroidUsingCoordIterator(const CoordSequence& seq) { double xsum = 0; double ysum = 0; - for (const auto& coord : seq) { + for (const XY coord : seq) { xsum += coord.x(); ysum += coord.y(); } @@ -91,7 +91,7 @@ std::array CentroidUsingPointers(const CoordSequence& seq) { /// coordinates, and the strategy is to check STL iterators against raw /// pointer iteration. template -static void CoordViewLoop(benchmark::State& state) { +static void CoordSequenceLoop(benchmark::State& state) { CoordSequence seq; // Memory for circle with n points @@ -100,10 +100,12 @@ static void CoordViewLoop(benchmark::State& state) { std::vector coords(seq.size() * seq.coord_size); if (type == GEOARROW_TYPE_POINT) { + seq.stride = 1; for (uint32_t i = 0; i < seq.coord_size; i++) { seq.values[i] = coords.data() + (i * seq.size()); } } else { + seq.stride = seq.coord_size; for (uint32_t i = 0; i < seq.coord_size; i++) { seq.values[i] = coords.data() + i; } @@ -117,7 +119,7 @@ static void CoordViewLoop(benchmark::State& state) { std::array centroid{}; if (operation == BOUNDS) { - if (strategy == COORD_VIEW_VALUE) { + if (strategy == STL_ITERATOR) { for (auto _ : state) { bounds = BoundsUsingCoordIterator(seq); benchmark::DoNotOptimize(bounds); @@ -129,7 +131,7 @@ static void CoordViewLoop(benchmark::State& state) { } } } else if (operation == CENTROID) { - if (strategy == COORD_VIEW_VALUE) { + if (strategy == STL_ITERATOR) { for (auto _ : state) { centroid = CentroidUsingCoordIterator(seq); benchmark::DoNotOptimize(centroid); @@ -149,11 +151,11 @@ static void CoordViewLoop(benchmark::State& state) { // << std::endl; } -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); -BENCHMARK(CoordViewLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); +BENCHMARK(CoordSequenceLoop); From 1a79860ee31e6c943b0fa6e27c947925bc5c8562 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 6 Jan 2025 00:01:23 -0600 Subject: [PATCH 6/9] tweak benchmarks --- dev/benchmarks/c/hpp_coord_sequence_benchmark.cc | 14 ++++++++------ src/geoarrow/hpp/array_util.hpp | 6 ++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc index 14601ef..1f6ab20 100644 --- a/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc +++ b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc @@ -41,8 +41,8 @@ std::array BoundsUsingPointers(const CoordSequence& seq) { double ymin = std::numeric_limits::infinity(); double ymax = -std::numeric_limits::infinity(); - const double* x = seq.values[0]; - const double* y = seq.values[1]; + const double* x = seq.dbegin(0); + const double* y = seq.dbegin(1); for (uint32_t i = 0; i < seq.size(); i++) { xmin = MIN(xmin, *x); xmax = MAX(xmax, *x); @@ -74,11 +74,13 @@ std::array CentroidUsingPointers(const CoordSequence& seq) { double xsum = 0; double ysum = 0; - const double* x = seq.values[0]; - const double* y = seq.values[1]; + const double* x = seq.dbegin(0); + const double* y = seq.dbegin(1); for (uint32_t i = 0; i < seq.size(); i++) { - xsum += *x++; - ysum += *y++; + xsum += *x; + ysum += *y; + x += seq.stride; + y += seq.stride; } return {xsum / seq.size(), ysum / seq.size()}; diff --git a/src/geoarrow/hpp/array_util.hpp b/src/geoarrow/hpp/array_util.hpp index 221870b..8c2d72a 100644 --- a/src/geoarrow/hpp/array_util.hpp +++ b/src/geoarrow/hpp/array_util.hpp @@ -263,8 +263,10 @@ struct CoordSequence { const_iterator begin() const { return const_iterator(*this, 0); } const_iterator end() const { return const_iterator(*this, length); } - double* dim_begin(uint32_t j) const { return values[0]; } - double* dim_end(uint32_t j) const { return values[0] + (length * stride); } + const double* dbegin(uint32_t j) const { return values[0] + (offset * stride); } + const double* dend(uint32_t j) const { + return values[0] + ((offset + length) * stride); + } }; /// \brief View of a sequence of lists From 44d645c5a2a5e9a217248f040f178b8ad3205b9d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 6 Jan 2025 00:10:42 -0600 Subject: [PATCH 7/9] test dx iteration --- src/geoarrow/hpp/array_util.hpp | 4 +-- src/geoarrow/hpp/array_util_test.cc | 46 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/geoarrow/hpp/array_util.hpp b/src/geoarrow/hpp/array_util.hpp index 8c2d72a..a36443f 100644 --- a/src/geoarrow/hpp/array_util.hpp +++ b/src/geoarrow/hpp/array_util.hpp @@ -263,9 +263,9 @@ struct CoordSequence { const_iterator begin() const { return const_iterator(*this, 0); } const_iterator end() const { return const_iterator(*this, length); } - const double* dbegin(uint32_t j) const { return values[0] + (offset * stride); } + const double* dbegin(uint32_t j) const { return values[j] + (offset * stride); } const double* dend(uint32_t j) const { - return values[0] + ((offset + length) * stride); + return values[j] + ((offset + length) * stride); } }; diff --git a/src/geoarrow/hpp/array_util_test.cc b/src/geoarrow/hpp/array_util_test.cc index a48163d..f51b742 100644 --- a/src/geoarrow/hpp/array_util_test.cc +++ b/src/geoarrow/hpp/array_util_test.cc @@ -197,6 +197,29 @@ TEST(GeoArrowHppTest, IterateCoords) { EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); EXPECT_THAT(sequence.Slice(1, 1), ::testing::ElementsAre(XY{1, 6})); + + // Check dbegin() iteration + coords_vec.clear(); + const double* x = sequence.dbegin(0); + const double* y = sequence.dbegin(1); + for (uint32_t i = 0; i < sequence.size(); i++) { + coords_vec.push_back(XY{*x, *y}); + x += sequence.stride; + y += sequence.stride; + } + EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); + + // Check dbegin() iteration with offset + coords_vec.clear(); + sequence = sequence.Slice(1, 2); + x = sequence.dbegin(0); + y = sequence.dbegin(1); + for (uint32_t i = 0; i < sequence.size(); i++) { + coords_vec.push_back(XY{*x, *y}); + x += sequence.stride; + y += sequence.stride; + } + EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{1, 6}, XY{2, 7})); } TEST(GeoArrowHppTest, IterateCoordsInterleaved) { @@ -213,6 +236,29 @@ TEST(GeoArrowHppTest, IterateCoordsInterleaved) { } EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); + + // Check dbegin() iteration + coords_vec.clear(); + const double* x = sequence.dbegin(0); + const double* y = sequence.dbegin(1); + for (uint32_t i = 0; i < sequence.size(); i++) { + coords_vec.push_back(XY{*x, *y}); + x += sequence.stride; + y += sequence.stride; + } + EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); + + // Check dbegin() iteration with offset + coords_vec.clear(); + sequence = sequence.Slice(1, 2); + x = sequence.dbegin(0); + y = sequence.dbegin(1); + for (uint32_t i = 0; i < sequence.size(); i++) { + coords_vec.push_back(XY{*x, *y}); + x += sequence.stride; + y += sequence.stride; + } + EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{1, 6}, XY{2, 7})); } TEST(GeoArrowHppTest, IterateNestedCoords) { From 4737453c94c9154efb8abb0ff4659a65c76526bb Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 6 Jan 2025 00:25:48 -0600 Subject: [PATCH 8/9] make dimensions an actual iterator --- .../c/hpp_coord_sequence_benchmark.cc | 18 +++--- src/geoarrow/hpp/array_util.hpp | 56 ++++++++++++++++++- src/geoarrow/hpp/array_util_test.cc | 24 ++++---- 3 files changed, 73 insertions(+), 25 deletions(-) diff --git a/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc index 1f6ab20..5579881 100644 --- a/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc +++ b/dev/benchmarks/c/hpp_coord_sequence_benchmark.cc @@ -41,16 +41,16 @@ std::array BoundsUsingPointers(const CoordSequence& seq) { double ymin = std::numeric_limits::infinity(); double ymax = -std::numeric_limits::infinity(); - const double* x = seq.dbegin(0); - const double* y = seq.dbegin(1); + auto x = seq.dbegin(0); + auto y = seq.dbegin(1); for (uint32_t i = 0; i < seq.size(); i++) { xmin = MIN(xmin, *x); xmax = MAX(xmax, *x); - x += seq.stride; + ++x; ymin = MIN(xmin, *y); ymax = MAX(xmax, *y); - y += seq.stride; + ++y; } return {xmin, ymin, xmax, ymax}; @@ -74,13 +74,11 @@ std::array CentroidUsingPointers(const CoordSequence& seq) { double xsum = 0; double ysum = 0; - const double* x = seq.dbegin(0); - const double* y = seq.dbegin(1); + auto x = seq.dbegin(0); + auto y = seq.dbegin(1); for (uint32_t i = 0; i < seq.size(); i++) { - xsum += *x; - ysum += *y; - x += seq.stride; - y += seq.stride; + xsum += x++; + ysum += y++; } return {xsum / seq.size(), ysum / seq.size()}; diff --git a/src/geoarrow/hpp/array_util.hpp b/src/geoarrow/hpp/array_util.hpp index a36443f..a8f1868 100644 --- a/src/geoarrow/hpp/array_util.hpp +++ b/src/geoarrow/hpp/array_util.hpp @@ -97,6 +97,53 @@ class ListSequenceIterator : public BaseRandomAccessIterator { typename ListSequence::child_type stashed_; }; +template +class StridedIterator { + public: + explicit StridedIterator(const T* ptr, ptrdiff_t stride) : ptr_(ptr), stride_(stride) {} + StridedIterator& operator++() { + ptr_ += stride_; + return *this; + } + T operator++(int) { + T retval = *ptr_; + ptr_ += stride_; + return retval; + } + StridedIterator& operator--() { + ptr_ -= stride_; + return *this; + } + StridedIterator& operator+=(int64_t n) { + ptr_ += (n * stride_); + return *this; + } + StridedIterator& operator-=(int64_t n) { + ptr_ -= (n * stride_); + return *this; + } + int64_t operator-(const StridedIterator& other) const { + return (ptr_ - other.ptr_) / stride_; + } + bool operator<(const StridedIterator& other) const { return ptr_ < other.ptr_; } + bool operator>(const StridedIterator& other) const { return ptr_ > other.ptr_; } + bool operator<=(const StridedIterator& other) const { return ptr_ <= other.ptr_; } + bool operator>=(const StridedIterator& other) const { return ptr_ >= other.ptr_; } + bool operator==(const StridedIterator& other) const { return ptr_ == other.ptr_; } + bool operator!=(const StridedIterator& other) const { return ptr_ != other.ptr_; } + + T operator*() const { return *ptr_; } + T operator[](ptrdiff_t i) const { return ptr_[i]; } + + using iterator_category = std::random_access_iterator_tag; + using difference_type = int64_t; + using value_type = T; + + protected: + const T* ptr_; + ptrdiff_t stride_; +}; + } // namespace internal struct BoxXY; @@ -263,9 +310,12 @@ struct CoordSequence { const_iterator begin() const { return const_iterator(*this, 0); } const_iterator end() const { return const_iterator(*this, length); } - const double* dbegin(uint32_t j) const { return values[j] + (offset * stride); } - const double* dend(uint32_t j) const { - return values[j] + ((offset + length) * stride); + using dimension_iterator = internal::StridedIterator; + dimension_iterator dbegin(uint32_t j) const { + return dimension_iterator(values[j] + (offset * stride), stride); + } + dimension_iterator dend(uint32_t j) const { + return dimension_iterator(values[j] + ((offset + length) * stride), stride); } }; diff --git a/src/geoarrow/hpp/array_util_test.cc b/src/geoarrow/hpp/array_util_test.cc index f51b742..06ebc6e 100644 --- a/src/geoarrow/hpp/array_util_test.cc +++ b/src/geoarrow/hpp/array_util_test.cc @@ -200,12 +200,12 @@ TEST(GeoArrowHppTest, IterateCoords) { // Check dbegin() iteration coords_vec.clear(); - const double* x = sequence.dbegin(0); - const double* y = sequence.dbegin(1); + auto x = sequence.dbegin(0); + auto y = sequence.dbegin(1); for (uint32_t i = 0; i < sequence.size(); i++) { coords_vec.push_back(XY{*x, *y}); - x += sequence.stride; - y += sequence.stride; + ++x; + ++y; } EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); @@ -216,8 +216,8 @@ TEST(GeoArrowHppTest, IterateCoords) { y = sequence.dbegin(1); for (uint32_t i = 0; i < sequence.size(); i++) { coords_vec.push_back(XY{*x, *y}); - x += sequence.stride; - y += sequence.stride; + ++x; + ++y; } EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{1, 6}, XY{2, 7})); } @@ -239,12 +239,12 @@ TEST(GeoArrowHppTest, IterateCoordsInterleaved) { // Check dbegin() iteration coords_vec.clear(); - const double* x = sequence.dbegin(0); - const double* y = sequence.dbegin(1); + auto x = sequence.dbegin(0); + auto y = sequence.dbegin(1); for (uint32_t i = 0; i < sequence.size(); i++) { coords_vec.push_back(XY{*x, *y}); - x += sequence.stride; - y += sequence.stride; + ++x; + ++y; } EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{0, 5}, XY{1, 6}, XY{2, 7})); @@ -255,8 +255,8 @@ TEST(GeoArrowHppTest, IterateCoordsInterleaved) { y = sequence.dbegin(1); for (uint32_t i = 0; i < sequence.size(); i++) { coords_vec.push_back(XY{*x, *y}); - x += sequence.stride; - y += sequence.stride; + ++x; + ++y; } EXPECT_THAT(coords_vec, ::testing::ElementsAre(XY{1, 6}, XY{2, 7})); } From 49106effce198da0cead5cad56595c2593f64ec9 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 6 Jan 2025 00:45:15 -0600 Subject: [PATCH 9/9] test slicing/stl --- src/geoarrow/hpp/array_util.hpp | 9 ++++-- src/geoarrow/hpp/array_util_test.cc | 44 +++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/geoarrow/hpp/array_util.hpp b/src/geoarrow/hpp/array_util.hpp index a8f1868..7b14daa 100644 --- a/src/geoarrow/hpp/array_util.hpp +++ b/src/geoarrow/hpp/array_util.hpp @@ -68,6 +68,8 @@ class CoordSequenceIterator : public BaseRandomAccessIterator { using iterator_category = std::random_access_iterator_tag; using difference_type = int64_t; using value_type = typename CoordSequence::value_type; + using reference = value_type&; + using pointer = value_type*; explicit CoordSequenceIterator(const CoordSequence& outer, uint32_t i) : BaseRandomAccessIterator(outer, i) {} @@ -97,6 +99,7 @@ class ListSequenceIterator : public BaseRandomAccessIterator { typename ListSequence::child_type stashed_; }; +// Iterator for dimension begin/end template class StridedIterator { public: @@ -114,11 +117,11 @@ class StridedIterator { ptr_ -= stride_; return *this; } - StridedIterator& operator+=(int64_t n) { + StridedIterator& operator+=(ptrdiff_t n) { ptr_ += (n * stride_); return *this; } - StridedIterator& operator-=(int64_t n) { + StridedIterator& operator-=(ptrdiff_t n) { ptr_ -= (n * stride_); return *this; } @@ -138,6 +141,8 @@ class StridedIterator { using iterator_category = std::random_access_iterator_tag; using difference_type = int64_t; using value_type = T; + using reference = T&; + using pointer = T*; protected: const T* ptr_; diff --git a/src/geoarrow/hpp/array_util_test.cc b/src/geoarrow/hpp/array_util_test.cc index 06ebc6e..95aac9b 100644 --- a/src/geoarrow/hpp/array_util_test.cc +++ b/src/geoarrow/hpp/array_util_test.cc @@ -369,6 +369,15 @@ TEST(GeoArrowHppTest, SetArrayBox) { EXPECT_THAT(boxes, ::testing::ElementsAre(BoxXY{0, 1, 2, 3}, BoxXY{4, 5, 6, 7}, BoxXY{8, 9, 12, 13})); + std::vector xmins(native_array.value.dbegin(0), native_array.value.dend(0)); + EXPECT_THAT(xmins, ::testing::ElementsAre(0, 4, 8)); + std::vector ymins(native_array.value.dbegin(1), native_array.value.dend(1)); + EXPECT_THAT(ymins, ::testing::ElementsAre(1, 5, 9)); + std::vector xmaxs(native_array.value.dbegin(2), native_array.value.dend(2)); + EXPECT_THAT(xmaxs, ::testing::ElementsAre(2, 6, 12)); + std::vector ymaxs(native_array.value.dbegin(3), native_array.value.dend(3)); + EXPECT_THAT(ymaxs, ::testing::ElementsAre(3, 7, 13)); + EXPECT_THAT(native_array.LowerBound().value, ::testing::ElementsAre(XY{0, 1}, XY{4, 5}, XY{8, 9})); EXPECT_THAT(native_array.UpperBound().value, @@ -404,7 +413,14 @@ TEST(GeoArrowHppTest, SetArrayPoint) { EXPECT_THAT(points, ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5})); EXPECT_THAT(native_array.Coords(), ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5})); - EXPECT_THAT(native_array.Slice(1, 1).Coords(), ::testing::ElementsAre(XY{2, 3})); + + auto sliced_coords = native_array.Slice(1, 1).Coords(); + EXPECT_THAT(sliced_coords, ::testing::ElementsAre(XY{2, 3})); + + std::vector sliced_x(sliced_coords.dbegin(0), sliced_coords.dend(0)); + EXPECT_THAT(sliced_x, ::testing::ElementsAre(2)); + std::vector sliced_y(sliced_coords.dbegin(1), sliced_coords.dend(1)); + EXPECT_THAT(sliced_y, ::testing::ElementsAre(3)); } } @@ -447,8 +463,14 @@ TEST(GeoArrowHppTest, SetArrayLinestring) { EXPECT_THAT(native_array.Coords(), ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5}, XY{6, 7}, XY{8, 9}, XY{10, 11}, XY{12, 13})); - EXPECT_THAT(native_array.Slice(1, 1).Coords(), - ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + + auto sliced_coords = native_array.Slice(1, 1).Coords(); + EXPECT_THAT(sliced_coords, ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + + std::vector sliced_x(sliced_coords.dbegin(0), sliced_coords.dend(0)); + EXPECT_THAT(sliced_x, ::testing::ElementsAre(4, 6)); + std::vector sliced_y(sliced_coords.dbegin(1), sliced_coords.dend(1)); + EXPECT_THAT(sliced_y, ::testing::ElementsAre(5, 7)); } } @@ -494,11 +516,18 @@ TEST(GeoArrowHppTest, SetArrayMultiLinestring) { std::vector>{{XY{4, 5}, XY{6, 7}}}, std::vector>{{XY{8, 9}, XY{10, 11}, XY{12, 13}}, {XY{15, 16}, XY{17, 18}}})); + EXPECT_THAT(native_array.Coords(), ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5}, XY{6, 7}, XY{8, 9}, XY{10, 11}, XY{12, 13}, XY{15, 16}, XY{17, 18})); + + auto sliced_coords = native_array.Slice(1, 1).Coords(); EXPECT_THAT(native_array.Slice(1, 1).Coords(), ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + std::vector sliced_x(sliced_coords.dbegin(0), sliced_coords.dend(0)); + EXPECT_THAT(sliced_x, ::testing::ElementsAre(4, 6)); + std::vector sliced_y(sliced_coords.dbegin(1), sliced_coords.dend(1)); + EXPECT_THAT(sliced_y, ::testing::ElementsAre(5, 7)); } } @@ -551,7 +580,12 @@ TEST(GeoArrowHppTest, SetArrayMultiPolygon) { EXPECT_THAT(native_array.Coords(), ::testing::ElementsAre(XY{0, 1}, XY{2, 3}, XY{4, 5}, XY{6, 7}, XY{8, 9}, XY{10, 11}, XY{12, 13}, XY{15, 16}, XY{17, 18})); - EXPECT_THAT(native_array.Slice(1, 1).Coords(), - ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + + auto sliced_coords = native_array.Slice(1, 1).Coords(); + EXPECT_THAT(sliced_coords, ::testing::ElementsAre(XY{4, 5}, XY{6, 7})); + std::vector sliced_x(sliced_coords.dbegin(0), sliced_coords.dend(0)); + EXPECT_THAT(sliced_x, ::testing::ElementsAre(4, 6)); + std::vector sliced_y(sliced_coords.dbegin(1), sliced_coords.dend(1)); + EXPECT_THAT(sliced_y, ::testing::ElementsAre(5, 7)); } }