Skip to content

Commit 1bc4656

Browse files
authored
[C++] Align arrow version to system if arrow installed (#162)
* Align the Arrow version with the system-installed one if Arrow is already installed. Signed-off-by: acezen <qiaozi.zwb@alibaba-inc.com>; Committed-by: acezen from Dev container
1 parent fdd1b99 commit 1bc4656

File tree

2 files changed

+33
-11
lines changed

2 files changed

+33
-11
lines changed

cpp/cmake/apache-arrow.cmake

+9-1
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,13 @@ function(build_arrow)
3434
endif ()
3535

3636
find_package(Threads)
37+
find_package(Arrow QUIET)
38+
set(ARROW_VERSION_TO_BUILD "10.0.1" CACHE INTERNAL "arrow version")
39+
if (Arrow_FOUND) # arrow is installed, build the same version as the installed one
40+
message(STATUS "Found Arrow installed, align to version: ${Arrow_VERSION}")
41+
set(ARROW_VERSION_TO_BUILD "${Arrow_VERSION}" CACHE INTERNAL "arrow version")
42+
endif ()
43+
3744
# If Arrow needs to be built, the default location will be within the build tree.
3845
set(GAR_ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-prefix")
3946

@@ -84,10 +91,11 @@ function(build_arrow)
8491

8592
set(GAR_ARROW_INCLUDE_DIR "${GAR_ARROW_PREFIX}/include" CACHE INTERNAL "arrow include directory")
8693
set(GAR_ARROW_BUILD_BYPRODUCTS "${GAR_ARROW_STATIC_LIB}" "${GAR_PARQUET_STATIC_LIB}")
94+
set(GAR_ARROW_SOURCE_FILE "https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${ARROW_VERSION_TO_BUILD}/apache-arrow-${ARROW_VERSION_TO_BUILD}.tar.gz")
8795

8896
include(ExternalProject)
8997
externalproject_add(arrow_ep
90-
URL https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-10.0.1/apache-arrow-10.0.1.tar.gz
98+
URL "${GAR_ARROW_SOURCE_FILE}"
9199
SOURCE_SUBDIR cpp
92100
BINARY_DIR "${GAR_ARROW_BINARY_DIR}"
93101
CMAKE_ARGS "${GAR_ARROW_CMAKE_ARGS}"

cpp/src/arrow_chunk_writer.cc

+24-10
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,11 @@ limitations under the License.
1717

1818
#include "arrow/api.h"
1919
#include "arrow/compute/api.h"
20+
#if defined(ARROW_VERSION) && ARROW_VERSION >= 12000000
21+
#include "arrow/acero/exec_plan.h"
22+
#else
2023
#include "arrow/compute/exec/exec_plan.h"
24+
#endif
2125
#include "arrow/dataset/dataset.h"
2226
#include "arrow/dataset/file_base.h"
2327
#include "arrow/dataset/file_parquet.h"
@@ -29,6 +33,12 @@ limitations under the License.
2933
namespace GAR_NAMESPACE_INTERNAL {
3034
// common methods
3135

36+
#if defined(ARROW_VERSION) && ARROW_VERSION >= 12000000
37+
namespace arrow_acero_namespace = arrow::acero;
38+
#else
39+
namespace arrow_acero_namespace = arrow::compute;
40+
#endif
41+
3242
#if defined(ARROW_VERSION) && ARROW_VERSION >= 10000000
3343
using AsyncGeneratorType =
3444
arrow::AsyncGenerator<std::optional<arrow::compute::ExecBatch>>;
@@ -47,17 +57,21 @@ using AsyncGeneratorType =
4757
*/
4858
Result<std::shared_ptr<arrow::Table>> ExecutePlanAndCollectAsTable(
4959
const arrow::compute::ExecContext& exec_context,
50-
std::shared_ptr<arrow::compute::ExecPlan> plan,
60+
std::shared_ptr<arrow_acero_namespace::ExecPlan> plan,
5161
std::shared_ptr<arrow::Schema> schema, AsyncGeneratorType sink_gen) {
5262
// translate sink_gen (async) to sink_reader (sync)
5363
std::shared_ptr<arrow::RecordBatchReader> sink_reader =
54-
arrow::compute::MakeGeneratorReader(schema, std::move(sink_gen),
55-
exec_context.memory_pool());
64+
arrow_acero_namespace::MakeGeneratorReader(schema, std::move(sink_gen),
65+
exec_context.memory_pool());
5666

5767
// validate the ExecPlan
5868
RETURN_NOT_ARROW_OK(plan->Validate());
5969
// start the ExecPlan
70+
#if defined(ARROW_VERSION) && ARROW_VERSION >= 12000000
71+
plan->StartProducing(); // arrow 12.0.0 or later return void, not Status
72+
#else
6073
RETURN_NOT_ARROW_OK(plan->StartProducing());
74+
#endif
6175

6276
// collect sink_reader into a Table
6377
std::shared_ptr<arrow::Table> response_table;
@@ -643,17 +657,17 @@ Result<std::shared_ptr<arrow::Table>> EdgeChunkWriter::sortTable(
643657
const std::shared_ptr<arrow::Table>& input_table,
644658
const std::string& column_name) {
645659
auto exec_context = arrow::compute::default_exec_context();
646-
auto plan = arrow::compute::ExecPlan::Make(exec_context).ValueOrDie();
660+
auto plan = arrow_acero_namespace::ExecPlan::Make(exec_context).ValueOrDie();
647661
int max_batch_size = 2;
648-
auto table_source_options =
649-
arrow::compute::TableSourceNodeOptions{input_table, max_batch_size};
650-
auto source = arrow::compute::MakeExecNode("table_source", plan.get(), {},
651-
table_source_options)
662+
auto table_source_options = arrow_acero_namespace::TableSourceNodeOptions{
663+
input_table, max_batch_size};
664+
auto source = arrow_acero_namespace::MakeExecNode("table_source", plan.get(),
665+
{}, table_source_options)
652666
.ValueOrDie();
653667
AsyncGeneratorType sink_gen;
654-
if (!arrow::compute::MakeExecNode(
668+
if (!arrow_acero_namespace::MakeExecNode(
655669
"order_by_sink", plan.get(), {source},
656-
arrow::compute::OrderBySinkNodeOptions{
670+
arrow_acero_namespace::OrderBySinkNodeOptions{
657671
arrow::compute::SortOptions{{arrow::compute::SortKey{
658672
column_name, arrow::compute::SortOrder::Ascending}}},
659673
&sink_gen})

0 commit comments

Comments
 (0)