Skip to content

Commit

Permalink
GH-15280: [C++][Python][GLib] add libarrow_acero containing everything previously in compute/exec (#34711)
Browse files Browse the repository at this point in the history

### Rationale for this change

See the linked issue

### What changes are included in this PR?

C++:
* remove all compute/exec/* from libarrow
* rename compute/exec -> acero and make libarrow_acero
* add new ARROW_ACERO option, required if ARROW_DATASET is on
* libarrow_dataset now depends on libarrow_acero

c_glib: add the new libarrow_acero dependency; building the GLib bindings without it is not supported

python: add PYARROW_BUILD_ACERO, which is switched on whenever the dataset module is built

### Are these changes tested?

All the standard tests pass.

I manually compiled C++ with:
* no ARROW_ACERO
* ARROW_ACERO and no ARROW_DATASET
* ARROW_ACERO and ARROW_DATASET and no ARROW_SUBSTRAIT 

I manually compiled Python in two configurations: without both ACERO and DATASET, and with ACERO but without DATASET.

### Are there any user-facing changes?

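Users who include compute/exec headers directly will have to update their code: the headers now live under acero (e.g. `arrow/compute/exec/exec_plan.h` becomes `arrow/acero/exec_plan.h`), and the moved types are in the `arrow::acero` namespace instead of `arrow::compute`.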

* Closes: #15280

Lead-authored-by: Davide Pasetto <dpasetto69@gmail.com>
Co-authored-by: Li Jin <ice.xelloss@gmail.com>
Co-authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Li Jin <ice.xelloss@gmail.com>
  • Loading branch information
3 people authored Apr 1, 2023
1 parent 7e19111 commit f137f29
Show file tree
Hide file tree
Showing 202 changed files with 2,783 additions and 1,661 deletions.
62 changes: 31 additions & 31 deletions c_glib/arrow-glib/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
#include <arrow-glib/schema.hpp>
#include <arrow-glib/table.hpp>

#include <arrow/compute/exec/exec_plan.h>
#include <arrow/compute/exec/options.h>
#include <arrow/acero/exec_plan.h>
#include <arrow/acero/options.h>

template <typename ArrowType, typename GArrowArrayType>
typename ArrowType::c_type
Expand Down Expand Up @@ -817,7 +817,7 @@ garrow_function_to_string(GArrowFunction *function)


typedef struct GArrowExecuteNodeOptionsPrivate_ {
arrow::compute::ExecNodeOptions *options;
arrow::acero::ExecNodeOptions *options;
} GArrowExecuteNodeOptionsPrivate;

enum {
Expand Down Expand Up @@ -852,7 +852,7 @@ garrow_execute_node_options_set_property(GObject *object,
switch (prop_id) {
case PROP_FUNCTION:
priv->options =
static_cast<arrow::compute::ExecNodeOptions *>(g_value_get_pointer(value));
static_cast<arrow::acero::ExecNodeOptions *>(g_value_get_pointer(value));
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
Expand All @@ -877,7 +877,7 @@ garrow_execute_node_options_class_init(GArrowExecuteNodeOptionsClass *klass)
GParamSpec *spec;
spec = g_param_spec_pointer("options",
"Options",
"The raw arrow::compute::ExecNodeOptions *",
"The raw arrow::acero::ExecNodeOptions *",
static_cast<GParamFlags>(G_PARAM_WRITABLE |
G_PARAM_CONSTRUCT_ONLY));
g_object_class_install_property(gobject_class,
Expand Down Expand Up @@ -988,7 +988,7 @@ garrow_source_node_options_new_record_batch_reader(
GArrowRecordBatchReader *reader)
{
auto arrow_reader = garrow_record_batch_reader_get_raw(reader);
auto arrow_options = new arrow::compute::SourceNodeOptions(
auto arrow_options = new arrow::acero::SourceNodeOptions(
arrow_reader->schema(),
[arrow_reader]() {
using ExecBatch = arrow::compute::ExecBatch;
Expand Down Expand Up @@ -1029,7 +1029,7 @@ garrow_source_node_options_new_record_batch(GArrowRecordBatch *record_batch)
auto state = std::make_shared<State>();
state->record_batch = garrow_record_batch_get_raw(record_batch);
state->generated = false;
auto arrow_options = new arrow::compute::SourceNodeOptions(
auto arrow_options = new arrow::acero::SourceNodeOptions(
state->record_batch->schema(),
[state]() {
using ExecBatch = arrow::compute::ExecBatch;
Expand Down Expand Up @@ -1095,7 +1095,7 @@ garrow_filter_node_options_new(GArrowExpression *expression)
{
auto arrow_expression = garrow_expression_get_raw(expression);
auto arrow_options =
new arrow::compute::FilterNodeOptions(*arrow_expression);
new arrow::acero::FilterNodeOptions(*arrow_expression);
auto options = g_object_new(GARROW_TYPE_FILTER_NODE_OPTIONS,
"options", arrow_options,
NULL);
Expand Down Expand Up @@ -1150,7 +1150,7 @@ garrow_project_node_options_new(GList *expressions,
}
}
auto arrow_options =
new arrow::compute::ProjectNodeOptions(arrow_expressions, arrow_names);
new arrow::acero::ProjectNodeOptions(arrow_expressions, arrow_names);
auto options = g_object_new(GARROW_TYPE_PROJECT_NODE_OPTIONS,
"options", arrow_options,
NULL);
Expand Down Expand Up @@ -1430,7 +1430,7 @@ garrow_aggregate_node_options_new(GList *aggregations,
}
}
auto arrow_options =
new arrow::compute::AggregateNodeOptions(std::move(arrow_aggregates),
new arrow::acero::AggregateNodeOptions(std::move(arrow_aggregates),
std::move(arrow_keys));
auto options = g_object_new(GARROW_TYPE_AGGREGATE_NODE_OPTIONS,
"options", arrow_options,
Expand Down Expand Up @@ -1500,7 +1500,7 @@ garrow_sink_node_options_new(void)
{
auto options = g_object_new(GARROW_TYPE_SINK_NODE_OPTIONS, NULL);
auto priv = GARROW_SINK_NODE_OPTIONS_GET_PRIVATE(options);
auto arrow_options = new arrow::compute::SinkNodeOptions(&(priv->generator));
auto arrow_options = new arrow::acero::SinkNodeOptions(&(priv->generator));
auto execute_node_options_priv = GARROW_EXECUTE_NODE_OPTIONS_GET_PRIVATE(options);
execute_node_options_priv->options = arrow_options;
return GARROW_SINK_NODE_OPTIONS(options);
Expand All @@ -1523,7 +1523,7 @@ garrow_sink_node_options_get_reader(GArrowSinkNodeOptions *options,
auto priv = GARROW_SINK_NODE_OPTIONS_GET_PRIVATE(options);
if (!priv->reader) {
auto arrow_reader =
arrow::compute::MakeGeneratorReader(arrow_schema,
arrow::acero::MakeGeneratorReader(arrow_schema,
std::move(priv->generator),
arrow::default_memory_pool());
priv->reader = garrow_record_batch_reader_new_raw(&arrow_reader);
Expand Down Expand Up @@ -1570,7 +1570,7 @@ garrow_hash_join_node_options_new(GArrowJoinType type,
gsize n_right_keys,
GError **error)
{
auto arrow_type = static_cast<arrow::compute::JoinType>(type);
auto arrow_type = static_cast<arrow::acero::JoinType>(type);
std::vector<arrow::FieldRef> arrow_left_keys;
for (gsize i = 0; i < n_left_keys; ++i) {
if (!garrow_field_refs_add(arrow_left_keys,
Expand All @@ -1590,7 +1590,7 @@ garrow_hash_join_node_options_new(GArrowJoinType type,
}
}
auto arrow_options =
new arrow::compute::HashJoinNodeOptions(arrow_type,
new arrow::acero::HashJoinNodeOptions(arrow_type,
std::move(arrow_left_keys),
std::move(arrow_right_keys));
auto options = g_object_new(GARROW_TYPE_HASH_JOIN_NODE_OPTIONS,
Expand Down Expand Up @@ -1618,7 +1618,7 @@ garrow_hash_join_node_options_set_left_outputs(
GError **error)
{
auto arrow_options =
static_cast<arrow::compute::HashJoinNodeOptions *>(
static_cast<arrow::acero::HashJoinNodeOptions *>(
garrow_execute_node_options_get_raw(
GARROW_EXECUTE_NODE_OPTIONS(options)));
arrow_options->output_all = false;
Expand Down Expand Up @@ -1653,7 +1653,7 @@ garrow_hash_join_node_options_set_right_outputs(
GError **error)
{
auto arrow_options =
static_cast<arrow::compute::HashJoinNodeOptions *>(
static_cast<arrow::acero::HashJoinNodeOptions *>(
garrow_execute_node_options_get_raw(
GARROW_EXECUTE_NODE_OPTIONS(options)));
arrow_options->output_all = false;
Expand All @@ -1671,7 +1671,7 @@ garrow_hash_join_node_options_set_right_outputs(


typedef struct GArrowExecuteNodePrivate_ {
arrow::compute::ExecNode *node;
arrow::acero::ExecNode *node;
} GArrowExecuteNodePrivate;

enum {
Expand All @@ -1698,7 +1698,7 @@ garrow_execute_node_set_property(GObject *object,
switch (prop_id) {
case PROP_NODE:
priv->node =
static_cast<arrow::compute::ExecNode *>(g_value_get_pointer(value));
static_cast<arrow::acero::ExecNode *>(g_value_get_pointer(value));
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
Expand All @@ -1720,7 +1720,7 @@ garrow_execute_node_class_init(GArrowExecuteNodeClass *klass)
GParamSpec *spec;
spec = g_param_spec_pointer("node",
"Node",
"The raw arrow::compute::ExecNode *",
"The raw arrow::acero::ExecNode *",
static_cast<GParamFlags>(G_PARAM_WRITABLE |
G_PARAM_CONSTRUCT_ONLY));
g_object_class_install_property(gobject_class, PROP_NODE, spec);
Expand Down Expand Up @@ -1759,7 +1759,7 @@ garrow_execute_node_get_output_schema(GArrowExecuteNode *node)


typedef struct GArrowExecutePlanPrivate_ {
std::shared_ptr<arrow::compute::ExecPlan> plan;
std::shared_ptr<arrow::acero::ExecPlan> plan;
} GArrowExecutePlanPrivate;

enum {
Expand Down Expand Up @@ -1794,7 +1794,7 @@ garrow_execute_plan_set_property(GObject *object,
switch (prop_id) {
case PROP_PLAN:
priv->plan =
*static_cast<std::shared_ptr<arrow::compute::ExecPlan> *>(
*static_cast<std::shared_ptr<arrow::acero::ExecPlan> *>(
g_value_get_pointer(value));
break;
default:
Expand All @@ -1807,7 +1807,7 @@ static void
garrow_execute_plan_init(GArrowExecutePlan *object)
{
auto priv = GARROW_EXECUTE_PLAN_GET_PRIVATE(object);
new(&(priv->plan)) std::shared_ptr<arrow::compute::ExecPlan>;
new(&(priv->plan)) std::shared_ptr<arrow::acero::ExecPlan>;
}

static void
Expand All @@ -1820,7 +1820,7 @@ garrow_execute_plan_class_init(GArrowExecutePlanClass *klass)
GParamSpec *spec;
spec = g_param_spec_pointer("plan",
"Plan",
"The raw std::shared_ptr<arrow::compute::ExecPlan>",
"The raw std::shared_ptr<arrow::acero::ExecPlan>",
static_cast<GParamFlags>(G_PARAM_WRITABLE |
G_PARAM_CONSTRUCT_ONLY));
g_object_class_install_property(gobject_class, PROP_PLAN, spec);
Expand All @@ -1838,7 +1838,7 @@ garrow_execute_plan_class_init(GArrowExecutePlanClass *klass)
GArrowExecutePlan *
garrow_execute_plan_new(GError **error)
{
auto arrow_plan_result = arrow::compute::ExecPlan::Make();
auto arrow_plan_result = arrow::acero::ExecPlan::Make();
if (garrow::check(error, arrow_plan_result, "[execute-plan][new]")) {
return GARROW_EXECUTE_PLAN(g_object_new(GARROW_TYPE_EXECUTE_PLAN,
"plan", &(*arrow_plan_result),
Expand Down Expand Up @@ -1869,14 +1869,14 @@ garrow_execute_plan_build_node(GArrowExecutePlan *plan,
GError **error)
{
auto arrow_plan = garrow_execute_plan_get_raw(plan);
std::vector<arrow::compute::ExecNode *> arrow_inputs;
std::vector<arrow::acero::ExecNode *> arrow_inputs;
for (auto node = inputs; node; node = node->next) {
auto arrow_node =
garrow_execute_node_get_raw(GARROW_EXECUTE_NODE(node->data));
arrow_inputs.push_back(arrow_node);
}
auto arrow_options = garrow_execute_node_options_get_raw(options);
auto arrow_node_result = arrow::compute::MakeExecNode(factory_name,
auto arrow_node_result = arrow::acero::MakeExecNode(factory_name,
arrow_plan.get(),
arrow_inputs,
*arrow_options);
Expand Down Expand Up @@ -5914,15 +5914,15 @@ garrow_function_get_raw(GArrowFunction *function)

GArrowExecuteNodeOptions *
garrow_execute_node_options_new_raw(
arrow::compute::ExecNodeOptions *arrow_options)
arrow::acero::ExecNodeOptions *arrow_options)
{
return GARROW_EXECUTE_NODE_OPTIONS(
g_object_new(GARROW_TYPE_EXECUTE_NODE_OPTIONS,
"options", arrow_options,
NULL));
}

arrow::compute::ExecNodeOptions *
arrow::acero::ExecNodeOptions *
garrow_execute_node_options_get_raw(GArrowExecuteNodeOptions *options)
{
auto priv = GARROW_EXECUTE_NODE_OPTIONS_GET_PRIVATE(options);
Expand All @@ -5931,22 +5931,22 @@ garrow_execute_node_options_get_raw(GArrowExecuteNodeOptions *options)


GArrowExecuteNode *
garrow_execute_node_new_raw(arrow::compute::ExecNode *arrow_node)
garrow_execute_node_new_raw(arrow::acero::ExecNode *arrow_node)
{
return GARROW_EXECUTE_NODE(g_object_new(GARROW_TYPE_EXECUTE_NODE,
"node", arrow_node,
NULL));
}

arrow::compute::ExecNode *
arrow::acero::ExecNode *
garrow_execute_node_get_raw(GArrowExecuteNode *node)
{
auto priv = GARROW_EXECUTE_NODE_GET_PRIVATE(node);
return priv->node;
}


std::shared_ptr<arrow::compute::ExecPlan>
std::shared_ptr<arrow::acero::ExecPlan>
garrow_execute_plan_get_raw(GArrowExecutePlan *plan)
{
auto priv = GARROW_EXECUTE_PLAN_GET_PRIVATE(plan);
Expand Down
12 changes: 6 additions & 6 deletions c_glib/arrow-glib/compute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#pragma once

#include <arrow/compute/api.h>
#include <arrow/compute/exec/exec_plan.h>
#include <arrow/acero/exec_plan.h>

#include <arrow-glib/compute.h>

Expand Down Expand Up @@ -53,18 +53,18 @@ garrow_function_get_raw(GArrowFunction *function);

GArrowExecuteNodeOptions *
garrow_execute_node_options_new_raw(
arrow::compute::ExecNodeOptions *arrow_options);
arrow::compute::ExecNodeOptions *
arrow::acero::ExecNodeOptions *arrow_options);
arrow::acero::ExecNodeOptions *
garrow_execute_node_options_get_raw(GArrowExecuteNodeOptions *options);


GArrowExecuteNode *
garrow_execute_node_new_raw(arrow::compute::ExecNode *arrow_node);
arrow::compute::ExecNode *
garrow_execute_node_new_raw(arrow::acero::ExecNode *arrow_node);
arrow::acero::ExecNode *
garrow_execute_node_get_raw(GArrowExecuteNode *node);


std::shared_ptr<arrow::compute::ExecPlan>
std::shared_ptr<arrow::acero::ExecPlan>
garrow_execute_plan_get_raw(GArrowExecutePlan *plan);


Expand Down
1 change: 1 addition & 0 deletions c_glib/arrow-glib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ if not gio.found()
endif
dependencies = [
arrow,
arrow_acero,
gobject,
gio,
]
Expand Down
5 changes: 5 additions & 0 deletions c_glib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ if arrow_cpp_build_lib_dir == ''

have_arrow_orc = dependency('arrow-orc', required: false).found()
arrow_cuda = dependency('arrow-cuda', required: false)
# we do not support compiling glib without acero engine
arrow_acero = dependency('arrow-acero', required: true)
arrow_dataset = dependency('arrow-dataset', required: false)
arrow_flight = dependency('arrow-flight', required: false)
arrow_flight_sql = dependency('arrow-flight-sql', required: false)
Expand Down Expand Up @@ -116,6 +118,9 @@ main(void)
arrow_cuda = cpp_compiler.find_library('arrow_cuda',
dirs: [arrow_cpp_build_lib_dir],
required: false)
arrow_acero = cpp_compiler.find_library('arrow_acero',
dirs: [arrow_cpp_build_lib_dir],
required: true)
arrow_dataset = cpp_compiler.find_library('arrow_dataset',
dirs: [arrow_cpp_build_lib_dir],
required: false)
Expand Down
2 changes: 2 additions & 0 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ pushd cpp\build
@rem and enable runtime assertions.

cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
-DARROW_ACERO=ON ^
-DARROW_BOOST_USE_SHARED=ON ^
-DARROW_BUILD_EXAMPLES=ON ^
-DARROW_BUILD_STATIC=OFF ^
Expand Down Expand Up @@ -109,6 +110,7 @@ pushd python
set PYARROW_CMAKE_GENERATOR=%GENERATOR%
set PYARROW_CXXFLAGS=%ARROW_CXXFLAGS%
set PYARROW_PARALLEL=2
set PYARROW_WITH_ACERO=ON
set PYARROW_WITH_DATASET=ON
set PYARROW_WITH_FLIGHT=%ARROW_BUILD_FLIGHT%
set PYARROW_WITH_GANDIVA=%ARROW_BUILD_GANDIVA%
Expand Down
Loading

0 comments on commit f137f29

Please sign in to comment.