From 8bab804690fef9d60f57271f19b52451bcf25d79 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Sat, 29 Dec 2018 17:41:28 +0900 Subject: [PATCH] [GLib] Add more GArrowTable constructors --- c_glib/arrow-glib/composite-array.h | 2 + c_glib/arrow-glib/orc-file-reader.h | 4 +- c_glib/arrow-glib/table.cpp | 204 +++++++++++++++++++++++++++- c_glib/arrow-glib/table.h | 33 ++++- c_glib/arrow-glib/version.h.in | 23 ++++ c_glib/test/test-table.rb | 61 +++++++-- 6 files changed, 310 insertions(+), 17 deletions(-) diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h index c634dbfc3b006..10432e2e56ba3 100644 --- a/c_glib/arrow-glib/composite-array.h +++ b/c_glib/arrow-glib/composite-array.h @@ -130,8 +130,10 @@ GArrowStructArray *garrow_struct_array_new(GArrowDataType *data_type, GArrowArray *garrow_struct_array_get_field(GArrowStructArray *array, gint i); +#ifndef GARROW_DISABLE_DEPRECATED GARROW_DEPRECATED_IN_0_10_FOR(garrow_struct_array_flatten) GList *garrow_struct_array_get_fields(GArrowStructArray *array); +#endif GARROW_AVAILABLE_IN_0_10 GList *garrow_struct_array_flatten(GArrowStructArray *array, GError **error); diff --git a/c_glib/arrow-glib/orc-file-reader.h b/c_glib/arrow-glib/orc-file-reader.h index 9b2dbadefe43a..97cf1efa92ff7 100644 --- a/c_glib/arrow-glib/orc-file-reader.h +++ b/c_glib/arrow-glib/orc-file-reader.h @@ -39,7 +39,7 @@ garrow_orc_file_reader_new(GArrowSeekableInputStream *file, GError **error); #ifndef GARROW_DISABLE_DEPRECATED -G_GNUC_DEPRECATED_FOR(garrow_orc_file_reader_set_field_indices) +GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_set_field_indices) void garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, const gint *field_indexes, @@ -50,7 +50,7 @@ garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader, const gint *field_indices, guint n_field_indices); #ifndef GARROW_DISABLE_DEPRECATED -G_GNUC_DEPRECATED_FOR(garrow_orc_file_reader_get_field_indices) +GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_get_field_indices) const gint * garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader, guint *n_field_indexes); diff --git a/c_glib/arrow-glib/table.cpp b/c_glib/arrow-glib/table.cpp index f9e1b951a3658..b889eb2c9da23 100644 --- a/c_glib/arrow-glib/table.cpp +++ b/c_glib/arrow-glib/table.cpp @@ -21,8 +21,10 @@ # include #endif +#include #include #include +#include #include #include @@ -133,22 +135,218 @@ garrow_table_class_init(GArrowTableClass *klass) * @columns: (element-type GArrowColumn): The columns of the table. * * Returns: A newly created #GArrowTable. + * + * Deprecated: 0.12.0: Use garrow_table_new_values() instead. */ GArrowTable * garrow_table_new(GArrowSchema *schema, GList *columns) { + auto arrow_schema = garrow_schema_get_raw(schema); std::vector> arrow_columns; for (GList *node = columns; node; node = node->next) { - GArrowColumn *column = GARROW_COLUMN(node->data); + auto column = GARROW_COLUMN(node->data); arrow_columns.push_back(garrow_column_get_raw(column)); } - auto arrow_table = - arrow::Table::Make(garrow_schema_get_raw(schema), arrow_columns); + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); return garrow_table_new_raw(&arrow_table); } +/** + * garrow_table_new_values: (skip) + * @schema: The schema of the table. + * @values: The values of the table. All values must be instance of the + * same class. Available classes are #GArrowColumn, #GArrowArray and + * #GArrowRecordBatch. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowTable or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_table_new_values(GArrowSchema *schema, + GList *values, + GError **error) +{ + const auto context = "[table][new][values]"; + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector> arrow_columns; + std::vector> arrow_arrays; + std::vector> arrow_record_batches; + for (GList *node = values; node; node = node->next) { + if (GARROW_IS_COLUMN(node->data)) { + auto column = GARROW_COLUMN(node->data); + arrow_columns.push_back(garrow_column_get_raw(column)); + } else if (GARROW_IS_ARRAY(node->data)) { + auto array = GARROW_ARRAY(node->data); + arrow_arrays.push_back(garrow_array_get_raw(array)); + } else if (GARROW_IS_RECORD_BATCH(node->data)) { + auto record_batch = GARROW_RECORD_BATCH(node->data); + arrow_record_batches.push_back(garrow_record_batch_get_raw(record_batch)); + } else { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: %s", + context, + "value must be one of " + "GArrowColumn, GArrowArray and GArrowRecordBatch"); + return NULL; + } + } + + size_t n_types = 0; + if (!arrow_columns.empty()) { + ++n_types; + } + if (!arrow_arrays.empty()) { + ++n_types; + } + if (!arrow_record_batches.empty()) { + ++n_types; + } + if (n_types > 1) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "%s: %s", + context, + "all values must be the same objects of " + "GArrowColumn, GArrowArray or GArrowRecordBatch"); + return NULL; + } + + if (!arrow_columns.empty()) { + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); + auto status = arrow_table->Validate(); + if (garrow_error_check(error, status, context)) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } + } else if (!arrow_arrays.empty()) { + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_arrays); + auto status = arrow_table->Validate(); + if (garrow_error_check(error, status, context)) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } + } else { + std::shared_ptr arrow_table; + auto status = arrow::Table::FromRecordBatches(arrow_schema, + arrow_record_batches, + &arrow_table); + if (garrow_error_check(error, status, context)) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } + } +} + +/** + * garrow_table_new_columns: + * @schema: The schema of the table. + * @columns: (array length=n_columns): The columns of the table. + * @n_columns: The number of columns. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowTable or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_table_new_columns(GArrowSchema *schema, + GArrowColumn **columns, + gsize n_columns, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector> arrow_columns; + for (gsize i = 0; i < n_columns; ++i) { + arrow_columns.push_back(garrow_column_get_raw(columns[i])); + } + + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); + auto status = arrow_table->Validate(); + if (garrow_error_check(error, status, "[table][new][columns]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + +/** + * garrow_table_new_arrays: + * @schema: The schema of the table. + * @arrays: (array length=n_arrays): The arrays of the table. + * @n_arrays: The number of arrays. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowTable or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_table_new_arrays(GArrowSchema *schema, + GArrowArray **arrays, + gsize n_arrays, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector> arrow_arrays; + for (gsize i = 0; i < n_arrays; ++i) { + arrow_arrays.push_back(garrow_array_get_raw(arrays[i])); + } + + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_arrays); + auto status = arrow_table->Validate(); + if (garrow_error_check(error, status, "[table][new][arrays]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + +/** + * garrow_table_new_record_batches: + * @schema: The schema of the table. + * @record_batches: (array length=n_record_batches): The record batches + * that have data for the table. + * @n_record_batches: The number of record batches. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowTable or %NULL on error. + * + * Since: 0.12.0 + */ +GArrowTable * +garrow_table_new_record_batches(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches, + GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + std::vector> arrow_record_batches; + for (gsize i = 0; i < n_record_batches; ++i) { + auto arrow_record_batch = garrow_record_batch_get_raw(record_batches[i]); + arrow_record_batches.push_back(arrow_record_batch); + } + + std::shared_ptr arrow_table; + auto status = arrow::Table::FromRecordBatches(arrow_schema, + arrow_record_batches, + &arrow_table); + if (garrow_error_check(error, status, "[table][new][record-batches]")) { + return garrow_table_new_raw(&arrow_table); + } else { + return NULL; + } +} + /** * garrow_table_equal: * @table: A #GArrowTable. diff --git a/c_glib/arrow-glib/table.h b/c_glib/arrow-glib/table.h index ef7b0f5c289ce..bde2535033c7d 100644 --- a/c_glib/arrow-glib/table.h +++ b/c_glib/arrow-glib/table.h @@ -20,7 +20,9 @@ #pragma once #include +#include #include +#include G_BEGIN_DECLS @@ -35,8 +37,35 @@ struct _GArrowTableClass GObjectClass parent_class; }; -GArrowTable *garrow_table_new (GArrowSchema *schema, - GList *columns); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_table_new_values) +GArrowTable * +garrow_table_new(GArrowSchema *schema, + GList *columns); +#endif +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_values(GArrowSchema *schema, + GList *values, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_columns(GArrowSchema *schema, + GArrowColumn **columns, + gsize n_columns, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_arrays(GArrowSchema *schema, + GArrowArray **arrays, + gsize n_arrays, + GError **error); +GARROW_AVAILABLE_IN_0_12 +GArrowTable * +garrow_table_new_record_batches(GArrowSchema *schema, + GArrowRecordBatch **record_batches, + gsize n_record_batches, + GError **error); gboolean garrow_table_equal (GArrowTable *table, GArrowTable *other_table); diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in index eb734250e2352..501827d06e054 100644 --- a/c_glib/arrow-glib/version.h.in +++ b/c_glib/arrow-glib/version.h.in @@ -110,6 +110,15 @@ # define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif +/** + * GARROW_VERSION_0_12: + * + * You can use this macro value for compile time API version check. + * + * Since: 0.12.0 + */ +#define GARROW_VERSION_0_12 G_ENCODE_VERSION(0, 12) + /** * GARROW_VERSION_0_10: * @@ -166,6 +175,20 @@ #define GARROW_AVAILABLE_IN_ALL +#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_12 +# define GARROW_DEPRECATED_IN_0_12 GARROW_DEPRECATED +# define GARROW_DEPRECATED_IN_0_12_FOR(function) GARROW_DEPRECATED_FOR(function) +#else +# define GARROW_DEPRECATED_IN_0_12 +# define GARROW_DEPRECATED_IN_0_12_FOR(function) +#endif + +#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_12 +# define GARROW_AVAILABLE_IN_0_12 GARROW_UNAVAILABLE(0, 12) +#else +# define GARROW_AVAILABLE_IN_0_12 +#endif + #if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_10 # define GARROW_DEPRECATED_IN_0_10 GARROW_DEPRECATED # define GARROW_DEPRECATED_IN_0_10_FOR(function) GARROW_DEPRECATED_FOR(function) diff --git a/c_glib/test/test-table.rb b/c_glib/test/test-table.rb index 4394ad1353e7d..871e0d7c5ffd4 100644 --- a/c_glib/test/test-table.rb +++ b/c_glib/test/test-table.rb @@ -17,21 +17,19 @@ class TestTable < Test::Unit::TestCase include Helper::Buildable + include Helper::Omittable sub_test_case(".new") do - def test_columns - fields = [ + def setup + @fields = [ Arrow::Field.new("visible", Arrow::BooleanDataType.new), Arrow::Field.new("valid", Arrow::BooleanDataType.new), ] - schema = Arrow::Schema.new(fields) - columns = [ - Arrow::Column.new(fields[0], build_boolean_array([true])), - Arrow::Column.new(fields[1], build_boolean_array([false])), - ] - table = Arrow::Table.new(schema, columns) + @schema = Arrow::Schema.new(@fields) + end - data = table.n_columns.times.collect do |i| + def dump_table(table) + table.n_columns.times.collect do |i| column = table.get_column(i) values = [] column.data.chunks.each do |chunk| @@ -44,11 +42,54 @@ def test_columns values, ] end + end + + def test_columns + columns = [ + Arrow::Column.new(@fields[0], build_boolean_array([true])), + Arrow::Column.new(@fields[1], build_boolean_array([false])), + ] + table = Arrow::Table.new(@schema, columns) assert_equal([ ["visible", [true]], ["valid", [false]], ], - data) + dump_table(table)) + end + + def test_arrays + require_gi_bindings(3, 3, 1) + arrays = [ + build_boolean_array([true]), + build_boolean_array([false]), + ] + table = Arrow::Table.new(@schema, arrays) + assert_equal([ + ["visible", [true]], + ["valid", [false]], + ], + dump_table(table)) + end + + def test_record_batches + require_gi_bindings(3, 3, 1) + record_batches = [ + build_record_batch({ + "visible" => build_boolean_array([true]), + "valid" => build_boolean_array([false]) + }), + build_record_batch({ + "visible" => build_boolean_array([false]), + "valid" => build_boolean_array([true]) + }), + ] + table = Arrow::Table.new(@schema, record_batches) + + assert_equal([ + ["visible", [true, false]], + ["valid", [false, true]], + ], + dump_table(table)) end end