From cfafd5fb491318aa2a97577e86b1d209cd05ed2c Mon Sep 17 00:00:00 2001 From: takuya kodama Date: Tue, 5 Sep 2023 09:36:36 +0800 Subject: [PATCH] GH-33749: [Ruby] Add Arrow::RecordBatch#each_raw_record (#37137) ### Rationale for this change This change allows for efficient iteration over large datasets, particularly those utilizing the Apache Parquet format. ### What changes are included in this PR? - Add the following methods to make the raw_records method iterable. - Arrow::RecordBatch#each_raw_record - Arrow::Table#each_raw_record - Add related test ### Are these changes tested? Yes. ### Are there any user-facing changes? No. This PR is related to #33749 * Closes: #33749 Lead-authored-by: otegami Co-authored-by: takuya kodama Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ruby/red-arrow/ext/arrow/arrow.cpp | 6 + ruby/red-arrow/ext/arrow/raw-records.cpp | 152 ++++ ruby/red-arrow/ext/arrow/red-arrow.hpp | 2 + .../test/each-raw-record/test-basic-arrays.rb | 531 ++++++++++++ .../each-raw-record/test-dense-union-array.rb | 706 ++++++++++++++++ .../each-raw-record/test-dictionary-array.rb | 457 +++++++++++ .../test/each-raw-record/test-list-array.rb | 771 ++++++++++++++++++ .../test/each-raw-record/test-map-array.rb | 647 +++++++++++++++ .../each-raw-record/test-multiple-columns.rb | 80 ++ .../test-sparse-union-array.rb | 662 +++++++++++++++ .../test/each-raw-record/test-struct-array.rb | 669 +++++++++++++++ .../test/each-raw-record/test-table.rb | 51 ++ .../test/raw-records/test-basic-arrays.rb | 2 +- 13 files changed, 4735 insertions(+), 1 deletion(-) create mode 100644 ruby/red-arrow/test/each-raw-record/test-basic-arrays.rb create mode 100644 ruby/red-arrow/test/each-raw-record/test-dense-union-array.rb create mode 100644 ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb create mode 100644 ruby/red-arrow/test/each-raw-record/test-list-array.rb create mode 100644 ruby/red-arrow/test/each-raw-record/test-map-array.rb create mode 100644 
ruby/red-arrow/test/each-raw-record/test-multiple-columns.rb create mode 100644 ruby/red-arrow/test/each-raw-record/test-sparse-union-array.rb create mode 100644 ruby/red-arrow/test/each-raw-record/test-struct-array.rb create mode 100644 ruby/red-arrow/test/each-raw-record/test-table.rb diff --git a/ruby/red-arrow/ext/arrow/arrow.cpp b/ruby/red-arrow/ext/arrow/arrow.cpp index 8eb3b610090b5..404ec8996f232 100644 --- a/ruby/red-arrow/ext/arrow/arrow.cpp +++ b/ruby/red-arrow/ext/arrow/arrow.cpp @@ -82,11 +82,17 @@ extern "C" void Init_arrow() { rb_define_method(cArrowRecordBatch, "raw_records", reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records), 0); + rb_define_method(cArrowRecordBatch, "each_raw_record", + reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_each_raw_record), + 0); auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table")); rb_define_method(cArrowTable, "raw_records", reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records), 0); + rb_define_method(cArrowTable, "each_raw_record", + reinterpret_cast<rb::RawMethod>(red_arrow::table_each_raw_record), + 0); red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date")); diff --git a/ruby/red-arrow/ext/arrow/raw-records.cpp b/ruby/red-arrow/ext/arrow/raw-records.cpp index e0326f9d2fdb7..0043ca3aaf2cc 100644 --- a/ruby/red-arrow/ext/arrow/raw-records.cpp +++ b/ruby/red-arrow/ext/arrow/raw-records.cpp @@ -144,6 +144,128 @@ namespace red_arrow { // The number of columns. 
const int n_columns_; }; + + class RawRecordsProducer : private Converter, public arrow::ArrayVisitor { + public: + explicit RawRecordsProducer() + : Converter(), + record_(Qnil), + column_index_(0), + row_offset_(0) { + } + + void produce(const arrow::RecordBatch& record_batch) { + rb::protect([&] { + const auto n_columns = record_batch.num_columns(); + const auto n_rows = record_batch.num_rows(); + for (int64_t i_row = 0; i_row < n_rows; ++i_row) { + record_ = rb_ary_new_capa(n_columns); + row_offset_ = i_row; + for (int i_column = 0; i_column < n_columns; ++i_column) { + const auto array = record_batch.column(i_column).get(); + column_index_ = i_column; + check_status(array->Accept(this), + "[record-batch][each-raw-record]"); + } + rb_yield(record_); + } + return Qnil; + }); + } + + void produce(const arrow::Table& table) { + rb::protect([&] { + const auto n_columns = table.num_columns(); + const auto n_rows = table.num_rows(); + std::vector<int> chunk_indexes(n_columns); + std::vector<int64_t> row_offsets(n_columns); + for (int64_t i_row = 0; i_row < n_rows; ++i_row) { + record_ = rb_ary_new_capa(n_columns); + for (int i_column = 0; i_column < n_columns; ++i_column) { + column_index_ = i_column; + const auto chunked_array = table.column(i_column).get(); + auto& chunk_index = chunk_indexes[i_column]; + auto& row_offset = row_offsets[i_column]; + auto array = chunked_array->chunk(chunk_index).get(); + while (array->length() == row_offset) { + ++chunk_index; + row_offset = 0; + array = chunked_array->chunk(chunk_index).get(); + } + row_offset_ = row_offset; + check_status(array->Accept(this), + "[table][each-raw-record]"); + ++row_offset; + } + rb_yield(record_); + } + + return Qnil; + }); + } + +#define VISIT(TYPE) \ + arrow::Status Visit(const arrow::TYPE ## Array& array) override { \ + convert(array); \ + return arrow::Status::OK(); \ + } + + VISIT(Null) + VISIT(Boolean) + VISIT(Int8) + VISIT(Int16) + VISIT(Int32) + VISIT(Int64) + VISIT(UInt8) + VISIT(UInt16) + VISIT(UInt32) + VISIT(UInt64) + VISIT(HalfFloat) + VISIT(Float) 
+ VISIT(Double) + VISIT(Binary) + VISIT(String) + VISIT(FixedSizeBinary) + VISIT(Date32) + VISIT(Date64) + VISIT(Time32) + VISIT(Time64) + VISIT(Timestamp) + VISIT(MonthInterval) + VISIT(DayTimeInterval) + VISIT(MonthDayNanoInterval) + VISIT(List) + VISIT(Struct) + VISIT(Map) + VISIT(SparseUnion) + VISIT(DenseUnion) + VISIT(Dictionary) + VISIT(Decimal128) + VISIT(Decimal256) + // TODO + // VISIT(Extension) + +#undef VISIT + + private: + template <typename ArrayType> + void convert(const ArrayType& array) { + auto value = Qnil; + if (!array.IsNull(row_offset_)) { + value = convert_value(array, row_offset_); + } + rb_ary_store(record_, column_index_, value); + } + + // Destination for converted record. + VALUE record_; + + // The current column index. + int column_index_; + + // The current row offset. + int64_t row_offset_; + }; } VALUE @@ -181,4 +303,34 @@ namespace red_arrow { return records; } + + VALUE + record_batch_each_raw_record(VALUE rb_record_batch) { + auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch)); + auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get(); + + try { + RawRecordsProducer producer; + producer.produce(*record_batch); + } catch (rb::State& state) { + state.jump(); + } + + return Qnil; + } + + VALUE + table_each_raw_record(VALUE rb_table) { + auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table)); + auto table = garrow_table_get_raw(garrow_table).get(); + + try { + RawRecordsProducer producer; + producer.produce(*table); + } catch (rb::State& state) { + state.jump(); + } + + return Qnil; + } } diff --git a/ruby/red-arrow/ext/arrow/red-arrow.hpp b/ruby/red-arrow/ext/arrow/red-arrow.hpp index ba578076a7e39..ffc24f9844bb0 100644 --- a/ruby/red-arrow/ext/arrow/red-arrow.hpp +++ b/ruby/red-arrow/ext/arrow/red-arrow.hpp @@ -59,6 +59,8 @@ namespace red_arrow { VALUE record_batch_raw_records(VALUE obj); VALUE table_raw_records(VALUE obj); + VALUE record_batch_each_raw_record(VALUE obj); + VALUE table_each_raw_record(VALUE 
obj); inline VALUE time_unit_to_scale(const arrow::TimeUnit::type unit) { switch (unit) { diff --git a/ruby/red-arrow/test/each-raw-record/test-basic-arrays.rb b/ruby/red-arrow/test/each-raw-record/test-basic-arrays.rb new file mode 100644 index 0000000000000..dbbbd79ee0acb --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-basic-arrays.rb @@ -0,0 +1,531 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module EachRawRecordBasicArraysTests + def test_null + records = [ + [nil], + [nil], + [nil], + ] + iterated_records = [] + target = build({column: :null}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_boolean + records = [ + [true], + [nil], + [false], + ] + iterated_records = [] + target = build({column: :boolean}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int8 + records = [ + [-(2 ** 7)], + [nil], + [(2 ** 7) - 1], + ] + iterated_records = [] + target = build({column: :int8}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint8 + records = [ + [0], + [nil], + [(2 ** 8) - 1], + ] + iterated_records = [] + target = build({column: :uint8}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int16 + records = [ + [-(2 ** 15)], + [nil], + [(2 ** 15) - 1], + ] + iterated_records = [] + target = build({column: :int16}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint16 + records = [ + [0], + [nil], + [(2 ** 16) - 1], + ] + iterated_records = [] + target = build({column: :uint16}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int32 + records = [ + [-(2 ** 31)], + [nil], + [(2 ** 31) - 1], + ] + iterated_records = [] + target = build({column: :int32}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint32 + records = [ + [0], + [nil], + [(2 ** 32) - 1], + ] + iterated_records = [] + target = build({column: 
:uint32}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int64 + records = [ + [-(2 ** 63)], + [nil], + [(2 ** 63) - 1], + ] + iterated_records = [] + target = build({column: :int64}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint64 + records = [ + [0], + [nil], + [(2 ** 64) - 1], + ] + iterated_records = [] + target = build({column: :uint64}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_half_float + records = [ + [-1.5], + [nil], + [1.5], + ] + iterated_records = [] + target = build({column: :half_float}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_float + records = [ + [-1.0], + [nil], + [1.0], + ] + iterated_records = [] + target = build({column: :float}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_double + records = [ + [-1.0], + [nil], + [1.0], + ] + iterated_records = [] + target = build({column: :double}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_binary + records = [ + ["\x00".b], + [nil], + ["\xff".b], + ] + iterated_records = [] + target = build({column: :binary}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_string + records = [ + ["Ruby"], + [nil], + ["\u3042"], # U+3042 HIRAGANA LETTER A + ] + iterated_records = [] + target = build({column: :string}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + 
def test_date32 + records = [ + [Date.new(1960, 1, 1)], + [nil], + [Date.new(2017, 8, 23)], + ] + iterated_records = [] + target = build({column: :date32}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date64 + records = [ + [DateTime.new(1960, 1, 1, 2, 9, 30)], + [nil], + [DateTime.new(2017, 8, 23, 14, 57, 2)], + ] + iterated_records = [] + target = build({column: :date64}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_second + records = [ + [Time.parse("1960-01-01T02:09:30Z")], + [nil], + [Time.parse("2017-08-23T14:57:02Z")], + ] + iterated_records = [] + target = build({ + column: { + type: :timestamp, + unit: :second, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_milli + records = [ + [Time.parse("1960-01-01T02:09:30.123Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987Z")], + ] + iterated_records = [] + target = build({ + column: { + type: :timestamp, + unit: :milli, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_micro + records = [ + [Time.parse("1960-01-01T02:09:30.123456Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987654Z")], + ] + iterated_records = [] + target = build({ + column: { + type: :timestamp, + unit: :micro, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_nano + records = [ + [Time.parse("1960-01-01T02:09:30.123456789Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987654321Z")], + ] + iterated_records = [] + target = build({ + column: { + type: :timestamp, + unit: :nano, + } + }, + records) + 
target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + [Arrow::Time.new(unit, 60 * 10)], # 00:10:00 + [nil], + [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09 + ] + iterated_records = [] + target = build({ + column: { + type: :time32, + unit: :second, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123 + [nil], + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987 + ] + iterated_records = [] + target = build({ + column: { + type: :time32, + unit: :milli, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)], + [nil], + # 02:00:09.987654 + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)], + ] + iterated_records = [] + target = build({ + column: { + type: :time64, + unit: :micro, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)], + [nil], + # 02:00:09.987654321 + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)], + ] + iterated_records = [] + target = build({ + column: { + type: :time64, + unit: :nano, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal128 + 
records = [ + [BigDecimal("92.92")], + [nil], + [BigDecimal("29.29")], + ] + iterated_records = [] + target = build({ + column: { + type: :decimal128, + precision: 8, + scale: 2, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal256 + records = [ + [BigDecimal("92.92")], + [nil], + [BigDecimal("29.29")], + ] + iterated_records = [] + target = build({ + column: { + type: :decimal256, + precision: 38, + scale: 2, + } + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_month_interval + records = [ + [1], + [nil], + [12], + ] + iterated_records = [] + target = build({column: :month_interval}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_day_time_interval + records = [ + [{day: 1, millisecond: 100}], + [nil], + [{day: 2, millisecond: 300}], + ] + iterated_records = [] + target = build({column: :day_time_interval}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_month_day_nano_interval + records = [ + [{month: 1, day: 1, nanosecond: 100}], + [nil], + [{month: 2, day: 3, nanosecond: 400}], + ] + iterated_records = [] + target = build({column: :month_day_nano_interval}, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end +end + +class EachRawRecordRecordBatchBasicArraysTest< Test::Unit::TestCase + include EachRawRecordBasicArraysTests + + def build(schema, records) + Arrow::RecordBatch.new(schema, records) + end +end + +class EachRawRecordTableBasicArraysTest < Test::Unit::TestCase + include EachRawRecordBasicArraysTests + + def build(schema, records) + record_batch = Arrow::RecordBatch.new(schema, records) 
+ # Multiple chunks + record_batches = [ + record_batch.slice(0, 2), + record_batch.slice(2, 0), # Empty chunk + record_batch.slice(2, record_batch.length - 2), + ] + Arrow::Table.new(schema, record_batches) + end +end diff --git a/ruby/red-arrow/test/each-raw-record/test-dense-union-array.rb b/ruby/red-arrow/test/each-raw-record/test-dense-union-array.rb new file mode 100644 index 0000000000000..7c784cccde3a1 --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-dense-union-array.rb @@ -0,0 +1,706 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module EachRawRecordDenseUnionArrayTests + def build_schema(type, type_codes) + field_description = {} + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + { + column: { + type: :dense_union, + fields: [ + field_description.merge(name: "0"), + field_description.merge(name: "1"), + ], + type_codes: type_codes, + }, + } + end + + # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records) + def build_record_batch(type, records) + type_codes = [0, 1] + schema = Arrow::Schema.new(build_schema(type, type_codes)) + type_ids = [] + offsets = [] + arrays = schema.fields[0].data_type.fields.collect do |field| + sub_schema = Arrow::Schema.new([field]) + sub_records = [] + records.each do |record| + column = record[0] + next if column.nil? + next unless column.key?(field.name) + sub_records << [column[field.name]] + end + sub_record_batch = Arrow::RecordBatch.new(sub_schema, + sub_records) + sub_record_batch.columns[0].data + end + records.each do |record| + column = record[0] + if column.key?("0") + type_id = type_codes[0] + type_ids << type_id + offsets << (type_ids.count(type_id) - 1) + elsif column.key?("1") + type_id = type_codes[1] + type_ids << type_id + offsets << (type_ids.count(type_id) - 1) + end + end + union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type, + Arrow::Int8Array.new(type_ids), + Arrow::Int32Array.new(offsets), + arrays) + schema = Arrow::Schema.new(column: union_array.value_data_type) + Arrow::RecordBatch.new(schema, + records.size, + [union_array]) + end + + def remove_field_names(records) + records.collect do |record| + record.collect do |column| + if column.nil? 
+ column + else + column.values[0] + end + end + end + end + + def test_null + records = [ + [{"0" => nil}], + ] + iterated_records = [] + target = build(:null, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_boolean + records = [ + [{"0" => true}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:boolean, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_int8 + records = [ + [{"0" => -(2 ** 7)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:int8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_uint8 + records = [ + [{"0" => (2 ** 8) - 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:uint8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_int16 + records = [ + [{"0" => -(2 ** 15)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:int16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_uint16 + records = [ + [{"0" => (2 ** 16) - 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:uint16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_int32 + records = [ + [{"0" => -(2 ** 31)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:int32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + 
def test_uint32 + records = [ + [{"0" => (2 ** 32) - 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:uint32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_int64 + records = [ + [{"0" => -(2 ** 63)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:int64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_uint64 + records = [ + [{"0" => (2 ** 64) - 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:uint64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_float + records = [ + [{"0" => -1.0}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:float, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_double + records = [ + [{"0" => -1.0}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:double, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_binary + records = [ + [{"0" => "\xff".b}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:binary, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_string + records = [ + [{"0" => "Ruby"}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:string, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_date32 + records = [ + [{"0" => 
Date.new(1960, 1, 1)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:date32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + target.raw_records) + end + + def test_date64 + records = [ + [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:date64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_timestamp_second + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30Z")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_timestamp_milli + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_timestamp_micro + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_timestamp_nano + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def 
test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + # 00:10:00 + [{"0" => Arrow::Time.new(unit, 60 * 10)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + # 00:10:00.123 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_decimal128 + records = [ + [{"0" => BigDecimal("92.92")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_decimal256 + 
records = [ + [{"0" => BigDecimal("92.92")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_month_interval + records = [ + [{"0" => 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:month_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_day_time_interval + records = [ + [{"0" => {day: 1, millisecond: 100}}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:day_time_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_month_day_nano_interval + records = [ + [{"0" => {month: 1, day: 1, nanosecond: 100}}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:month_day_nano_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_list + records = [ + [{"0" => [true, nil, false]}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_struct + records = [ + [{"0" => {"sub_field" => true}}], + [{"1" => nil}], + [{"0" => {"sub_field" => nil}}], + ] + iterated_records = [] + target = build({ + type: :struct, + fields: [ + { + name: :sub_field, + type: :boolean, + }, + ], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + 
assert_equal(remove_field_names(records), + iterated_records) + end + + def test_map + records = [ + [{"0" => {"key1" => true, "key2" => nil}}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end + + def test_sparse_union + records = [ + [{"0" => {"field1" => true}}], + [{"1" => nil}], + [{"0" => {"field2" => 29}}], + [{"0" => {"field2" => nil}}], + ] + iterated_records = [] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(remove_field_names(records)), + iterated_records) + end + + def test_dense_union + records = [ + [{"0" => {"field1" => true}}], + [{"1" => nil}], + [{"0" => {"field2" => 29}}], + [{"0" => {"field2" => nil}}], + ] + iterated_records = [] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(remove_field_names(records)), + iterated_records) + end + + def test_dictionary + records = [ + [{"0" => "Ruby"}], + [{"1" => nil}], + [{"0" => "GLib"}], + ] + iterated_records = [] + target = build({ + type: :dictionary, + index_data_type: :int8, + value_data_type: :string, + ordered: false, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), + iterated_records) + end +end + +class EachRawRecordRecordBatchDenseUnionArrayTest < Test::Unit::TestCase + include EachRawRecordDenseUnionArrayTests + + def 
build(type, records) + build_record_batch(type, records) + end +end + +class EachRawRecordTableDenseUnionArrayTest < Test::Unit::TestCase + include EachRawRecordDenseUnionArrayTests + + def build(type, records) + build_record_batch(type, records).to_table + end +end diff --git a/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb b/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb new file mode 100644 index 0000000000000..edc6c33cc091d --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-dictionary-array.rb @@ -0,0 +1,457 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module EachRawRecordDictionaryArrayTests + def build_record_batch(array) + dictionary = array.dictionary_encode + schema = Arrow::Schema.new(column: dictionary.value_data_type) + Arrow::RecordBatch.new(schema, array.length, [dictionary]) + end + + def test_null + records = [ + [nil], + [nil], + [nil], + [nil], + ] + iterated_records = [] + target = build(Arrow::NullArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_boolean + records = [ + [true], + [nil], + [false], + ] + iterated_records = [] + target = build(Arrow::BooleanArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int8 + records = [ + [-(2 ** 7)], + [nil], + [(2 ** 7) - 1], + ] + iterated_records = [] + target = build(Arrow::Int8Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint8 + records = [ + [0], + [nil], + [(2 ** 8) - 1], + ] + iterated_records = [] + target = build(Arrow::UInt8Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int16 + records = [ + [-(2 ** 15)], + [nil], + [(2 ** 15) - 1], + ] + iterated_records = [] + target = build(Arrow::Int16Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint16 + records = [ + [0], + [nil], + [(2 ** 16) - 1], + ] + iterated_records = [] + target = build(Arrow::UInt16Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int32 + records = [ + [-(2 ** 
31)], + [nil], + [(2 ** 31) - 1], + ] + iterated_records = [] + target = build(Arrow::Int32Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint32 + records = [ + [0], + [nil], + [(2 ** 32) - 1], + ] + iterated_records = [] + target = build(Arrow::UInt32Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int64 + records = [ + [-(2 ** 63)], + [nil], + [(2 ** 63) - 1], + ] + iterated_records = [] + target = build(Arrow::Int64Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint64 + records = [ + [0], + [nil], + [(2 ** 64) - 1], + ] + iterated_records = [] + target = build(Arrow::UInt64Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_float + records = [ + [-1.0], + [nil], + [1.0], + ] + iterated_records = [] + target = build(Arrow::FloatArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_double + records = [ + [-1.0], + [nil], + [1.0], + ] + iterated_records = [] + target = build(Arrow::DoubleArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_binary + records = [ + ["\x00".b], + [nil], + ["\xff".b], + ] + iterated_records = [] + target = build(Arrow::BinaryArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_string + records = [ + ["Ruby"], + [nil], 
+ ["\u3042"], # U+3042 HIRAGANA LETTER A + ] + iterated_records = [] + target = build(Arrow::StringArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date32 + records = [ + [Date.new(1960, 1, 1)], + [nil], + [Date.new(2017, 8, 23)], + ] + iterated_records = [] + target = build(Arrow::Date32Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date64 + records = [ + [DateTime.new(1960, 1, 1, 2, 9, 30)], + [nil], + [DateTime.new(2017, 8, 23, 14, 57, 2)], + ] + iterated_records = [] + target = build(Arrow::Date64Array.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_second + records = [ + [Time.parse("1960-01-01T02:09:30Z")], + [nil], + [Time.parse("2017-08-23T14:57:02Z")], + ] + iterated_records = [] + target = build(Arrow::TimestampArray.new(:second, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_milli + records = [ + [Time.parse("1960-01-01T02:09:30.123Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987Z")], + ] + iterated_records = [] + target = build(Arrow::TimestampArray.new(:milli, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_micro + records = [ + [Time.parse("1960-01-01T02:09:30.123456Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987654Z")], + ] + iterated_records = [] + target = build(Arrow::TimestampArray.new(:micro, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) 
+ end + + def test_timestamp_nano + records = [ + [Time.parse("1960-01-01T02:09:30.123456789Z")], + [nil], + [Time.parse("2017-08-23T14:57:02.987654321Z")], + ] + iterated_records = [] + target = build(Arrow::TimestampArray.new(:nano, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + [Arrow::Time.new(unit, 60 * 10)], # 00:10:00 + [nil], + [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09 + ] + iterated_records = [] + target = build(Arrow::Time32Array.new(unit, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123 + [nil], + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987 + ] + iterated_records = [] + target = build(Arrow::Time32Array.new(unit, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)], + [nil], + # 02:00:09.987654 + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)], + ] + iterated_records = [] + target = build(Arrow::Time64Array.new(unit, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)], + [nil], + # 02:00:09.987654321 + [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)], + ] + iterated_records = [] + 
target = build(Arrow::Time64Array.new(unit, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal128 + records = [ + [BigDecimal("92.92")], + [nil], + [BigDecimal("29.29")], + ] + iterated_records = [] + data_type = Arrow::Decimal128DataType.new(8, 2) + target = build(Arrow::Decimal128Array.new(data_type, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal256 + records = [ + [BigDecimal("92.92")], + [nil], + [BigDecimal("29.29")], + ] + iterated_records = [] + data_type = Arrow::Decimal256DataType.new(38, 2) + target = build(Arrow::Decimal256Array.new(data_type, records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, target.raw_records) + end + + def test_month_interval + records = [ + [1], + [nil], + [12], + ] + iterated_records = [] + target = build(Arrow::MonthIntervalArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_day_time_interval + records = [ + [{day: 1, millisecond: 100}], + [nil], + [{day: 2, millisecond: 300}], + ] + iterated_records = [] + target = build(Arrow::DayTimeIntervalArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_month_day_nano_interval + records = [ + [{month: 1, day: 1, nanosecond: 100}], + [nil], + [{month: 2, day: 3, nanosecond: 400}], + ] + iterated_records = [] + target = build(Arrow::MonthDayNanoIntervalArray.new(records.collect(&:first))) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end +end + +class 
EachRawRecordRecordBatchDictionaryArraysTest < Test::Unit::TestCase + include EachRawRecordDictionaryArrayTests + + def build(array) + build_record_batch(array) + end +end + +class EachRawRecordTableDictionaryArraysTest < Test::Unit::TestCase + include EachRawRecordDictionaryArrayTests + + def build(array) + build_record_batch(array).to_table + end +end diff --git a/ruby/red-arrow/test/each-raw-record/test-list-array.rb b/ruby/red-arrow/test/each-raw-record/test-list-array.rb new file mode 100644 index 0000000000000..64cc5839bd757 --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-list-array.rb @@ -0,0 +1,771 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module EachRawRecordListArrayTests + def build_schema(type) + field_description = { + name: :element, + } + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + { + column: { + type: :list, + field: field_description, + }, + } + end + + def test_null + records = [ + [[nil, nil, nil]], + [nil], + ] + iterated_records = [] + target = build(:null, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_boolean + records = [ + [[true, nil, false]], + [nil], + ] + iterated_records = [] + target = build(:boolean, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int8 + records = [ + [[-(2 ** 7), nil, (2 ** 7) - 1]], + [nil], + ] + iterated_records = [] + target = build(:int8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint8 + records = [ + [[0, nil, (2 ** 8) - 1]], + [nil], + ] + iterated_records = [] + target = build(:uint8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int16 + records = [ + [[-(2 ** 15), nil, (2 ** 15) - 1]], + [nil], + ] + iterated_records = [] + target = build(:int16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint16 + records = [ + [[0, nil, (2 ** 16) - 1]], + [nil], + ] + iterated_records = [] + target = build(:uint16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int32 + records = [ + [[-(2 ** 31), nil, (2 ** 31) - 1]], + [nil], + ] + iterated_records = [] + target = build(:int32, records) + target.each_raw_record do 
|record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint32 + records = [ + [[0, nil, (2 ** 32) - 1]], + [nil], + ] + iterated_records = [] + target = build(:uint32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int64 + records = [ + [[-(2 ** 63), nil, (2 ** 63) - 1]], + [nil], + ] + iterated_records = [] + target = build(:int64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint64 + records = [ + [[0, nil, (2 ** 64) - 1]], + [nil], + ] + iterated_records = [] + target = build(:uint64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_float + records = [ + [[-1.0, nil, 1.0]], + [nil], + ] + iterated_records = [] + target = build(:float, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_double + records = [ + [[-1.0, nil, 1.0]], + [nil], + ] + iterated_records = [] + target = build(:double, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_binary + records = [ + [["\x00".b, nil, "\xff".b]], + [nil], + ] + iterated_records = [] + target = build(:binary, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_string + records = [ + [ + [ + "Ruby", + nil, + "\u3042", # U+3042 HIRAGANA LETTER A + ], + ], + [nil], + ] + iterated_records = [] + target = build(:string, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date32 + records = [ + [ + [ + Date.new(1960, 1, 1), + nil, + 
Date.new(2017, 8, 23), + ], + ], + [nil], + ] + iterated_records = [] + target = build(:date32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date64 + records = [ + [ + [ + DateTime.new(1960, 1, 1, 2, 9, 30), + nil, + DateTime.new(2017, 8, 23, 14, 57, 2), + ], + ], + [nil], + ] + iterated_records = [] + target = build(:date64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_second + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30Z"), + nil, + Time.parse("2017-08-23T14:57:02Z"), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + iterated_records = [] + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_milli + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123Z"), + nil, + Time.parse("2017-08-23T14:57:02.987Z"), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + iterated_records = [] + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_micro + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123456Z"), + nil, + Time.parse("2017-08-23T14:57:02.987654Z"), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + iterated_records = [] + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_nano + records = [ + [ + [ + Time.parse("1960-01-01T02:09:30.123456789Z"), + nil, + Time.parse("2017-08-23T14:57:02.987654321Z"), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: 
:timestamp, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + [ + [ + # 00:10:00 + Arrow::Time.new(unit, 60 * 10), + nil, + # 02:00:09 + Arrow::Time.new(unit, 60 * 60 * 2 + 9), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + [ + [ + # 00:10:00.123 + Arrow::Time.new(unit, (60 * 10) * 1000 + 123), + nil, + # 02:00:09.987 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + [ + [ + # 00:10:00.123456 + Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), + nil, + # 02:00:09.987654 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + [ + [ + # 00:10:00.123456789 + Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), + nil, + # 02:00:09.987654321 + Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + 
assert_equal(records, iterated_records) + end + + def test_decimal128 + records = [ + [ + [ + BigDecimal("92.92"), + nil, + BigDecimal("29.29"), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal256 + records = [ + [ + [ + BigDecimal("92.92"), + nil, + BigDecimal("29.29"), + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_month_interval + records = [ + [[1, nil, 12]], + [nil], + ] + iterated_records = [] + target = build(:month_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_day_time_interval + records = [ + [ + [ + {day: 1, millisecond: 100}, + nil, + {day: 2, millisecond: 300}, + ] + ], + [nil], + ] + iterated_records = [] + target = build(:day_time_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_month_day_nano_interval + records = [ + [ + [ + {month: 1, day: 1, nanosecond: 100}, + nil, + {month: 2, day: 3, nanosecond: 400}, + ] + ], + [nil], + ] + iterated_records = [] + target = build(:month_day_nano_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_list + records = [ + [ + [ + [ + true, + nil, + ], + nil, + [ + nil, + false, + ], + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + records) + target.each_raw_record do |record| + 
iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_struct + records = [ + [ + [ + {"field" => true}, + nil, + {"field" => nil}, + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :struct, + fields: [ + { + name: :field, + type: :boolean, + }, + ], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_map + records = [ + [ + [ + {"key1" => true, "key2" => nil}, + nil, + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def remove_union_field_names(records) + records.collect do |record| + record.collect do |column| + if column.nil? + column + else + column.collect do |value| + if value.nil? + value + else + value.values[0] + end + end + end + end + end + end + + def test_sparse_union + records = [ + [ + [ + {"field1" => true}, + nil, + {"field2" => 29}, + {"field2" => nil}, + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_union_field_names(records), + iterated_records) + end + + def test_dense_union + records = [ + [ + [ + {"field1" => true}, + nil, + {"field2" => 29}, + {"field2" => nil}, + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_union_field_names(records), + 
iterated_records) + end + + def test_dictionary + records = [ + [ + [ + "Ruby", + nil, + "GLib", + ], + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :dictionary, + index_data_type: :int8, + value_data_type: :string, + ordered: false, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end +end + +class EachRawRecordRecordBatchListArrayTest < Test::Unit::TestCase + include EachRawRecordListArrayTests + + def build(type, records) + Arrow::RecordBatch.new(build_schema(type), records) + end +end + +class EachRawRecordTableListArrayTest < Test::Unit::TestCase + include EachRawRecordListArrayTests + + def build(type, records) + Arrow::Table.new(build_schema(type), records) + end +end diff --git a/ruby/red-arrow/test/each-raw-record/test-map-array.rb b/ruby/red-arrow/test/each-raw-record/test-map-array.rb new file mode 100644 index 0000000000000..b802275a7f00f --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-map-array.rb @@ -0,0 +1,647 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module EachRawRecordMapArrayTests + def build_schema(type) + { + column: { + type: :map, + key: :string, + item: type + }, + } + end + + def test_null + records = [ + [{"key1" => nil}], + [nil], + ] + iterated_records = [] + target = build(:null, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_boolean + records = [ + [{"key1" => true, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:boolean, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int8 + records = [ + [{"key1" => -(2 ** 7), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:int8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint8 + records = [ + [{"key1" => (2 ** 8) - 1, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:uint8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int16 + records = [ + [{"key1" => -(2 ** 15), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:int16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint16 + records = [ + [{"key1" => (2 ** 16) - 1, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:uint16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int32 + records = [ + [{"key1" => -(2 ** 31), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:int32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def 
test_uint32 + records = [ + [{"key1" => (2 ** 32) - 1, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:uint32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int64 + records = [ + [{"key1" => -(2 ** 63), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:int64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint64 + records = [ + [{"key1" => (2 ** 64) - 1, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:uint64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_float + records = [ + [{"key1" => -1.0, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:float, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_double + records = [ + [{"key1" => -1.0, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:double, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_binary + records = [ + [{"key1" => "\xff".b, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:binary, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_string + records = [ + [{"key1" => "Ruby", "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:string, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date32 + records = [ + [{"key1" => Date.new(1960, 1, 1), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = 
build(:date32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date64 + records = [ + [{"key1" => DateTime.new(1960, 1, 1, 2, 9, 30), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:date64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_second + records = [ + [{"key1" => Time.parse("1960-01-01T02:09:30Z"), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_milli + records = [ + [{"key1" => Time.parse("1960-01-01T02:09:30.123Z"), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_micro + records = [ + [{"key1" => Time.parse("1960-01-01T02:09:30.123456Z"), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_nano + records = [ + [{"key1" => Time.parse("1960-01-01T02:09:30.123456789Z"), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + # 00:10:00 + [{"key1" => Arrow::Time.new(unit, 60 * 10), "key2" => nil}], + [nil], + ] + iterated_records = [] + 
target = build({ + type: :time32, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + # 00:10:00.123 + [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [{"key1" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal128 + records = [ + [{"key1" => BigDecimal("92.92"), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal256 + records = [ + [{"key1" => BigDecimal("92.92"), "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << 
record + end + assert_equal(records, iterated_records) + end + + def test_month_interval + records = [ + [{"key1" => 1, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build(:month_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_day_time_interval + records = [ + [ + { + "key1" => {day: 1, millisecond: 100}, + "key2" => nil, + }, + ], + [nil], + ] + iterated_records = [] + target = build(:day_time_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_month_day_nano_interval + records = [ + [ + { + "key1" => {month: 1, day: 1, nanosecond: 100}, + "key2" => nil, + }, + ], + [nil], + ] + iterated_records = [] + target = build(:month_day_nano_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_list + records = [ + [{"key1" => [true, nil, false], "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :list, + field: { + name: :element, + type: :boolean, + }, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_struct + records = [ + [{"key1" => {"field" => true}, "key2" => nil, "key3" => {"field" => nil}}], + [nil], + ] + iterated_records = [] + target = build({ + type: :struct, + fields: [ + { + name: :field, + type: :boolean, + }, + ], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_map + records = [ + [{"key1" => {"sub_key1" => true, "sub_key2" => nil}, "key2" => nil}], + [nil], + ] + iterated_records = [] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + target.each_raw_record do |record| + 
iterated_records << record + end + assert_equal(records, iterated_records) + end + + def remove_union_field_names(records) + records.collect do |record| + record.collect do |column| + if column.nil? + column + else + value = {} + column.each do |k, v| + v = v.values[0] unless v.nil? + value[k] = v + end + value + end + end + end + end + + def test_sparse_union + records = [ + [ + { + "key1" => {"field1" => true}, + "key2" => nil, + "key3" => {"field2" => 29}, + "key4" => {"field2" => nil}, + }, + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_union_field_names(records), + target.raw_records) + end + + def test_dense_union + records = [ + [ + { + "key1" => {"field1" => true}, + "key2" => nil, + "key3" => {"field2" => 29}, + "key4" => {"field2" => nil}, + }, + ], + [nil], + ] + iterated_records = [] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_union_field_names(records), + target.raw_records) + end + + def test_dictionary + records = [ + [{"key1" => "Ruby", "key2" => nil, "key3" => "GLib"}], + [nil], + ] + iterated_records = [] + target = build({ + type: :dictionary, + index_data_type: :int8, + value_data_type: :string, + ordered: false, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end +end + +class EachRawRecordRecordBatchMapArrayTest < Test::Unit::TestCase + include EachRawRecordMapArrayTests + + def build(type, records) + Arrow::RecordBatch.new(build_schema(type), records) + end +end + 
+class EachRawRecordTableMapArrayTest < Test::Unit::TestCase + include EachRawRecordMapArrayTests + + def build(type, records) + Arrow::Table.new(build_schema(type), records) + end +end diff --git a/ruby/red-arrow/test/each-raw-record/test-multiple-columns.rb b/ruby/red-arrow/test/each-raw-record/test-multiple-columns.rb new file mode 100644 index 0000000000000..c0547d324d26d --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-multiple-columns.rb @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module EachRawRecordMultipleColumnsTests + def test_3_elements + records = [ + [true, nil, "Ruby"], + [nil, 0, "GLib"], + [false, 2 ** 8 - 1, nil], + ] + iterated_records = [] + target = build([ + {name: :column0, type: :boolean}, + {name: :column1, type: :uint8}, + {name: :column2, type: :string}, + ], + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_4_elements + records = [ + [true, nil, "Ruby", -(2 ** 63)], + [nil, 0, "GLib", nil], + [false, 2 ** 8 - 1, nil, (2 ** 63) - 1], + ] + iterated_records = [] + target = build([ + {name: :column0, type: :boolean}, + {name: :column1, type: :uint8}, + {name: :column2, type: :string}, + {name: :column3, type: :int64}, + ], + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end +end + +class EachRawRecordRecordBatchMultipleColumnsTest < Test::Unit::TestCase + include EachRawRecordMultipleColumnsTests + + def build(schema, records) + Arrow::RecordBatch.new(schema, records) + end +end + +class EachRawRecordTableMultipleColumnsTest < Test::Unit::TestCase + include EachRawRecordMultipleColumnsTests + + def build(schema, records) + record_batch = Arrow::RecordBatch.new(schema, records) + record_batches = [ + record_batch.slice(0, 2), + record_batch.slice(2, 0), + record_batch.slice(2, record_batch.length - 2), + ] + + Arrow::Table.new(schema, record_batches) + end +end diff --git a/ruby/red-arrow/test/each-raw-record/test-sparse-union-array.rb b/ruby/red-arrow/test/each-raw-record/test-sparse-union-array.rb new file mode 100644 index 0000000000000..4b1b941fb2079 --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-sparse-union-array.rb @@ -0,0 +1,662 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module EachRawRecordSparseUnionArrayTests + def build_schema(type, type_codes) + field_description = {} + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + { + column: { + type: :sparse_union, + fields: [ + field_description.merge(name: "0"), + field_description.merge(name: "1"), + ], + type_codes: type_codes, + }, + } + end + + # TODO: Use Arrow::RecordBatch.new(build_schema(type, type_codes), records) + def build_record_batch(type, records) + type_codes = [0, 1] + schema = Arrow::Schema.new(build_schema(type, type_codes)) + type_ids = [] + arrays = schema.fields[0].data_type.fields.collect do |field| + sub_schema = Arrow::Schema.new([field]) + sub_records = records.collect do |record| + [record[0].nil? ? 
nil : record[0][field.name]] + end + sub_record_batch = Arrow::RecordBatch.new(sub_schema, + sub_records) + sub_record_batch.columns[0].data + end + records.each do |record| + column = record[0] + if column.key?("0") + type_ids << type_codes[0] + elsif column.key?("1") + type_ids << type_codes[1] + end + end + union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type, + Arrow::Int8Array.new(type_ids), + arrays) + schema = Arrow::Schema.new(column: union_array.value_data_type) + Arrow::RecordBatch.new(schema, + records.size, + [union_array]) + end + + def remove_field_names(records) + records.collect do |record| + record.collect do |column| + if column.nil? + column + else + column.values[0] + end + end + end + end + + def test_null + records = [ + [{"0" => nil}], + ] + iterated_records = [] + target = build(:null, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_boolean + records = [ + [{"0" => true}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:boolean, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_int8 + records = [ + [{"0" => -(2 ** 7)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:int8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_uint8 + records = [ + [{"0" => (2 ** 8) - 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:uint8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_int16 + records = [ + [{"0" => -(2 ** 15)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:int16, records) + target.each_raw_record do |record| + 
iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_uint16 + records = [ + [{"0" => (2 ** 16) - 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:uint16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_int32 + records = [ + [{"0" => -(2 ** 31)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:int32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_uint32 + records = [ + [{"0" => (2 ** 32) - 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:uint32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_int64 + records = [ + [{"0" => -(2 ** 63)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:int64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_uint64 + records = [ + [{"0" => (2 ** 64) - 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:uint64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_float + records = [ + [{"0" => -1.0}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:float, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_double + records = [ + [{"0" => -1.0}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:double, records) + target.each_raw_record do |record| + iterated_records << record + end + 
assert_equal(remove_field_names(records), iterated_records) + end + + def test_binary + records = [ + [{"0" => "\xff".b}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:binary, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_string + records = [ + [{"0" => "Ruby"}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:string, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_date32 + records = [ + [{"0" => Date.new(1960, 1, 1)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:date32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_date64 + records = [ + [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:date64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_timestamp_second + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30Z")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_timestamp_milli + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + + end + + def test_timestamp_micro + records = [ + [{"0" => 
Time.parse("1960-01-01T02:09:30.123456Z")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_timestamp_nano + records = [ + [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + # 00:10:00 + [{"0" => Arrow::Time.new(unit, 60 * 10)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + # 00:10:00.123 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [{"0" => Arrow::Time.new(unit, (60 * 10) 
* 1_000_000_000 + 123_456_789)}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_decimal128 + records = [ + [{"0" => BigDecimal("92.92")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_decimal256 + records = [ + [{"0" => BigDecimal("92.92")}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_month_interval + records = [ + [{"0" => 1}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:month_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_day_time_interval + records = [ + [{"0" => {day: 1, millisecond: 100}}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:day_time_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_month_day_nano_interval + records = [ + [{"0" => {month: 1, day: 1, nanosecond: 100}}], + [{"1" => nil}], + ] + iterated_records = [] + target = build(:month_day_nano_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_list + records = [ + [{"0" => [true, nil, false]}], + [{"1" => nil}], 
+ ] + iterated_records = [] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_struct + records = [ + [{"0" => {"sub_field" => true}}], + [{"1" => nil}], + [{"0" => {"sub_field" => nil}}], + ] + iterated_records = [] + target = build({ + type: :struct, + fields: [ + { + name: :sub_field, + type: :boolean, + }, + ], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_map + records = [ + [{"0" => {"key1" => true, "key2" => nil}}], + [{"1" => nil}], + ] + iterated_records = [] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end + + def test_sparse_union + records = [ + [{"0" => {"field1" => true}}], + [{"1" => nil}], + [{"0" => {"field2" => 29}}], + [{"0" => {"field2" => nil}}], + ] + iterated_records = [] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(remove_field_names(records)), iterated_records) + end + + def test_dense_union + records = [ + [{"0" => {"field1" => true}}], + [{"1" => nil}], + [{"0" => {"field2" => 29}}], + [{"0" => {"field2" => nil}}], + ] + iterated_records = [] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + 
assert_equal(remove_field_names(remove_field_names(records)), iterated_records) + end + + def test_dictionary + records = [ + [{"0" => "Ruby"}], + [{"1" => nil}], + [{"0" => "GLib"}], + ] + iterated_records = [] + target = build({ + type: :dictionary, + index_data_type: :int8, + value_data_type: :string, + ordered: false, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_field_names(records), iterated_records) + end +end + +class EachRawRecordRecordBatchSparseUnionArrayTest < Test::Unit::TestCase + include EachRawRecordSparseUnionArrayTests + + def build(type, records) + build_record_batch(type, records) + end +end + +class EachRawRecordTableSparseUnionArrayTest < Test::Unit::TestCase + include EachRawRecordSparseUnionArrayTests + + def build(type, records) + build_record_batch(type, records).to_table + end +end diff --git a/ruby/red-arrow/test/each-raw-record/test-struct-array.rb b/ruby/red-arrow/test/each-raw-record/test-struct-array.rb new file mode 100644 index 0000000000000..de6e78aaeef4c --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-struct-array.rb @@ -0,0 +1,669 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +module EachRawRecordStructArrayTests + def build_schema(type) + field_description = { + name: :field, + } + if type.is_a?(Hash) + field_description = field_description.merge(type) + else + field_description[:type] = type + end + { + column: { + type: :struct, + fields: [ + field_description, + ], + }, + } + end + + def test_null + records = [ + [{"field" => nil}], + [nil], + ] + iterated_records = [] + target = build(:null, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_boolean + records = [ + [{"field" => true}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:boolean, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int8 + records = [ + [{"field" => -(2 ** 7)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:int8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint8 + records = [ + [{"field" => (2 ** 8) - 1}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:uint8, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int16 + records = [ + [{"field" => -(2 ** 15)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:int16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint16 + records = [ + [{"field" => (2 ** 16) - 1}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:uint16, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int32 + records = [ + [{"field" => -(2 ** 31)}], 
+ [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:int32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint32 + records = [ + [{"field" => (2 ** 32) - 1}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:uint32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_int64 + records = [ + [{"field" => -(2 ** 63)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:int64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_uint64 + records = [ + [{"field" => (2 ** 64) - 1}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:uint64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_float + records = [ + [{"field" => -1.0}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:float, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_double + records = [ + [{"field" => -1.0}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:double, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_binary + records = [ + [{"field" => "\xff".b}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:binary, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_string + records = [ + [{"field" => "Ruby"}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = 
build(:string, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date32 + records = [ + [{"field" => Date.new(1960, 1, 1)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:date32, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_date64 + records = [ + [{"field" => DateTime.new(1960, 1, 1, 2, 9, 30)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:date64, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_second + records = [ + [{"field" => Time.parse("1960-01-01T02:09:30Z")}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_milli + records = [ + [{"field" => Time.parse("1960-01-01T02:09:30.123Z")}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_micro + records = [ + [{"field" => Time.parse("1960-01-01T02:09:30.123456Z")}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_timestamp_nano + records = [ + [{"field" => Time.parse("1960-01-01T02:09:30.123456789Z")}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :timestamp, + unit: 
:nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_second + unit = Arrow::TimeUnit::SECOND + records = [ + # 00:10:00 + [{"field" => Arrow::Time.new(unit, 60 * 10)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :second, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time32_milli + unit = Arrow::TimeUnit::MILLI + records = [ + # 00:10:00.123 + [{"field" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :time32, + unit: :milli, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_micro + unit = Arrow::TimeUnit::MICRO + records = [ + # 00:10:00.123456 + [{"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :micro, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_time64_nano + unit = Arrow::TimeUnit::NANO + records = [ + # 00:10:00.123456789 + [{"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :time64, + unit: :nano, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal128 + records = [ + [{"field" => BigDecimal("92.92")}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :decimal128, + precision: 8, + scale: 2, + }, + records) + 
target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_decimal256 + records = [ + [{"field" => BigDecimal("92.92")}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :decimal256, + precision: 38, + scale: 2, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_month_interval + records = [ + [{"field" => 1}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:month_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_day_time_interval + records = [ + [{"field" => {day: 1, millisecond: 100}}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:day_time_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_month_day_nano_interval + records = [ + [{"field" => {month: 1, day: 1, nanosecond: 100}}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build(:month_day_nano_interval, records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_list + records = [ + [{"field" => [true, nil, false]}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :list, + field: { + name: :sub_element, + type: :boolean, + }, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_struct + records = [ + [{"field" => {"sub_field" => true}}], + [nil], + [{"field" => nil}], + [{"field" => {"sub_field" => nil}}], + ] + iterated_records = [] + target = build({ + type: :struct, + fields: [ + { + name: :sub_field, + type: 
:boolean, + }, + ], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def test_map + records = [ + [{"field" => {"key1" => true, "key2" => nil}}], + [nil], + [{"field" => nil}], + ] + iterated_records = [] + target = build({ + type: :map, + key: :string, + item: :boolean, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end + + def remove_union_field_names(records) + records.collect do |record| + record.collect do |column| + if column.nil? + column + else + value = column["field"] + value = value.values[0] unless value.nil? + {"field" => value} + end + end + end + end + + def test_sparse_union + records = [ + [{"field" => {"field1" => true}}], + [nil], + [{"field" => nil}], + [{"field" => {"field2" => 29}}], + [{"field" => {"field2" => nil}}], + ] + iterated_records = [] + target = build({ + type: :sparse_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_union_field_names(records), + iterated_records) + end + + def test_dense_union + records = [ + [{"field" => {"field1" => true}}], + [nil], + [{"field" => nil}], + [{"field" => {"field2" => 29}}], + [{"field" => {"field2" => nil}}], + ] + iterated_records = [] + target = build({ + type: :dense_union, + fields: [ + { + name: :field1, + type: :boolean, + }, + { + name: :field2, + type: :uint8, + }, + ], + type_codes: [0, 1], + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(remove_union_field_names(records), + iterated_records) + end + + def test_dictionary + records = [ + [{"field" => "Ruby"}], + [nil], + [{"field" => nil}], + [{"field" => "GLib"}], + ] + iterated_records = [] + target = build({ + type: 
:dictionary, + index_data_type: :int8, + value_data_type: :string, + ordered: false, + }, + records) + target.each_raw_record do |record| + iterated_records << record + end + assert_equal(records, iterated_records) + end +end + +class RawRecordsRecordBatchStructArrayTest < Test::Unit::TestCase + include EachRawRecordStructArrayTests + + def build(type, records) + Arrow::RecordBatch.new(build_schema(type), records) + end +end + +class RawRecordsTableStructArrayTest < Test::Unit::TestCase + include EachRawRecordStructArrayTests + + def build(type, records) + Arrow::Table.new(build_schema(type), records) + end +end diff --git a/ruby/red-arrow/test/each-raw-record/test-table.rb b/ruby/red-arrow/test/each-raw-record/test-table.rb new file mode 100644 index 0000000000000..b5bd80127c8b0 --- /dev/null +++ b/ruby/red-arrow/test/each-raw-record/test-table.rb @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class EachRawRecordTableTest < Test::Unit::TestCase # checks Arrow::Table#each_raw_record on a table built from several record batches + test("2 arrays") do # two batches (3 rows, then 4 rows) that share one schema + raw_record_batches = [ + [ + [true, nil, "Ruby"], + [nil, 0, "GLib"], + [false, 2 ** 8 - 1, nil], # 2 ** 8 - 1 == 255, stored in the uint8 column + ], + [ + [nil, 10, "A"], + [true, 20, "B"], + [false, nil, "C"], + [nil, 40, nil], + ] + ] + raw_records = raw_record_batches.inject do |all_records, record_batch| # expected rows: both batches concatenated in order + all_records + record_batch + end + schema = [ + {name: :column0, type: :boolean}, + {name: :column1, type: :uint8}, + {name: :column2, type: :string}, + ] + record_batches = raw_record_batches.collect do |record_batch| # one Arrow::RecordBatch per raw batch + Arrow::RecordBatch.new(schema, record_batch) + end + iterated_records = [] + table = Arrow::Table.new(schema, record_batches) + table.each_raw_record do |record| # gather every yielded row in iteration order + iterated_records << record + end + assert_equal(raw_records, iterated_records) # yielded rows must equal the concatenated input rows + end +end diff --git a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb index 15cdee68209e7..9b95a30ad44d5 100644 --- a/ruby/red-arrow/test/raw-records/test-basic-arrays.rb +++ b/ruby/red-arrow/test/raw-records/test-basic-arrays.rb @@ -157,7 +157,7 @@ def test_binary assert_equal(records, target.raw_records) end - def test_tring + def test_string records = [ ["Ruby"], [nil],