diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index 74c2c16f8c859..457135fa400d5 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -452,8 +452,10 @@ Result> ConcatenateRecordBatches( } std::vector> concatenated_columns; + concatenated_columns.reserve(cols); for (int col = 0; col < cols; ++col) { ArrayVector column_arrays; + column_arrays.reserve(batches.size()); for (const auto& batch : batches) { column_arrays.emplace_back(batch->column(col)); } diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h index 23bb571242e0b..1a66fc3fb5629 100644 --- a/cpp/src/arrow/record_batch.h +++ b/cpp/src/arrow/record_batch.h @@ -352,6 +352,11 @@ class ARROW_EXPORT RecordBatchReader { /// \brief Concatenate record batches /// +/// The columns of the new batch are formed by concatenating the same columns of each +/// input batch. Concatenating multiple batches into a new batch requires that their +/// schemas be consistent. It also supports merging batches without columns (length +/// only, for scenarios such as count(*)).
+/// /// \param[in] batches a vector of record batches to be concatenated /// \param[in] pool memory to store the result will be allocated from this memory pool /// \return the concatenated record batch diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc index 7c6d7d40e2d97..db3a2d3def73f 100644 --- a/cpp/src/arrow/record_batch_test.cc +++ b/cpp/src/arrow/record_batch_test.cc @@ -573,6 +573,8 @@ TEST_F(TestRecordBatch, ConcatenateRecordBatches) { ASSERT_OK_AND_ASSIGN(auto batch, ConcatenateRecordBatches({b1, b2})); ASSERT_EQ(batch->num_rows(), b1->num_rows() + b2->num_rows()); + ASSERT_BATCHES_EQUAL(*batch->Slice(0, b1->num_rows()), *b1); + ASSERT_BATCHES_EQUAL(*batch->Slice(b1->num_rows()), *b2); f0 = field("fd0", int32()); f1 = field("fd1", uint8()); @@ -587,6 +589,7 @@ TEST_F(TestRecordBatch, ConcatenateRecordBatches) { std::vector>{}); ASSERT_OK_AND_ASSIGN(batch, ConcatenateRecordBatches({null_batch})); ASSERT_EQ(batch->num_rows(), null_batch->num_rows()); + ASSERT_BATCHES_EQUAL(*batch, *null_batch); } } // namespace arrow