2121#include " arrow/io/memory.h"
2222#include " arrow/testing/random.h"
2323#include " arrow/util/config.h"
24+ #include " arrow/util/logging.h"
2425
2526#include " parquet/column_reader.h"
2627#include " parquet/column_writer.h"
@@ -167,27 +168,29 @@ std::shared_ptr<Int64Reader> BuildReader(std::shared_ptr<Buffer>& buffer,
167168
168169static void BM_ReadInt64Column (::benchmark::State& state, Repetition::type repetition,
169170 Compression::type codec, Encoding::type encoding) {
170- format::ColumnChunk thrift_metadata;
171+ const auto kNumValues = state.range (0 );
172+ const auto kBatchSize = state.range (1 );
171173
172174 ::arrow::random::RandomArrayGenerator rgen (1337 );
173- auto values = rgen.Int64 (state. range ( 0 ) , 0 , 1000000 , 0 );
175+ auto values = rgen.Int64 (kNumValues , 0 , 1000000 , 0 );
174176 const auto & int64_values = static_cast <const ::arrow::Int64Array&>(*values);
175177
176- std::vector<int16_t > definition_levels (state. range ( 0 ) , 1 );
177- std::vector<int16_t > repetition_levels (state. range ( 0 ) , 0 );
178+ std::vector<int16_t > definition_levels (kNumValues , 1 );
179+ std::vector<int16_t > repetition_levels (kNumValues , 0 );
178180 std::shared_ptr<ColumnDescriptor> schema = Int64Schema (repetition);
179181 std::shared_ptr<WriterProperties> properties = WriterProperties::Builder ()
180182 .compression (codec)
181183 ->encoding (encoding)
182184 ->disable_dictionary ()
183185 ->build ();
184186
187+ format::ColumnChunk thrift_metadata;
185188 auto metadata = ColumnChunkMetaDataBuilder::Make (
186189 properties, schema.get (), reinterpret_cast <uint8_t *>(&thrift_metadata));
187190
188191 auto stream = CreateOutputStream ();
189192 std::shared_ptr<Int64Writer> writer = BuildWriter (
190- state. range ( 0 ) , stream, metadata.get (), schema.get (), properties.get (), codec);
193+ kNumValues , stream, metadata.get (), schema.get (), properties.get (), codec);
191194 writer->WriteBatch (int64_values.length (), definition_levels.data (),
192195 repetition_levels.data (), int64_values.raw_values ());
193196 writer->Close ();
@@ -196,16 +199,17 @@ static void BM_ReadInt64Column(::benchmark::State& state, Repetition::type repet
196199 int64_t stream_size = src->size ();
197200 int64_t data_size = int64_values.length () * sizeof (int64_t );
198201
199- std::vector<int64_t > values_out (state. range ( 1 ) );
200- std::vector<int16_t > definition_levels_out (state. range ( 1 ) );
201- std::vector<int16_t > repetition_levels_out (state. range ( 1 ) );
202+ std::vector<int64_t > values_out (kBatchSize );
203+ std::vector<int16_t > definition_levels_out (kBatchSize );
204+ std::vector<int16_t > repetition_levels_out (kBatchSize );
202205 while (state.KeepRunning ()) {
203206 std::shared_ptr<Int64Reader> reader =
204- BuildReader (src, state. range ( 1 ) , codec, schema.get ());
207+ BuildReader (src, kNumValues , codec, schema.get ());
205208 int64_t values_read = 0 ;
206- for (int64_t i = 0 ; i < int64_values. length () ; i += values_read) {
207- reader->ReadBatch (values_out. size () , definition_levels_out.data (),
209+ for (int64_t i = 0 ; i < kNumValues ; i += values_read) {
210+ reader->ReadBatch (kBatchSize , definition_levels_out.data (),
208211 repetition_levels_out.data (), values_out.data (), &values_read);
212+ ARROW_CHECK_NE (values_read, 0 ) << " Unexpected end of column" ;
209213 }
210214 }
211215 SetBytesProcessed (state, repetition);
0 commit comments