@@ -260,7 +260,8 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRepeated) {
260260 ASSERT_NO_FATAL_FAILURE (ExecuteDict (num_pages, levels_per_page, &descr));
261261}
262262
263- TEST_F (TestPrimitiveReader, TestInt32FlatRequiredSkip) {
263+ // Tests skipping around page boundaries.
264+ TEST_F (TestPrimitiveReader, TestSkipAroundPageBoundries) {
264265 int levels_per_page = 100 ;
265266 int num_pages = 5 ;
266267 max_def_level_ = 0 ;
@@ -289,10 +290,10 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
289290 values_.begin () + static_cast <int >(2.5 * static_cast <double >(levels_per_page)));
290291 ASSERT_TRUE (vector_equal (sub_values, vresult));
291292
292- // 2) skip_size == page_size (skip across two pages)
293+ // 2) skip_size == page_size (skip across two pages from page 2.5 to 3.5 )
293294 levels_skipped = reader->Skip (levels_per_page);
294295 ASSERT_EQ (levels_per_page, levels_skipped);
295- // Read half a page
296+ // Read half a page (page 3.5 to 4)
296297 reader->ReadBatch (levels_per_page / 2 , dresult.data (), rresult.data (), vresult.data (),
297298 &values_read);
298299 sub_values.clear ();
@@ -303,10 +304,10 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
303304 ASSERT_TRUE (vector_equal (sub_values, vresult));
304305
305306 // 3) skip_size < page_size (skip limited to a single page)
306- // Skip half a page
307+ // Skip half a page (page 4 to 4.5)
307308 levels_skipped = reader->Skip (levels_per_page / 2 );
308309 ASSERT_EQ (0.5 * levels_per_page, levels_skipped);
309- // Read half a page
310+ // Read half a page (page 4.5 to 5)
310311 reader->ReadBatch (levels_per_page / 2 , dresult.data (), rresult.data (), vresult.data (),
311312 &values_read);
312313 sub_values.clear ();
@@ -316,13 +317,71 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
316317 values_.end ());
317318 ASSERT_TRUE (vector_equal (sub_values, vresult));
318319
320+ // 4) skip_size = 0
321+ levels_skipped = reader->Skip (0 );
322+ ASSERT_EQ (0 , levels_skipped);
323+
324+ // 5) Skip past the last page. All 5 pages have already been either skipped
325+ // or read, so there is nothing left to skip.
326+ levels_skipped = reader->Skip (10 );
327+ ASSERT_EQ (0 , levels_skipped);
328+
319329 values_.clear ();
320330 def_levels_.clear ();
321331 rep_levels_.clear ();
322332 pages_.clear ();
323333 reader_.reset ();
324334}
325335
336+ // Skip with a repeated field. This test makes it clear that Skip() counts
337+ // levels (values), not records.
338+ TEST_F (TestPrimitiveReader, TestSkipRepeatedField) {
339+ // Example schema: message M { repeated int32 b = 1 }
340+ max_def_level_ = 1 ;
341+ max_rep_level_ = 1 ;
342+ NodePtr type = schema::Int32 (" b" , Repetition::REPEATED);
343+ const ColumnDescriptor descr (type, max_def_level_, max_rep_level_);
344+ // Example rows: {}, {[10, 10]}, {[20, 20, 20]}
345+ std::vector<int32_t > values = {10 , 10 , 20 , 20 , 20 };
346+ std::vector<int16_t > def_levels = {0 , 1 , 1 , 1 , 1 , 1 };
347+ std::vector<int16_t > rep_levels = {0 , 0 , 1 , 0 , 1 , 1 };
348+ num_values_ = static_cast <int >(def_levels.size ());
349+ std::shared_ptr<DataPageV1> page = MakeDataPage<Int32Type>(
350+ &descr, values, num_values_, Encoding::PLAIN, /* indices=*/ {},
351+ /* indices_size=*/ 0 , def_levels, max_def_level_, rep_levels, max_rep_level_);
352+
353+ pages_.push_back (std::move (page));
354+
355+ InitReader (&descr);
356+ Int32Reader* reader = static_cast <Int32Reader*>(reader_.get ());
357+
358+ // Vectors to hold read values, definition levels, and repetition levels.
359+ std::vector<int32_t > read_vals (4 , -1 );
360+ std::vector<int16_t > read_defs (4 , -1 );
361+ std::vector<int16_t > read_reps (4 , -1 );
362+
363+ // Skip two levels: the empty record {} and the first value of {[10, 10]}.
364+ int64_t levels_skipped = reader->Skip (2 );
365+ ASSERT_EQ (2 , levels_skipped);
366+
367+ int64_t num_read_values = 0 ;
368+ // Ask for up to 10 levels; only 4 remain after the skip.
369+ reader->ReadBatch (10 , read_defs.data (), read_reps.data (), read_vals.data (),
370+ &num_read_values);
371+ ASSERT_EQ (num_read_values, 4 );
372+ // Note that we end up in the middle of the record {[10, 10]}
373+ ASSERT_TRUE (vector_equal ({10 , 20 , 20 , 20 }, read_vals));
374+ ASSERT_TRUE (vector_equal ({1 , 1 , 1 , 1 }, read_defs));
375+ ASSERT_TRUE (vector_equal ({1 , 0 , 1 , 1 }, read_reps));
376+
377+ // No values remain in the data page, so both Skip and ReadBatch yield 0.
378+ levels_skipped = reader->Skip (2 );
379+ ASSERT_EQ (0 , levels_skipped);
380+ reader->ReadBatch (10 , read_defs.data (), read_reps.data (), read_vals.data (),
381+ &num_read_values);
382+ ASSERT_EQ (num_read_values, 0 );
383+ }
384+
326385// Page claims to have two values but only 1 is present.
327386TEST_F (TestPrimitiveReader, TestReadValuesMissing) {
328387 max_def_level_ = 1 ;
0 commit comments