@@ -407,16 +407,19 @@ impl ArrowReader {
407
407
current_idx += run_length;
408
408
}
409
409
410
- // `skip` all consecutive deleted rows
410
+ // `skip` all consecutive deleted rows in the current row group
411
411
let mut run_length = 0 ;
412
- while next_deleted_row_idx == current_idx {
412
+ while next_deleted_row_idx == current_idx
413
+ && next_deleted_row_idx < next_page_base_idx
414
+ {
413
415
run_length += 1 ;
414
416
current_idx += 1 ;
415
417
positional_deletes. remove ( next_deleted_row_idx) ;
416
418
417
419
next_deleted_row_idx = match positional_deletes. min ( ) {
418
420
Some ( next_deleted_row_idx) => next_deleted_row_idx,
419
421
_ => {
422
+ // conclude the selection and break if this was the last positional delete
420
423
results. push ( RowSelector :: skip ( run_length) ) ;
421
424
break ' outer;
422
425
}
@@ -425,6 +428,7 @@ impl ArrowReader {
425
428
results. push ( RowSelector :: skip ( run_length) ) ;
426
429
}
427
430
431
+ // break if this was the final selected row group
428
432
if let Some ( selected_row_groups) = selected_row_groups {
429
433
if selected_row_groups_idx == selected_row_groups. len ( ) {
430
434
break ;
@@ -1532,35 +1536,85 @@ message schema {
1532
1536
1533
1537
let selected_row_groups = Some ( vec ! [ 1 , 3 ] ) ;
1534
1538
1539
+ /* cases to cover:
1540
+ * {skip|select} {first|intermediate|last} {one row|multiple rows} in
1541
+ {first|imtermediate|last} {skipped|selected} row group
1542
+ * row group selection disabled
1543
+ */
1544
+
1535
1545
let positional_deletes = RoaringTreemap :: from_iter ( & [
1536
- 1 , // in skipped row group 0, should be ignored
1537
- 3 , // in skipped row group 0, should be ignored
1538
- 4 , // in skipped row group 0, should be ignored
1539
- 5 , // in skipped row group 0, should be ignored
1540
- 1002 , // solitary row in selected row group 1
1541
- 1010 , // run of 5 rows in selected row group 1
1542
- 1011 , 1012 , 1013 , 1014 , 1600 , // should ignore, in skipped row group 2
1543
- 2100 , // single row in selected row group 3,
1546
+ 1 , // in skipped rg 0, should be ignored
1547
+ 3 , // run of three consecutive items in skipped rg0
1548
+ 4 , 5 , 998 , // two consecutive items at end of skipped rg0
1549
+ 999 , 1000 , // solitary row at start of selected rg1 (1, 9)
1550
+ 1010 , // run of 3 rows in selected rg1
1551
+ 1011 , 1012 , // (3, 485)
1552
+ 1498 , // run of two items at end of selected rg1
1553
+ 1499 , 1500 , // run of two items at start of skipped rg2
1554
+ 1501 , 1600 , // should ignore, in skipped rg2
1555
+ 1999 , // single row at end of skipped rg2
1556
+ 2000 , // run of two items at start of selected rg3
1557
+ 2001 , // (4, 98)
1558
+ 2100 , // single row in selected row group 3 (1, 99)
1544
1559
2200 , // run of 3 consecutive rows in selected row group 3
1545
- 2201 , 2202 ,
1560
+ 2201 , 2202 , // (3, 796)
1561
+ 2999 , // single item at end of selected rg3 (1)
1562
+ 3000 , // single item at start of skipped rg4
1546
1563
] ) ;
1547
1564
1565
+ // using selected row groups 1 and 3
1548
1566
let result = ArrowReader :: build_deletes_row_selection (
1549
1567
& row_groups_metadata,
1550
1568
& selected_row_groups,
1569
+ positional_deletes. clone ( ) ,
1570
+ )
1571
+ . unwrap ( ) ;
1572
+
1573
+ let expected = RowSelection :: from ( vec ! [
1574
+ RowSelector :: skip( 1 ) ,
1575
+ RowSelector :: select( 9 ) ,
1576
+ RowSelector :: skip( 3 ) ,
1577
+ RowSelector :: select( 485 ) ,
1578
+ RowSelector :: skip( 4 ) ,
1579
+ RowSelector :: select( 98 ) ,
1580
+ RowSelector :: skip( 1 ) ,
1581
+ RowSelector :: select( 99 ) ,
1582
+ RowSelector :: skip( 3 ) ,
1583
+ RowSelector :: select( 796 ) ,
1584
+ RowSelector :: skip( 1 ) ,
1585
+ ] ) ;
1586
+
1587
+ assert_eq ! ( result, expected) ;
1588
+
1589
+ // selecting all row groups
1590
+ let result = ArrowReader :: build_deletes_row_selection (
1591
+ & row_groups_metadata,
1592
+ & None ,
1551
1593
positional_deletes,
1552
1594
)
1553
1595
. unwrap ( ) ;
1554
1596
1555
1597
let expected = RowSelection :: from ( vec ! [
1556
- RowSelector :: select( 2 ) ,
1598
+ RowSelector :: select( 1 ) ,
1557
1599
RowSelector :: skip( 1 ) ,
1558
- RowSelector :: select( 7 ) ,
1559
- RowSelector :: skip( 5 ) ,
1560
- RowSelector :: select( 100 ) ,
1600
+ RowSelector :: select( 1 ) ,
1601
+ RowSelector :: skip( 3 ) ,
1602
+ RowSelector :: select( 992 ) ,
1603
+ RowSelector :: skip( 3 ) ,
1604
+ RowSelector :: select( 9 ) ,
1605
+ RowSelector :: skip( 3 ) ,
1606
+ RowSelector :: select( 485 ) ,
1607
+ RowSelector :: skip( 4 ) ,
1608
+ RowSelector :: select( 98 ) ,
1609
+ RowSelector :: skip( 1 ) ,
1610
+ RowSelector :: select( 398 ) ,
1611
+ RowSelector :: skip( 3 ) ,
1612
+ RowSelector :: select( 98 ) ,
1561
1613
RowSelector :: skip( 1 ) ,
1562
1614
RowSelector :: select( 99 ) ,
1563
1615
RowSelector :: skip( 3 ) ,
1616
+ RowSelector :: select( 796 ) ,
1617
+ RowSelector :: skip( 2 ) ,
1564
1618
] ) ;
1565
1619
1566
1620
assert_eq ! ( result, expected) ;
0 commit comments