Skip to content

Commit 2fc5f70

Browse files
committed
fix: build_delete_rows_selection should handle consecutive runs of deletes that cross page group boundaries
1 parent 4017a0e commit 2fc5f70

File tree

1 file changed

+69
-15
lines changed

1 file changed

+69
-15
lines changed

Diff for: crates/iceberg/src/arrow/reader.rs

+69-15
Original file line numberDiff line numberDiff line change
@@ -407,16 +407,19 @@ impl ArrowReader {
407407
current_idx += run_length;
408408
}
409409

410-
// `skip` all consecutive deleted rows
410+
// `skip` all consecutive deleted rows in the current row group
411411
let mut run_length = 0;
412-
while next_deleted_row_idx == current_idx {
412+
while next_deleted_row_idx == current_idx
413+
&& next_deleted_row_idx < next_page_base_idx
414+
{
413415
run_length += 1;
414416
current_idx += 1;
415417
positional_deletes.remove(next_deleted_row_idx);
416418

417419
next_deleted_row_idx = match positional_deletes.min() {
418420
Some(next_deleted_row_idx) => next_deleted_row_idx,
419421
_ => {
422+
// conclude the selection and break if this was the last positional delete
420423
results.push(RowSelector::skip(run_length));
421424
break 'outer;
422425
}
@@ -425,6 +428,7 @@ impl ArrowReader {
425428
results.push(RowSelector::skip(run_length));
426429
}
427430

431+
// break if this was the final selected row group
428432
if let Some(selected_row_groups) = selected_row_groups {
429433
if selected_row_groups_idx == selected_row_groups.len() {
430434
break;
@@ -1532,35 +1536,85 @@ message schema {
15321536

15331537
let selected_row_groups = Some(vec![1, 3]);
15341538

1539+
/* cases to cover:
1540+
* {skip|select} {first|intermediate|last} {one row|multiple rows} in
1541+
{first|imtermediate|last} {skipped|selected} row group
1542+
* row group selection disabled
1543+
*/
1544+
15351545
let positional_deletes = RoaringTreemap::from_iter(&[
1536-
1, // in skipped row group 0, should be ignored
1537-
3, // in skipped row group 0, should be ignored
1538-
4, // in skipped row group 0, should be ignored
1539-
5, // in skipped row group 0, should be ignored
1540-
1002, // solitary row in selected row group 1
1541-
1010, // run of 5 rows in selected row group 1
1542-
1011, 1012, 1013, 1014, 1600, // should ignore, in skipped row group 2
1543-
2100, // single row in selected row group 3,
1546+
1, // in skipped rg 0, should be ignored
1547+
3, // run of three consecutive items in skipped rg0
1548+
4, 5, 998, // two consecutive items at end of skipped rg0
1549+
999, 1000, // solitary row at start of selected rg1 (1, 9)
1550+
1010, // run of 3 rows in selected rg1
1551+
1011, 1012, // (3, 485)
1552+
1498, // run of two items at end of selected rg1
1553+
1499, 1500, // run of two items at start of skipped rg2
1554+
1501, 1600, // should ignore, in skipped rg2
1555+
1999, // single row at end of skipped rg2
1556+
2000, // run of two items at start of selected rg3
1557+
2001, // (4, 98)
1558+
2100, // single row in selected row group 3 (1, 99)
15441559
2200, // run of 3 consecutive rows in selected row group 3
1545-
2201, 2202,
1560+
2201, 2202, // (3, 796)
1561+
2999, // single item at end of selected rg3 (1)
1562+
3000, // single item at start of skipped rg4
15461563
]);
15471564

1565+
// using selected row groups 1 and 3
15481566
let result = ArrowReader::build_deletes_row_selection(
15491567
&row_groups_metadata,
15501568
&selected_row_groups,
1569+
positional_deletes.clone(),
1570+
)
1571+
.unwrap();
1572+
1573+
let expected = RowSelection::from(vec![
1574+
RowSelector::skip(1),
1575+
RowSelector::select(9),
1576+
RowSelector::skip(3),
1577+
RowSelector::select(485),
1578+
RowSelector::skip(4),
1579+
RowSelector::select(98),
1580+
RowSelector::skip(1),
1581+
RowSelector::select(99),
1582+
RowSelector::skip(3),
1583+
RowSelector::select(796),
1584+
RowSelector::skip(1),
1585+
]);
1586+
1587+
assert_eq!(result, expected);
1588+
1589+
// selecting all row groups
1590+
let result = ArrowReader::build_deletes_row_selection(
1591+
&row_groups_metadata,
1592+
&None,
15511593
positional_deletes,
15521594
)
15531595
.unwrap();
15541596

15551597
let expected = RowSelection::from(vec![
1556-
RowSelector::select(2),
1598+
RowSelector::select(1),
15571599
RowSelector::skip(1),
1558-
RowSelector::select(7),
1559-
RowSelector::skip(5),
1560-
RowSelector::select(100),
1600+
RowSelector::select(1),
1601+
RowSelector::skip(3),
1602+
RowSelector::select(992),
1603+
RowSelector::skip(3),
1604+
RowSelector::select(9),
1605+
RowSelector::skip(3),
1606+
RowSelector::select(485),
1607+
RowSelector::skip(4),
1608+
RowSelector::select(98),
1609+
RowSelector::skip(1),
1610+
RowSelector::select(398),
1611+
RowSelector::skip(3),
1612+
RowSelector::select(98),
15611613
RowSelector::skip(1),
15621614
RowSelector::select(99),
15631615
RowSelector::skip(3),
1616+
RowSelector::select(796),
1617+
RowSelector::skip(2),
15641618
]);
15651619

15661620
assert_eq!(result, expected);

0 commit comments

Comments
 (0)