Skip to content

Commit

Permalink
fix: v2 writer used the wrong value to calculate how many rows were a…
Browse files Browse the repository at this point in the history
…vailable in array (#2704)
  • Loading branch information
westonpace authored Aug 8, 2024
1 parent d1f7d3d commit a7e65f4
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
2 changes: 1 addition & 1 deletion rust/lance-encoding/src/encodings/logical/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ impl PrimitiveFieldEncoder {
let mut offset = 0;
let part_size = bit_util::ceil(array.len(), num_parts);
for _ in 0..num_parts {
let avail = array.len() - part_size;
let avail = array.len() - offset;
let chunk_size = avail.min(part_size);
let part = array.slice(offset, chunk_size);
let task = self.create_encode_task(vec![part])?;
Expand Down
15 changes: 14 additions & 1 deletion rust/lance-encoding/src/encodings/physical/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ pub mod tests {
}

#[test_log::test(tokio::test)]
async fn test_few_rows_bigger_than_max_page_size() {
async fn test_bigger_than_max_page_size() {
// Create an array with one single 32MiB string
let big_string = String::from_iter((0..(32 * 1024 * 1024)).map(|_| '0'));
let string_array = StringArray::from(vec![
Expand All @@ -624,6 +624,19 @@ pub mod tests {
HashMap::new(),
)
.await;

// This is a regression testing the case where a page with X rows is split into Y parts
// where the number of parts is not evenly divisible by the number of rows. In this
// case we are splitting 90 rows into 4 parts.
let big_string = String::from_iter((0..(1000 * 1000)).map(|_| '0'));
let string_array = StringArray::from_iter_values((0..90).map(|_| big_string.clone()));

check_round_trip_encoding_of_data(
vec![Arc::new(string_array)],
&TestCases::default(),
HashMap::new(),
)
.await;
}

#[test_log::test(tokio::test)]
Expand Down

0 comments on commit a7e65f4

Please sign in to comment.