-
Notifications
You must be signed in to change notification settings - Fork 1k
Support more operations on ListView #8645
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
95be754
feat: improved ListView support
a10y edc98a6
fix ArrayData validation for ListView
a10y a3b821b
more fixes, add test
a10y eefd289
test for concat
a10y d7e7846
fix + tests for take list_view
a10y b751907
remove unused test macro
a10y 124b437
fix license headers
a10y 278f297
remove old comment
a10y c22b4a7
address comments
a10y 722f34e
more tests, more clippy
a10y f3f8064
reduce
a10y d0da6c5
Add test for equality kernel
a10y 5ac42a8
use lhs_range_sizes to account for lhs_start/rhs_start
a10y 3cf2342
add test for equal_ranges kernel over ListView
a10y File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,129 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use crate::ArrayData; | ||
| use crate::data::count_nulls; | ||
| use crate::equal::equal_values; | ||
| use arrow_buffer::ArrowNativeType; | ||
| use num_integer::Integer; | ||
|
|
||
| pub(super) fn list_view_equal<T: ArrowNativeType + Integer>( | ||
| lhs: &ArrayData, | ||
| rhs: &ArrayData, | ||
| lhs_start: usize, | ||
| rhs_start: usize, | ||
| len: usize, | ||
| ) -> bool { | ||
| let lhs_offsets = lhs.buffer::<T>(0); | ||
| let lhs_sizes = lhs.buffer::<T>(1); | ||
|
|
||
| let rhs_offsets = rhs.buffer::<T>(0); | ||
| let rhs_sizes = rhs.buffer::<T>(1); | ||
|
|
||
| let lhs_data = &lhs.child_data()[0]; | ||
| let rhs_data = &rhs.child_data()[0]; | ||
|
|
||
| let lhs_null_count = count_nulls(lhs.nulls(), lhs_start, len); | ||
| let rhs_null_count = count_nulls(rhs.nulls(), rhs_start, len); | ||
|
|
||
| if lhs_null_count != rhs_null_count { | ||
| return false; | ||
| } | ||
|
|
||
| if lhs_null_count == 0 { | ||
| // non-null pathway: all sizes must be equal, and all values must be equal | ||
| let lhs_range_sizes = &lhs_sizes[lhs_start..lhs_start + len]; | ||
| let rhs_range_sizes = &rhs_sizes[rhs_start..rhs_start + len]; | ||
|
|
||
| if lhs_range_sizes.len() != rhs_range_sizes.len() { | ||
| return false; | ||
| } | ||
|
|
||
a10y marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if lhs_range_sizes != rhs_range_sizes { | ||
| return false; | ||
| } | ||
|
|
||
| // Check values for equality | ||
| let lhs_range_offsets = &lhs_offsets[lhs_start..lhs_start + len]; | ||
| let rhs_range_offsets = &rhs_offsets[rhs_start..rhs_start + len]; | ||
|
|
||
| if lhs_range_offsets.len() != rhs_range_offsets.len() { | ||
| return false; | ||
| } | ||
|
|
||
| for ((&lhs_offset, &rhs_offset), &size) in lhs_range_offsets | ||
| .iter() | ||
| .zip(rhs_range_offsets) | ||
| .zip(lhs_range_sizes) | ||
| { | ||
| let lhs_offset = lhs_offset.to_usize().unwrap(); | ||
| let rhs_offset = rhs_offset.to_usize().unwrap(); | ||
| let size = size.to_usize().unwrap(); | ||
|
|
||
| // Check if offsets are valid for the given range | ||
| if !equal_values(lhs_data, rhs_data, lhs_offset, rhs_offset, size) { | ||
| return false; | ||
| } | ||
| } | ||
| } else { | ||
| // Need to integrate validity check in the inner loop. | ||
| // non-null pathway: all sizes must be equal, and all values must be equal | ||
| let lhs_range_sizes = &lhs_sizes[lhs_start..lhs_start + len]; | ||
| let rhs_range_sizes = &rhs_sizes[rhs_start..rhs_start + len]; | ||
|
|
||
| let lhs_nulls = lhs.nulls().unwrap().slice(lhs_start, len); | ||
| let rhs_nulls = rhs.nulls().unwrap().slice(rhs_start, len); | ||
|
|
||
| // Sizes can differ if values are null | ||
| if lhs_range_sizes.len() != rhs_range_sizes.len() { | ||
a10y marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return false; | ||
| } | ||
|
|
||
| // Check values for equality, with null checking | ||
| let lhs_range_offsets = &lhs_offsets[lhs_start..lhs_start + len]; | ||
| let rhs_range_offsets = &rhs_offsets[rhs_start..rhs_start + len]; | ||
|
|
||
| if lhs_range_offsets.len() != rhs_range_offsets.len() { | ||
| return false; | ||
| } | ||
|
|
||
| for (index, ((&lhs_offset, &rhs_offset), &size)) in lhs_range_offsets | ||
| .iter() | ||
| .zip(rhs_range_offsets) | ||
| .zip(lhs_range_sizes) | ||
| .enumerate() | ||
| { | ||
| let lhs_is_null = lhs_nulls.is_null(index); | ||
| let rhs_is_null = rhs_nulls.is_null(index); | ||
|
|
||
| if lhs_is_null != rhs_is_null { | ||
| return false; | ||
| } | ||
|
|
||
| let lhs_offset = lhs_offset.to_usize().unwrap(); | ||
| let rhs_offset = rhs_offset.to_usize().unwrap(); | ||
| let size = size.to_usize().unwrap(); | ||
|
|
||
| // Check if values match in the range | ||
| if !lhs_is_null && !equal_values(lhs_data, rhs_data, lhs_offset, rhs_offset, size) { | ||
| return false; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| true | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| use crate::ArrayData; | ||
| use crate::transform::_MutableArrayData; | ||
| use arrow_buffer::ArrowNativeType; | ||
| use num_integer::Integer; | ||
| use num_traits::CheckedAdd; | ||
|
|
||
| pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>( | ||
| array: &ArrayData, | ||
| ) -> crate::transform::Extend<'_> { | ||
| let offsets = array.buffer::<T>(0); | ||
| let sizes = array.buffer::<T>(1); | ||
| Box::new( | ||
| move |mutable: &mut _MutableArrayData, _index: usize, start: usize, len: usize| { | ||
| let offset_buffer = &mut mutable.buffer1; | ||
| let sizes_buffer = &mut mutable.buffer2; | ||
|
|
||
| for &offset in &offsets[start..start + len] { | ||
| offset_buffer.push(offset); | ||
| } | ||
|
|
||
| // sizes | ||
| for &size in &sizes[start..start + len] { | ||
| sizes_buffer.push(size); | ||
| } | ||
|
|
||
| // the beauty of views is that we don't need to copy child_data, we just splat | ||
| // the offsets and sizes. | ||
a10y marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }, | ||
| ) | ||
| } | ||
|
|
||
| pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) { | ||
| let offset_buffer = &mut mutable.buffer1; | ||
| let sizes_buffer = &mut mutable.buffer2; | ||
|
|
||
| // We push 0 as a placeholder for NULL values in both the offsets and sizes | ||
| (0..len).for_each(|_| offset_buffer.push(T::default())); | ||
| (0..len).for_each(|_| sizes_buffer.push(T::default())); | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was a bug before. You can verify this by constructing a list_view_array and then doing
list_view_array.to_data().into_builder().build().unwrap() and it will panic, because values_length is the length of the inner values, not of the list itself.