Skip to content

Commit 3065079

Browse files
committed
Add cast support for (Large)ListView <-> (Large)List
1 parent e9ea12b commit 3065079

File tree

3 files changed

+331
-3
lines changed

3 files changed

+331
-3
lines changed

arrow-array/src/array/list_view_array.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@ use std::sync::Arc;
2424

2525
use crate::array::{make_array, print_long_array};
2626
use crate::iterator::GenericListViewArrayIter;
27-
use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait, new_empty_array};
27+
use crate::{
28+
Array, ArrayAccessor, ArrayRef, FixedSizeListArray, GenericListArray, OffsetSizeTrait,
29+
new_empty_array,
30+
};
2831

2932
/// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
3033
pub type ListViewArray = GenericListViewArray<i32>;
@@ -454,6 +457,36 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<Offse
454457
}
455458
}
456459

460+
impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>>
461+
for GenericListViewArray<OffsetSize>
462+
{
463+
fn from(value: GenericListArray<OffsetSize>) -> Self {
464+
let field = match value.data_type() {
465+
DataType::List(f) | DataType::LargeList(f) => f.clone(),
466+
_ => panic!(
467+
"Expected infallible creation of GenericListViewArray from GenericList failed"
468+
),
469+
};
470+
471+
let offsets = value.value_offsets();
472+
let len = offsets.len() - 1;
473+
let mut sizes = Vec::with_capacity(len);
474+
let mut view_offsets = Vec::with_capacity(len);
475+
for (i, offset) in offsets.iter().enumerate().take(len) {
476+
view_offsets.push(*offset);
477+
sizes.push(value.value_length(i));
478+
}
479+
480+
Self::new(
481+
field,
482+
ScalarBuffer::from(view_offsets),
483+
ScalarBuffer::from(sizes),
484+
value.values().clone(),
485+
value.nulls().cloned(),
486+
)
487+
}
488+
}
489+
457490
impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
458491
fn from(array: GenericListViewArray<OffsetSize>) -> Self {
459492
let len = array.len();

arrow-cast/src/cast/list.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ pub(crate) fn cast_single_element_fixed_size_list_to_values(
5050
cast_with_options(values, to, cast_options)
5151
}
5252

53+
pub(crate) fn cast_list_to_list_view<OffsetSize>(array: &dyn Array) -> Result<ArrayRef, ArrowError>
54+
where
55+
OffsetSize: OffsetSizeTrait,
56+
{
57+
let list = array.as_list::<OffsetSize>();
58+
let list_view: GenericListViewArray<OffsetSize> = list.clone().into();
59+
Ok(Arc::new(list_view))
60+
}
61+
5362
pub(crate) fn cast_fixed_size_list_to_list<OffsetSize>(
5463
array: &dyn Array,
5564
) -> Result<ArrayRef, ArrowError>
@@ -160,6 +169,40 @@ pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
160169
)?))
161170
}
162171

172+
/// Helper function to cast the values in a list view to a list
173+
pub(crate) fn cast_list_view_values<O: OffsetSizeTrait>(
174+
array: &dyn Array,
175+
to: &FieldRef,
176+
cast_options: &CastOptions,
177+
) -> Result<ArrayRef, ArrowError> {
178+
let list_view = array.as_list_view::<O>();
179+
let list_view_offsets = list_view.offsets();
180+
let sizes = list_view.sizes();
181+
let source_values = list_view.values();
182+
let mut indices = Vec::with_capacity(list_view.values().len());
183+
let mut offsets = Vec::with_capacity(list_view.len() + 1);
184+
offsets.push(O::usize_as(0));
185+
for i in 0..list_view.len() {
186+
let offset = list_view_offsets[i].as_usize();
187+
let size = sizes[i].as_usize();
188+
let end = offset + size;
189+
for j in offset..end {
190+
indices.push(j as i32);
191+
}
192+
offsets.push(O::usize_as(indices.len()));
193+
}
194+
let indices_array = Int32Array::from(indices);
195+
let values = arrow_select::take::take(source_values, &indices_array, None)?;
196+
let values = cast_with_options(&values, to.data_type(), cast_options)?;
197+
let offsets = OffsetBuffer::new(offsets.into());
198+
Ok(Arc::new(GenericListArray::<O>::try_new(
199+
to.clone(),
200+
offsets,
201+
values,
202+
list_view.nulls().cloned(),
203+
)?))
204+
}
205+
163206
/// Cast the container type of List/Largelist array along with the inner datatype
164207
pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
165208
array: &dyn Array,

arrow-cast/src/cast/mod.rs

Lines changed: 254 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,20 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
153153
(List(list_from) | LargeList(list_from), FixedSizeList(list_to, _)) => {
154154
can_cast_types(list_from.data_type(), list_to.data_type())
155155
}
156+
(List(list_from), ListView(list_to)) => {
157+
can_cast_types(list_from.data_type(), list_to.data_type())
158+
}
156159
(List(_), _) => false,
160+
(ListView(list_from), List(list_to)) => {
161+
can_cast_types(list_from.data_type(), list_to.data_type())
162+
}
163+
(LargeListView(list_from), LargeList(list_to)) => {
164+
can_cast_types(list_from.data_type(), list_to.data_type())
165+
}
166+
(LargeList(list_from), LargeListView(list_to)) => {
167+
can_cast_types(list_from.data_type(), list_to.data_type())
168+
}
169+
157170
(FixedSizeList(list_from, _), List(list_to))
158171
| (FixedSizeList(list_from, _), LargeList(list_to)) => {
159172
can_cast_types(list_from.data_type(), list_to.data_type())
@@ -864,9 +877,40 @@ pub fn cast_with_options(
864877
let array = array.as_list::<i64>();
865878
cast_list_to_fixed_size_list::<i64>(array, field, *size, cast_options)
866879
}
867-
(List(_) | LargeList(_), _) => match to_type {
880+
(ListView(_), List(_)) => match to_type {
881+
List(list_to) => cast_list_view_values::<i32>(array, list_to, cast_options),
882+
_ => Err(ArrowError::CastError(
883+
"Cannot cast list view to non-list data types".to_string(),
884+
)),
885+
},
886+
(LargeListView(_), LargeList(_)) => match to_type {
887+
LargeList(list_to) => cast_list_view_values::<i64>(array, list_to, cast_options),
888+
_ => Err(ArrowError::CastError(
889+
"Cannot cast list view to non-list data types".to_string(),
890+
)),
891+
},
892+
(List(list_from) | LargeList(list_from), _) => match to_type {
868893
Utf8 => value_to_string::<i32>(array, cast_options),
869894
LargeUtf8 => value_to_string::<i64>(array, cast_options),
895+
ListView(list_to) => {
896+
if list_to.data_type() != list_from.data_type() {
897+
Err(ArrowError::CastError(
898+
"Cannot cast list to list view with different inner type".to_string(),
899+
))
900+
} else {
901+
cast_list_to_list_view::<i32>(array)
902+
}
903+
}
904+
LargeListView(list_to) => {
905+
if list_to.data_type() != list_from.data_type() {
906+
Err(ArrowError::CastError(
907+
"Cannot cast large list to large list view with different inner type"
908+
.to_string(),
909+
))
910+
} else {
911+
cast_list_to_list_view::<i64>(array)
912+
}
913+
}
870914
_ => Err(ArrowError::CastError(
871915
"Cannot cast list to non-list data types".to_string(),
872916
)),
@@ -2677,8 +2721,8 @@ mod tests {
26772721
use super::*;
26782722
use DataType::*;
26792723
use arrow_array::{Int64Array, RunArray, StringArray};
2680-
use arrow_buffer::i256;
26812724
use arrow_buffer::{Buffer, IntervalDayTime, NullBuffer};
2725+
use arrow_buffer::{ScalarBuffer, i256};
26822726
use arrow_schema::{DataType, Field};
26832727
use chrono::NaiveDate;
26842728
use half::f16;
@@ -11866,4 +11910,212 @@ mod tests {
1186611910
// Verify the run-ends were cast correctly (run ends at 3, 6, 9)
1186711911
assert_eq!(run_array.run_ends().values(), &[3i64, 6i64, 9i64]);
1186811912
}
11913+
11914+
/// A `ListView` whose views are contiguous and in order casts to the equivalent `List`.
#[test]
fn test_cast_list_view_to_list() {
    let source = ListViewArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        ScalarBuffer::from(vec![0, 3, 6]),
        ScalarBuffer::from(vec![3, 3, 3]),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    let target_type = DataType::List(Arc::new(Field::new("a", DataType::Int32, false)));
    let casted = cast(&source, &target_type).unwrap();
    let actual = casted.as_any().downcast_ref::<ListArray>().unwrap();

    // Three consecutive lists of three elements each.
    let mut lengths = OffsetBufferBuilder::new(0);
    for _ in 0..3 {
        lengths.push_length(3);
    }
    let expected = ListArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        lengths.finish(),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    assert_eq!(actual, &expected);
}
11942+
11943+
/// A `List` casts to a `ListView` whose views mirror the list offsets.
#[test]
fn test_cast_list_to_list_view() {
    // Three consecutive lists of three elements each.
    let mut lengths = OffsetBufferBuilder::new(3);
    for _ in 0..3 {
        lengths.push_length(3);
    }
    let source = ListArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        lengths.finish(),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    let expected = ListViewArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        ScalarBuffer::from(vec![0, 3, 6]),
        ScalarBuffer::from(vec![3, 3, 3]),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    let target_type = DataType::ListView(Arc::new(Field::new("a", DataType::Int32, false)));
    let casted = cast(&source, &target_type).unwrap();
    let actual = casted.as_any().downcast_ref::<ListViewArray>().unwrap();

    assert_eq!(actual, &expected);
}
11975+
11976+
/// A `LargeListView` with contiguous, in-order views casts to the equivalent `LargeList`.
#[test]
fn test_cast_large_list_view_to_large_list() {
    let source = LargeListViewArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        ScalarBuffer::from(vec![0, 3, 6]),
        ScalarBuffer::from(vec![3, 3, 3]),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    let target_type = DataType::LargeList(Arc::new(Field::new("a", DataType::Int32, false)));
    let casted = cast(&source, &target_type).unwrap();
    let actual = casted.as_any().downcast_ref::<LargeListArray>().unwrap();

    // Three consecutive lists of three elements each.
    let mut lengths = OffsetBufferBuilder::new(0);
    for _ in 0..3 {
        lengths.push_length(3);
    }
    let expected = LargeListArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        lengths.finish(),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    assert_eq!(actual, &expected);
}
12007+
12008+
/// A `LargeList` casts to a `LargeListView` whose views mirror the list offsets.
#[test]
fn test_cast_large_list_to_large_list_view() {
    // Three consecutive lists of three elements each.
    let mut lengths = OffsetBufferBuilder::new(3);
    for _ in 0..3 {
        lengths.push_length(3);
    }
    let source = LargeListArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        lengths.finish(),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    let expected = LargeListViewArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        ScalarBuffer::from(vec![0, 3, 6]),
        ScalarBuffer::from(vec![3, 3, 3]),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    let target_type = DataType::LargeListView(Arc::new(Field::new("a", DataType::Int32, false)));
    let casted = cast(&source, &target_type).unwrap();
    let actual = casted
        .as_any()
        .downcast_ref::<LargeListViewArray>()
        .unwrap();

    assert_eq!(actual, &expected);
}
12040+
12041+
/// Views that reference the child values out of order are laid out in view order
/// in the resulting `List`.
#[test]
fn test_cast_list_view_to_list_out_of_order() {
    // The second view points at the last third of the values, the third view at the middle.
    let source = ListViewArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        ScalarBuffer::from(vec![0, 6, 3]),
        ScalarBuffer::from(vec![3, 3, 3]),
        Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
        None,
    );

    let target_type = DataType::List(Arc::new(Field::new("a", DataType::Int32, false)));
    let casted = cast(&source, &target_type).unwrap();
    let actual = casted.as_any().downcast_ref::<ListArray>().unwrap();

    let mut lengths = OffsetBufferBuilder::new(0);
    for _ in 0..3 {
        lengths.push_length(3);
    }
    let expected = ListArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        lengths.finish(),
        Arc::new(Int32Array::from(vec![1, 2, 3, 7, 8, 9, 4, 5, 6])),
        None,
    );

    assert_eq!(actual, &expected);
}
12068+
12069+
/// Views that overlap in the child values are materialised separately, duplicating
/// the shared elements in the resulting `List`.
#[test]
fn test_cast_list_view_to_list_overlapping() {
    // Both views start at offset 0: the first covers `[1]`, the second `[1, 2]`.
    let source = ListViewArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        ScalarBuffer::from(vec![0, 0]),
        ScalarBuffer::from(vec![1, 2]),
        Arc::new(Int32Array::from(vec![1, 2])),
        None,
    );

    let target_type = DataType::List(Arc::new(Field::new("a", DataType::Int32, false)));
    let casted = cast(&source, &target_type).unwrap();
    let actual = casted.as_any().downcast_ref::<ListArray>().unwrap();

    let mut lengths = OffsetBufferBuilder::new(0);
    for list_len in [1, 2] {
        lengths.push_length(list_len);
    }
    let expected = ListArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        lengths.finish(),
        Arc::new(Int32Array::from(vec![1, 1, 2])),
        None,
    );

    assert_eq!(actual, &expected);
}
12095+
12096+
/// An empty `ListView` casts to an empty `List`.
#[test]
fn test_cast_list_view_to_list_empty() {
    let source = ListViewArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        ScalarBuffer::from(Vec::<i32>::new()),
        ScalarBuffer::from(Vec::<i32>::new()),
        Arc::new(Int32Array::from(Vec::<i32>::new())),
        None,
    );

    let target_type = DataType::List(Arc::new(Field::new("a", DataType::Int32, false)));
    let casted = cast(&source, &target_type).unwrap();
    let actual = casted.as_any().downcast_ref::<ListArray>().unwrap();

    let expected = ListArray::new(
        Arc::new(Field::new("a", DataType::Int32, false)),
        OffsetBuffer::new_empty(),
        Arc::new(Int32Array::from(Vec::<i32>::new())),
        None,
    );

    assert_eq!(actual, &expected);
}
1186912121
}

0 commit comments

Comments
 (0)