Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set nulls correctly for all types of arrays/vectors #344

Merged
merged 8 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 51 additions & 21 deletions crates/duckdb/src/vtab/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,13 +262,7 @@ pub fn record_batch_to_duckdb_data_chunk(
fn primitive_array_to_flat_vector<T: ArrowPrimitiveType>(array: &PrimitiveArray<T>, out_vector: &mut FlatVector) {
// assert!(array.len() <= out_vector.capacity());
out_vector.copy::<T::Native>(array.values());
if let Some(nulls) = array.nulls() {
for (i, null) in nulls.into_iter().enumerate() {
if !null {
out_vector.set_null(i);
}
}
}
set_nulls_in_flat_vector(array, out_vector);
}

fn primitive_array_to_flat_vector_cast<T: ArrowPrimitiveType>(
Expand All @@ -279,13 +273,7 @@ fn primitive_array_to_flat_vector_cast<T: ArrowPrimitiveType>(
let array = arrow::compute::kernels::cast::cast(array, &data_type).unwrap();
let out_vector: &mut FlatVector = out_vector.as_mut_any().downcast_mut().unwrap();
out_vector.copy::<T::Native>(array.as_primitive::<T>().values());
if let Some(nulls) = array.nulls() {
for (i, null) in nulls.iter().enumerate() {
if !null {
out_vector.set_null(i);
}
}
}
set_nulls_in_flat_vector(&array, out_vector);
}

fn primitive_array_to_vector(array: &dyn Array, out: &mut dyn Vector) -> Result<(), Box<dyn std::error::Error>> {
Expand Down Expand Up @@ -435,13 +423,7 @@ fn decimal_array_to_vector(array: &Decimal128Array, out: &mut FlatVector, width:
}

// Set nulls
if let Some(nulls) = array.nulls() {
for (i, null) in nulls.into_iter().enumerate() {
if !null {
out.set_null(i);
}
}
}
set_nulls_in_flat_vector(array, out);
}

/// Convert Arrow [BooleanArray] to a duckdb vector.
Expand All @@ -451,6 +433,7 @@ fn boolean_array_to_vector(array: &BooleanArray, out: &mut FlatVector) {
for i in 0..array.len() {
out.as_mut_slice()[i] = array.value(i);
}
set_nulls_in_flat_vector(array, out);
}

fn string_array_to_vector(array: &StringArray, out: &mut FlatVector) {
Expand All @@ -461,6 +444,7 @@ fn string_array_to_vector(array: &StringArray, out: &mut FlatVector) {
let s = array.value(i);
out.insert(i, s);
}
set_nulls_in_flat_vector(array, out);
}

fn binary_array_to_vector(array: &BinaryArray, out: &mut FlatVector) {
Expand All @@ -470,6 +454,7 @@ fn binary_array_to_vector(array: &BinaryArray, out: &mut FlatVector) {
let s = array.value(i);
out.insert(i, s);
}
set_nulls_in_flat_vector(array, out);
}

fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
Expand Down Expand Up @@ -498,6 +483,8 @@ fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
let length = array.value_length(i);
out.set_entry(i, offset.as_(), length.as_());
}
set_nulls_in_list_vector(array, out);

Ok(())
}

Expand All @@ -522,6 +509,8 @@ fn fixed_size_list_array_to_vector(
}
}

set_nulls_in_array_vector(array, out);

Ok(())
}

Expand Down Expand Up @@ -569,6 +558,7 @@ fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) -> Result
}
}
}
set_nulls_in_struct_vector(array, out);
Ok(())
}

Expand Down Expand Up @@ -605,6 +595,46 @@ pub fn arrow_ffi_to_query_params(array: FFI_ArrowArray, schema: FFI_ArrowSchema)
[arr as *mut _ as usize, sch as *mut _ as usize]
}

/// Mirrors the null positions of an Arrow array onto a DuckDB `FlatVector`.
///
/// Walks the array's null buffer, if it has one, and calls `set_null` on
/// `out_vector` for every index whose validity bit is `false`. An array
/// without a null buffer leaves `out_vector` untouched.
fn set_nulls_in_flat_vector(array: &dyn Array, out_vector: &mut FlatVector) {
    let validity = match array.nulls() {
        Some(buffer) => buffer,
        // No null buffer on the array: there is nothing to mark.
        None => return,
    };

    for (row, is_valid) in validity.into_iter().enumerate() {
        if is_valid {
            continue;
        }
        out_vector.set_null(row);
    }
}

/// Mirrors the null positions of an Arrow array onto a DuckDB `StructVector`.
///
/// For each index whose validity bit in the array's null buffer is `false`,
/// `set_null` is called on `out_vector`. Nothing happens when the array has no
/// null buffer.
fn set_nulls_in_struct_vector(array: &dyn Array, out_vector: &mut StructVector) {
    if let Some(validity) = array.nulls() {
        validity
            .into_iter()
            .enumerate()
            // Keep only the positions flagged as not valid.
            .filter(|&(_, is_valid)| !is_valid)
            .for_each(|(row, _)| out_vector.set_null(row));
    }
}

/// Mirrors the null positions of an Arrow array onto a DuckDB `ArrayVector`.
///
/// Collects, lazily, the indices whose validity bit in the array's null buffer
/// is `false` and calls `set_null` on `out_vector` for each of them. An array
/// without a null buffer is a no-op.
fn set_nulls_in_array_vector(array: &dyn Array, out_vector: &mut ArrayVector) {
    if let Some(validity) = array.nulls() {
        let null_rows = validity
            .into_iter()
            .enumerate()
            .filter_map(|(row, is_valid)| if is_valid { None } else { Some(row) });

        for row in null_rows {
            out_vector.set_null(row);
        }
    }
}

/// Mirrors the null positions of an Arrow array onto a DuckDB `ListVector`.
///
/// Each validity bit of the array's null buffer is paired with its row index;
/// rows whose bit is `false` are passed to `set_null` on `out_vector`. When the
/// array has no null buffer the vector is left as it is.
fn set_nulls_in_list_vector(array: &dyn Array, out_vector: &mut ListVector) {
    let validity = match array.nulls() {
        None => return,
        Some(buffer) => buffer,
    };

    // Pair every validity bit with a running row counter starting at zero.
    for (is_valid, row) in validity.into_iter().zip(0usize..) {
        if !is_valid {
            out_vector.set_null(row);
        }
    }
}

#[cfg(test)]
mod test {
use super::{arrow_recordbatch_to_query_params, ArrowVTab};
Expand Down
29 changes: 27 additions & 2 deletions crates/duckdb/src/vtab/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,15 @@ impl ListVector {
self.entries.as_mut_slice::<duckdb_list_entry>()[idx].length = length as u64;
}

/// Marks the list entry at `row` as null.
///
/// The validity mask of the entries vector is made writable first, and `row`
/// is then flagged invalid in the mask fetched from that same vector.
pub fn set_null(&mut self, row: usize) {
    let entries = self.entries.ptr;
    unsafe {
        // Must run before the mask is fetched so there is a mask to write to.
        duckdb_vector_ensure_validity_writable(entries);
        duckdb_validity_set_row_invalid(duckdb_vector_get_validity(entries), row as u64);
    }
}

/// Reserve the capacity for its child node.
fn reserve(&self, capacity: usize) {
unsafe {
Expand All @@ -190,7 +199,6 @@ impl ListVector {

/// An array vector (a fixed-size list).
pub struct ArrayVector {
/// Raw DuckDB vector handle. ArrayVector does not own the vector pointer.
ptr: duckdb_vector,
}

Expand Down Expand Up @@ -222,11 +230,19 @@ impl ArrayVector {
pub fn set_child<T: Copy>(&self, data: &[T]) {
self.child(data.len()).copy(data);
}

/// Marks the array (fixed-size list) value at `row` as null.
///
/// The vector's validity mask is made writable first, and `row` is then
/// flagged invalid in the mask fetched from that same vector.
pub fn set_null(&mut self, row: usize) {
    let vector = self.ptr;
    unsafe {
        // Must run before the mask is fetched so there is a mask to write to.
        duckdb_vector_ensure_validity_writable(vector);
        duckdb_validity_set_row_invalid(duckdb_vector_get_validity(vector), row as u64);
    }
}
}

/// A struct vector.
pub struct StructVector {
/// Raw DuckDB vector handle. StructVector does not own the vector pointer.
ptr: duckdb_vector,
}

Expand Down Expand Up @@ -279,4 +295,13 @@ impl StructVector {
let logical_type = self.logical_type();
unsafe { duckdb_struct_type_child_count(logical_type.ptr) as usize }
}

y-f-u marked this conversation as resolved.
Show resolved Hide resolved
/// Marks the struct value at `row` as null.
///
/// The vector's validity mask is made writable first, and `row` is then
/// flagged invalid in the mask fetched from that same vector.
pub fn set_null(&mut self, row: usize) {
    let vector = self.ptr;
    unsafe {
        // Must run before the mask is fetched so there is a mask to write to.
        duckdb_vector_ensure_validity_writable(vector);
        duckdb_validity_set_row_invalid(duckdb_vector_get_validity(vector), row as u64);
    }
}
}
Loading