Skip to content

Commit

Permalink
Improve performance of filter_dict (#2063)
Browse files Browse the repository at this point in the history
* Optimize filter_dict

* For review
  • Loading branch information
viirya authored Jul 14, 2022
1 parent 88e0de5 commit 50e285f
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
22 changes: 22 additions & 0 deletions arrow/src/array/array_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,28 @@ impl<K: ArrowPrimitiveType> DictionaryArray<K> {
Ok(array.into())
}

/// Assembles a `DictionaryArray` from already-built parts without
/// performing any validation.
///
/// `keys` holds the indexes into the dictionary, `values` is the
/// dictionary itself, and `data` is the `ArrayData` describing the same
/// array. All three are stored exactly as given, and the result is always
/// flagged as not ordered. Despite the `try_` prefix, this constructor is
/// infallible and returns `Self` directly. It is intended for internal
/// callers such as the filter kernel.
///
/// # Safety
///
/// The caller must guarantee that `keys`, `values` and `data` together
/// form a valid `DictionaryArray`; otherwise undefined behavior can occur.
pub(crate) unsafe fn try_new_unchecked(
    keys: PrimitiveArray<K>,
    values: ArrayRef,
    data: ArrayData,
) -> Self {
    // Nothing is checked here: the parts are moved straight into the struct.
    let is_ordered = false;
    Self {
        is_ordered,
        values,
        keys,
        data,
    }
}

/// Return an array view of the keys of this dictionary as a PrimitiveArray.
pub fn keys(&self) -> &PrimitiveArray<K> {
&self.keys
Expand Down
8 changes: 7 additions & 1 deletion arrow/src/compute/kernels/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,13 @@ where
)
};

DictionaryArray::<T>::from(data)
unsafe {
DictionaryArray::<T>::try_new_unchecked(
filtered_keys,
array.values().clone(),
data,
)
}
}

#[cfg(test)]
Expand Down

0 comments on commit 50e285f

Please sign in to comment.