Skip to content

Commit

Permalink
Remove StringOffsetTrait and BinaryOffsetTrait (apache#1645)
Browse files Browse the repository at this point in the history
* remove StringOffsetTrait

Signed-off-by: remzi <13716567376yh@gmail.com>

* remove BinaryOffsetTrait

Signed-off-by: remzi <13716567376yh@gmail.com>
  • Loading branch information
HaoYang670 authored May 6, 2022
1 parent 922bfe7 commit a38e460
Show file tree
Hide file tree
Showing 22 changed files with 174 additions and 231 deletions.
58 changes: 24 additions & 34 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,29 +35,23 @@ use crate::error::{ArrowError, Result};
use crate::util::bit_util;
use crate::{buffer::MutableBuffer, datatypes::DataType};

/// Like [`OffsetSizeTrait`], but specialized for Binary.
/// This allow us to expose a constant datatype for the [`GenericBinaryArray`].
pub trait BinaryOffsetSizeTrait: OffsetSizeTrait {
const DATA_TYPE: DataType;
}

impl BinaryOffsetSizeTrait for i32 {
const DATA_TYPE: DataType = DataType::Binary;
}

impl BinaryOffsetSizeTrait for i64 {
const DATA_TYPE: DataType = DataType::LargeBinary;
}

/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
/// binary data.
pub struct GenericBinaryArray<OffsetSize: BinaryOffsetSizeTrait> {
pub struct GenericBinaryArray<OffsetSize: OffsetSizeTrait> {
data: ArrayData,
value_offsets: RawPtrBox<OffsetSize>,
value_data: RawPtrBox<u8>,
}

impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
/// Returns the [`DataType`] of this binary array, selected by the offset type:
/// [`DataType::LargeBinary`] when `OffsetSize::is_large()` is true, otherwise
/// [`DataType::Binary`].
pub fn get_data_type() -> DataType {
if OffsetSize::is_large() {
DataType::LargeBinary
} else {
DataType::Binary
}
}

/// Returns the length for value at index `i`.
#[inline]
pub fn value_length(&self, i: usize) -> OffsetSize {
Expand Down Expand Up @@ -155,7 +149,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
"BinaryArray can only be created from List<u8> arrays, mismatched data types."
);

let mut builder = ArrayData::builder(OffsetSize::DATA_TYPE)
let mut builder = ArrayData::builder(Self::get_data_type())
.len(v.len())
.add_buffer(v.data_ref().buffers()[0].clone())
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
Expand Down Expand Up @@ -195,7 +189,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
assert!(!offsets.is_empty()); // wrote at least one
let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;

let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
let array_data = ArrayData::builder(Self::get_data_type())
.len(actual_len)
.add_buffer(offsets.into())
.add_buffer(values.into());
Expand Down Expand Up @@ -223,14 +217,14 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}
}

impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryArray<T> {
impl<'a, T: OffsetSizeTrait> GenericBinaryArray<T> {
/// Constructs a new [`GenericBinaryIter`] that borrows this array for `'a`.
pub fn iter(&'a self) -> GenericBinaryIter<'a, T> {
GenericBinaryIter::<'a, T>::new(self)
}
}

impl<OffsetSize: BinaryOffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let prefix = if OffsetSize::is_large() { "Large" } else { "" };

Expand All @@ -242,7 +236,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> fmt::Debug for GenericBinaryArray<Offset
}
}

impl<OffsetSize: BinaryOffsetSizeTrait> Array for GenericBinaryArray<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> Array for GenericBinaryArray<OffsetSize> {
/// Returns this array as a `&dyn Any` reference.
fn as_any(&self) -> &dyn Any {
self
}
Expand All @@ -252,13 +246,11 @@ impl<OffsetSize: BinaryOffsetSizeTrait> Array for GenericBinaryArray<OffsetSize>
}
}

impl<OffsetSize: BinaryOffsetSizeTrait> From<ArrayData>
for GenericBinaryArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericBinaryArray<OffsetSize> {
fn from(data: ArrayData) -> Self {
assert_eq!(
data.data_type(),
&<OffsetSize as BinaryOffsetSizeTrait>::DATA_TYPE,
&Self::get_data_type(),
"[Large]BinaryArray expects Datatype::[Large]Binary"
);
assert_eq!(
Expand All @@ -276,7 +268,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> From<ArrayData>
}
}

impl<Ptr, OffsetSize: BinaryOffsetSizeTrait> FromIterator<Option<Ptr>>
impl<Ptr, OffsetSize: OffsetSizeTrait> FromIterator<Option<Ptr>>
for GenericBinaryArray<OffsetSize>
where
Ptr: AsRef<[u8]>,
Expand Down Expand Up @@ -309,7 +301,7 @@ where

// calculate actual data_len, which may be different from the iterator's upper bound
let data_len = offsets.len() - 1;
let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
let array_data = ArrayData::builder(Self::get_data_type())
.len(data_len)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
Expand Down Expand Up @@ -399,7 +391,7 @@ pub type BinaryArray = GenericBinaryArray<i32>;
///
pub type LargeBinaryArray = GenericBinaryArray<i64>;

impl<'a, T: BinaryOffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
impl<'a, T: OffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
type Item = Option<&'a [u8]>;
type IntoIter = GenericBinaryIter<'a, T>;

Expand All @@ -408,23 +400,21 @@ impl<'a, T: BinaryOffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
}
}

impl<OffsetSize: BinaryOffsetSizeTrait> From<Vec<Option<&[u8]>>>
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&[u8]>>>
for GenericBinaryArray<OffsetSize>
{
/// Builds the array from a vector of optional byte slices by delegating to
/// `Self::from_opt_vec`.
fn from(v: Vec<Option<&[u8]>>) -> Self {
Self::from_opt_vec(v)
}
}

impl<OffsetSize: BinaryOffsetSizeTrait> From<Vec<&[u8]>>
for GenericBinaryArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<Vec<&[u8]>> for GenericBinaryArray<OffsetSize> {
/// Builds the array from a vector of byte slices by delegating to
/// `Self::from_iter_values`.
fn from(v: Vec<&[u8]>) -> Self {
Self::from_iter_values(v)
}
}

impl<T: BinaryOffsetSizeTrait> From<GenericListArray<T>> for GenericBinaryArray<T> {
impl<T: OffsetSizeTrait> From<GenericListArray<T>> for GenericBinaryArray<T> {
/// Converts a [`GenericListArray`] with the same offset type into a binary
/// array by delegating to `Self::from_list`.
fn from(v: GenericListArray<T>) -> Self {
Self::from_list(v)
}
Expand Down Expand Up @@ -1295,7 +1285,7 @@ mod tests {
}
}

fn test_generic_binary_array_from_opt_vec<T: BinaryOffsetSizeTrait>() {
fn test_generic_binary_array_from_opt_vec<T: OffsetSizeTrait>() {
let values: Vec<Option<&[u8]>> =
vec![Some(b"one"), Some(b"two"), None, Some(b""), Some(b"three")];
let array = GenericBinaryArray::<T>::from_opt_vec(values);
Expand Down
62 changes: 25 additions & 37 deletions arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,31 +27,25 @@ use crate::buffer::Buffer;
use crate::util::bit_util;
use crate::{buffer::MutableBuffer, datatypes::DataType};

/// Like [`OffsetSizeTrait`], but specialized for Strings.
/// This allow us to expose a constant datatype for the [`GenericStringArray`].
pub trait StringOffsetSizeTrait: OffsetSizeTrait {
const DATA_TYPE: DataType;
}

impl StringOffsetSizeTrait for i32 {
const DATA_TYPE: DataType = DataType::Utf8;
}

impl StringOffsetSizeTrait for i64 {
const DATA_TYPE: DataType = DataType::LargeUtf8;
}

/// Generic struct for \[Large\]StringArray
///
/// See [`StringArray`] and [`LargeStringArray`] for storing
/// specific string data.
pub struct GenericStringArray<OffsetSize: StringOffsetSizeTrait> {
pub struct GenericStringArray<OffsetSize: OffsetSizeTrait> {
data: ArrayData,
value_offsets: RawPtrBox<OffsetSize>,
value_data: RawPtrBox<u8>,
}

impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
/// Returns the [`DataType`] of this string array, selected by the offset type:
/// [`DataType::LargeUtf8`] when `OffsetSize::is_large()` is true, otherwise
/// [`DataType::Utf8`].
pub fn get_data_type() -> DataType {
if OffsetSize::is_large() {
DataType::LargeUtf8
} else {
DataType::Utf8
}
}

/// Returns the length for the element at index `i`.
#[inline]
pub fn value_length(&self, i: usize) -> OffsetSize {
Expand Down Expand Up @@ -134,7 +128,7 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
"StringArray can only be created from List<u8> arrays, mismatched data types."
);

let mut builder = ArrayData::builder(OffsetSize::DATA_TYPE)
let mut builder = ArrayData::builder(Self::get_data_type())
.len(v.len())
.add_buffer(v.data().buffers()[0].clone())
.add_buffer(v.data().child_data()[0].buffers()[0].clone());
Expand Down Expand Up @@ -174,7 +168,7 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
assert!(!offsets.is_empty()); // wrote at least one
let actual_len = (offsets.len() / std::mem::size_of::<OffsetSize>()) - 1;

let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
let array_data = ArrayData::builder(Self::get_data_type())
.len(actual_len)
.add_buffer(offsets.into())
.add_buffer(values.into());
Expand Down Expand Up @@ -202,7 +196,7 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
}
}

impl<'a, Ptr, OffsetSize: StringOffsetSizeTrait> FromIterator<&'a Option<Ptr>>
impl<'a, Ptr, OffsetSize: OffsetSizeTrait> FromIterator<&'a Option<Ptr>>
for GenericStringArray<OffsetSize>
where
Ptr: AsRef<str> + 'a,
Expand All @@ -216,7 +210,7 @@ where
}
}

impl<'a, Ptr, OffsetSize: StringOffsetSizeTrait> FromIterator<Option<Ptr>>
impl<'a, Ptr, OffsetSize: OffsetSizeTrait> FromIterator<Option<Ptr>>
for GenericStringArray<OffsetSize>
where
Ptr: AsRef<str>,
Expand Down Expand Up @@ -251,7 +245,7 @@ where

// calculate actual data_len, which may be different from the iterator's upper bound
let data_len = (offsets.len() / offset_size) - 1;
let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
let array_data = ArrayData::builder(Self::get_data_type())
.len(data_len)
.add_buffer(offsets.into())
.add_buffer(values.into())
Expand All @@ -261,7 +255,7 @@ where
}
}

impl<'a, T: StringOffsetSizeTrait> IntoIterator for &'a GenericStringArray<T> {
impl<'a, T: OffsetSizeTrait> IntoIterator for &'a GenericStringArray<T> {
type Item = Option<&'a str>;
type IntoIter = GenericStringIter<'a, T>;

Expand All @@ -270,14 +264,14 @@ impl<'a, T: StringOffsetSizeTrait> IntoIterator for &'a GenericStringArray<T> {
}
}

impl<'a, T: StringOffsetSizeTrait> GenericStringArray<T> {
impl<'a, T: OffsetSizeTrait> GenericStringArray<T> {
/// Constructs a new [`GenericStringIter`] that borrows this array for `'a`.
pub fn iter(&'a self) -> GenericStringIter<'a, T> {
GenericStringIter::<'a, T>::new(self)
}
}

impl<OffsetSize: StringOffsetSizeTrait> fmt::Debug for GenericStringArray<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericStringArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let prefix = if OffsetSize::is_large() { "Large" } else { "" };

Expand All @@ -289,7 +283,7 @@ impl<OffsetSize: StringOffsetSizeTrait> fmt::Debug for GenericStringArray<Offset
}
}

impl<OffsetSize: StringOffsetSizeTrait> Array for GenericStringArray<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> Array for GenericStringArray<OffsetSize> {
/// Returns this array as a `&dyn Any` reference.
fn as_any(&self) -> &dyn Any {
self
}
Expand All @@ -299,13 +293,11 @@ impl<OffsetSize: StringOffsetSizeTrait> Array for GenericStringArray<OffsetSize>
}
}

impl<OffsetSize: StringOffsetSizeTrait> From<ArrayData>
for GenericStringArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericStringArray<OffsetSize> {
fn from(data: ArrayData) -> Self {
assert_eq!(
data.data_type(),
&<OffsetSize as StringOffsetSizeTrait>::DATA_TYPE,
&Self::get_data_type(),
"[Large]StringArray expects Datatype::[Large]Utf8"
);
assert_eq!(
Expand All @@ -323,25 +315,21 @@ impl<OffsetSize: StringOffsetSizeTrait> From<ArrayData>
}
}

impl<OffsetSize: StringOffsetSizeTrait> From<Vec<Option<&str>>>
impl<OffsetSize: OffsetSizeTrait> From<Vec<Option<&str>>>
for GenericStringArray<OffsetSize>
{
/// Builds the array from a vector of optional string slices by collecting
/// the vector's owning iterator into `Self`.
fn from(v: Vec<Option<&str>>) -> Self {
v.into_iter().collect()
}
}

impl<OffsetSize: StringOffsetSizeTrait> From<Vec<&str>>
for GenericStringArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<Vec<&str>> for GenericStringArray<OffsetSize> {
/// Builds the array from a vector of string slices by delegating to
/// `Self::from_iter_values`.
fn from(v: Vec<&str>) -> Self {
Self::from_iter_values(v)
}
}

impl<OffsetSize: StringOffsetSizeTrait> From<Vec<String>>
for GenericStringArray<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> From<Vec<String>> for GenericStringArray<OffsetSize> {
/// Builds the array from a vector of owned `String`s by delegating to
/// `Self::from_iter_values`.
fn from(v: Vec<String>) -> Self {
Self::from_iter_values(v)
}
Expand Down Expand Up @@ -371,7 +359,7 @@ pub type StringArray = GenericStringArray<i32>;
/// ```
pub type LargeStringArray = GenericStringArray<i64>;

impl<T: StringOffsetSizeTrait> From<GenericListArray<T>> for GenericStringArray<T> {
impl<T: OffsetSizeTrait> From<GenericListArray<T>> for GenericStringArray<T> {
/// Converts a [`GenericListArray`] with the same offset type into a string
/// array by delegating to `GenericStringArray::from_list`.
fn from(v: GenericListArray<T>) -> Self {
GenericStringArray::<T>::from_list(v)
}
Expand Down
12 changes: 4 additions & 8 deletions arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1165,9 +1165,7 @@ pub struct DecimalBuilder {
scale: usize,
}

impl<OffsetSize: BinaryOffsetSizeTrait> ArrayBuilder
for GenericBinaryBuilder<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for GenericBinaryBuilder<OffsetSize> {
/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &dyn Any {
self
Expand Down Expand Up @@ -1199,9 +1197,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> ArrayBuilder
}
}

impl<OffsetSize: StringOffsetSizeTrait> ArrayBuilder
for GenericStringBuilder<OffsetSize>
{
impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for GenericStringBuilder<OffsetSize> {
/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &dyn Any {
self
Expand Down Expand Up @@ -1298,7 +1294,7 @@ impl ArrayBuilder for DecimalBuilder {
}
}

impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
/// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values
/// array
pub fn new(capacity: usize) -> Self {
Expand Down Expand Up @@ -1347,7 +1343,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
}
}

impl<OffsetSize: StringOffsetSizeTrait> GenericStringBuilder<OffsetSize> {
impl<OffsetSize: OffsetSizeTrait> GenericStringBuilder<OffsetSize> {
/// Creates a new `StringBuilder`,
/// `capacity` is the number of bytes of string data to pre-allocate space for in this builder
pub fn new(capacity: usize) -> Self {
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pub fn as_large_list_array(arr: &dyn Array) -> &LargeListArray {

#[doc = "Force downcast ArrayRef to GenericBinaryArray"]
#[inline]
pub fn as_generic_binary_array<S: BinaryOffsetSizeTrait>(
pub fn as_generic_binary_array<S: OffsetSizeTrait>(
arr: &dyn Array,
) -> &GenericBinaryArray<S> {
arr.as_any()
Expand Down
Loading

0 comments on commit a38e460

Please sign in to comment.