From e5134f902fe6c56cde03181d8d9cd77616e42caf Mon Sep 17 00:00:00 2001 From: Guillaume Balaine Date: Mon, 3 Jan 2022 02:13:22 +0100 Subject: [PATCH 1/4] upgrades for datafusion 6.0.0 --- src/array/fixed_size_binary/mutable.rs | 21 +++++++++++---------- src/array/mod.rs | 2 ++ src/datatypes/field.rs | 2 +- src/datatypes/mod.rs | 8 ++++---- src/datatypes/physical_type.rs | 2 +- src/record_batch.rs | 25 +++++++++++++++++++++++++ 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/src/array/fixed_size_binary/mutable.rs b/src/array/fixed_size_binary/mutable.rs index d7b28883c85..a1166492a02 100644 --- a/src/array/fixed_size_binary/mutable.rs +++ b/src/array/fixed_size_binary/mutable.rs @@ -162,6 +162,15 @@ impl MutableFixedSizeBinaryArray { validity.shrink_to_fit() } } + + /// Creates an owned [`FixedSizeBinaryArray`] from current data + pub fn as_fixed_size_array(&mut self) -> FixedSizeBinaryArray { + FixedSizeBinaryArray::from_data( + DataType::FixedSizeBinary(self.size), + std::mem::take(&mut self.values).into(), + std::mem::take(&mut self.validity).map(|x| x.into()), + ) + } } /// Accessors @@ -187,19 +196,11 @@ impl MutableArray for MutableFixedSizeBinaryArray { } fn as_box(&mut self) -> Box { - Box::new(FixedSizeBinaryArray::from_data( - DataType::FixedSizeBinary(self.size), - std::mem::take(&mut self.values).into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) + Box::new(self.as_fixed_size_array()) } fn as_arc(&mut self) -> Arc { - Arc::new(FixedSizeBinaryArray::from_data( - DataType::FixedSizeBinary(self.size), - std::mem::take(&mut self.values).into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) + Arc::new(self.as_fixed_size_array()) } fn data_type(&self) -> &DataType { diff --git a/src/array/mod.rs b/src/array/mod.rs index 06dc571d427..f83762b49bd 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -357,6 +357,7 @@ mod equal; mod ffi; pub mod growable; pub mod ord; +//mod decimal; pub use display::get_display; pub use equal::equal; @@ -374,6 +375,7 @@ pub use primitive::*; pub use struct_::StructArray; pub use union::UnionArray; pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter}; +//pub use decimal::{MutableDecimalArray, DecimalArray}; pub(crate) use self::ffi::offset_buffers_children_dictionary; pub(crate) use self::ffi::FromFfi; diff --git a/src/datatypes/field.rs b/src/datatypes/field.rs index fad96c7d40a..ec52e733e3d 100644 --- a/src/datatypes/field.rs +++ b/src/datatypes/field.rs @@ -3,7 +3,7 @@ use crate::error::{ArrowError, Result}; use super::{DataType, Metadata}; /// Represents the metadata of a "column". -#[derive(Debug, Clone, Eq, PartialEq, Hash)] +#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)] pub struct Field { /// Its name pub name: String, diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs index 8d9de83d583..ea66fd67e98 100644 --- a/src/datatypes/mod.rs +++ b/src/datatypes/mod.rs @@ -24,7 +24,7 @@ pub(crate) type Extension = Option<(String, Option)>; /// which declares the in-memory representation of data. /// The [`DataType::Extension`] is special in that it augments a [`DataType`] with metadata to support custom types. /// Use `to_logical_type` to desugar such type and return its correspoding logical type. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum DataType { /// Null type Null, @@ -151,7 +151,7 @@ pub enum DataType { } /// Mode of [`DataType::Union`] -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum UnionMode { /// Dense union Dense, @@ -182,7 +182,7 @@ impl UnionMode { } /// The time units defined in Arrow. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum TimeUnit { /// Time in seconds. Second, @@ -195,7 +195,7 @@ pub enum TimeUnit { } /// Interval units defined in Arrow -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum IntervalUnit { /// The number of elapsed whole months. YearMonth, diff --git a/src/datatypes/physical_type.rs b/src/datatypes/physical_type.rs index 7e15cb19629..e8a9153a2ca 100644 --- a/src/datatypes/physical_type.rs +++ b/src/datatypes/physical_type.rs @@ -50,7 +50,7 @@ impl PhysicalType { /// the set of valid indices types of a dictionary-encoded Array. /// Each type corresponds to a variant of [`crate::array::DictionaryArray`]. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum IntegerType { /// A signed 8-bit integer. Int8, diff --git a/src/record_batch.rs b/src/record_batch.rs index 369e74a0481..a8448c15847 100644 --- a/src/record_batch.rs +++ b/src/record_batch.rs @@ -297,6 +297,31 @@ impl RecordBatch { let Self { columns, schema } = self; (columns, schema) } + + /// Return a new RecordBatch where each column is sliced + /// according to `offset` and `length` + /// + /// # Panics + /// + /// Panics if `offset` with `length` is greater than column length. + pub fn slice(&self, offset: usize, length: usize) -> RecordBatch { + if self.schema.fields().is_empty() { + assert!((offset + length) == 0); + return RecordBatch::new_empty(self.schema.clone()); + } + assert!((offset + length) <= self.num_rows()); + + let columns = self + .columns() + .iter() + .map(|column| column.slice(offset, length).into()) + .collect(); + + Self { + schema: self.schema.clone(), + columns, + } + } } /// Options that control the behaviour used when creating a [`RecordBatch`]. From 995c9391fb3cdd8bcd8d8918a5497da1a81b3acd Mon Sep 17 00:00:00 2001 From: Guillaume Balaine Date: Mon, 3 Jan 2022 02:17:28 +0100 Subject: [PATCH 2/4] remove references to decimal --- src/array/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/array/mod.rs b/src/array/mod.rs index f83762b49bd..06dc571d427 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -357,7 +357,6 @@ mod equal; mod ffi; pub mod growable; pub mod ord; -//mod decimal; pub use display::get_display; pub use equal::equal; @@ -375,7 +374,6 @@ pub use primitive::*; pub use struct_::StructArray; pub use union::UnionArray; pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter}; -//pub use decimal::{MutableDecimalArray, DecimalArray}; pub(crate) use self::ffi::offset_buffers_children_dictionary; pub(crate) use self::ffi::FromFfi; From 55d01ec579d6300d92131ec2c04eab1b654eb78d Mon Sep 17 00:00:00 2001 From: Guillaume Balaine Date: Mon, 3 Jan 2022 02:20:09 +0100 Subject: [PATCH 3/4] remove slice test function --- src/record_batch.rs | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/src/record_batch.rs b/src/record_batch.rs index a8448c15847..369e74a0481 100644 --- a/src/record_batch.rs +++ b/src/record_batch.rs @@ -297,31 +297,6 @@ impl RecordBatch { let Self { columns, schema } = self; (columns, schema) } - - /// Return a new RecordBatch where each column is sliced - /// according to `offset` and `length` - /// - /// # Panics - /// - /// Panics if `offset` with `length` is greater than column length. - pub fn slice(&self, offset: usize, length: usize) -> RecordBatch { - if self.schema.fields().is_empty() { - assert!((offset + length) == 0); - return RecordBatch::new_empty(self.schema.clone()); - } - assert!((offset + length) <= self.num_rows()); - - let columns = self - .columns() - .iter() - .map(|column| column.slice(offset, length).into()) - .collect(); - - Self { - schema: self.schema.clone(), - columns, - } - } } /// Options that control the behaviour used when creating a [`RecordBatch`]. From b67e9b446e40974a711cc6a672e831f2ba6b4135 Mon Sep 17 00:00:00 2001 From: Guillaume Balaine Date: Mon, 3 Jan 2022 02:22:02 +0100 Subject: [PATCH 4/4] remove as_fixed_size_array which is only useful for decimal --- src/array/fixed_size_binary/mutable.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/array/fixed_size_binary/mutable.rs b/src/array/fixed_size_binary/mutable.rs index a1166492a02..d7b28883c85 100644 --- a/src/array/fixed_size_binary/mutable.rs +++ b/src/array/fixed_size_binary/mutable.rs @@ -162,15 +162,6 @@ impl MutableFixedSizeBinaryArray { validity.shrink_to_fit() } } - - /// Creates an owned [`FixedSizeBinaryArray`] from current data - pub fn as_fixed_size_array(&mut self) -> FixedSizeBinaryArray { - FixedSizeBinaryArray::from_data( - DataType::FixedSizeBinary(self.size), - std::mem::take(&mut self.values).into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - ) - } } /// Accessors @@ -196,11 +187,19 @@ impl MutableArray for MutableFixedSizeBinaryArray { } fn as_box(&mut self) -> Box { - Box::new(self.as_fixed_size_array()) + Box::new(FixedSizeBinaryArray::from_data( + DataType::FixedSizeBinary(self.size), + std::mem::take(&mut self.values).into(), + std::mem::take(&mut self.validity).map(|x| x.into()), + )) } fn as_arc(&mut self) -> Arc { - Arc::new(self.as_fixed_size_array()) + Arc::new(FixedSizeBinaryArray::from_data( + DataType::FixedSizeBinary(self.size), + std::mem::take(&mut self.values).into(), + std::mem::take(&mut self.validity).map(|x| x.into()), + )) } fn data_type(&self) -> &DataType {