Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support bitwise and as an example #1653

Merged
merged 5 commits into from
Jan 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions datafusion/src/logical_plan/operators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ pub enum Operator {
RegexNotMatch,
/// Case insensitive regex not match
RegexNotIMatch,
/// Bitwise and, like `&`
BitwiseAnd,
}

impl fmt::Display for Operator {
Expand All @@ -90,6 +92,7 @@ impl fmt::Display for Operator {
Operator::RegexNotIMatch => "!~*",
Operator::IsDistinctFrom => "IS DISTINCT FROM",
Operator::IsNotDistinctFrom => "IS NOT DISTINCT FROM",
Operator::BitwiseAnd => "&",
};
write!(f, "{}", display)
}
Expand Down
20 changes: 20 additions & 0 deletions datafusion/src/physical_plan/coercion_rule/binary_rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub(crate) fn coerce_types(
) -> Result<DataType> {
// This result MUST be compatible with `binary_coerce`
let result = match op {
Operator::BitwiseAnd => bitwise_coercion(lhs_type, rhs_type),
Operator::And | Operator::Or => match (lhs_type, rhs_type) {
// logical binary boolean operators can only be evaluated in bools
(DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean),
Expand Down Expand Up @@ -72,6 +73,25 @@ pub(crate) fn coerce_types(
}
}

/// Picks the common type both operands of a bitwise operator are cast to
/// before evaluation.
///
/// Returns `None` when either side is not numeric (per `is_numeric`) or when
/// neither side is one of the signed integer types `Int8`, `Int16`, `Int32`
/// or `Int64`. Otherwise the widest signed integer type present on either
/// side is returned.
///
/// Two operands of the *same* type are deliberately not short-circuited:
/// the bitwise kernels (`bitwise_and` / `bitwise_and_scalar`) only implement
/// signed integer arrays, so returning e.g. `(Float64, Float64) -> Float64`
/// here would pass coercion and then fail at execution time with an
/// internal error. Equal signed integer pairs are still resolved to
/// themselves by the `match` below.
// NOTE(review): assumes `is_numeric` also accepts unsigned / floating point
// types — confirm against its definition.
fn bitwise_coercion(left_type: &DataType, right_type: &DataType) -> Option<DataType> {
    use arrow::datatypes::DataType::*;

    if !is_numeric(left_type) || !is_numeric(right_type) {
        return None;
    }
    // TODO support other data type
    // Arms are ordered widest-first so the wider signed integer wins.
    match (left_type, right_type) {
        (Int64, _) | (_, Int64) => Some(Int64),
        (Int32, _) | (_, Int32) => Some(Int32),
        (Int16, _) | (_, Int16) => Some(Int16),
        (Int8, _) | (_, Int8) => Some(Int8),
        _ => None,
    }
}

fn comparison_eq_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
// can't compare dictionaries directly due to
// https://github.com/apache/arrow-rs/issues/1201
Expand Down
134 changes: 134 additions & 0 deletions datafusion/src/physical_plan/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,103 @@ fn modulus_decimal(left: &DecimalArray, right: &DecimalArray) -> Result<DecimalA
Ok(decimal_builder.finish())
}

/// Applies the binary operator `$OP` row by row to two arrays that share the
/// concrete array type `$ARRAY_TYPE` (integer arrays such as `Int64Array` or
/// `Int32Array`).
///
/// A row of the output is null whenever the corresponding row of either input
/// is null. The macro evaluates to `Ok(Arc<$ARRAY_TYPE>)`.
///
/// `$TYPE` is accepted for symmetry with the array/scalar variant but is not
/// used by this macro.
///
/// Panics if either operand cannot be downcast to `$ARRAY_TYPE`.
macro_rules! binary_bitwise_array_op {
    ($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{
        let row_count = $LEFT.len();
        let lhs = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
        let rhs = $RIGHT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
        let combined: $ARRAY_TYPE = (0..row_count)
            .map(|row| {
                let any_null = lhs.is_null(row) || rhs.is_null(row);
                if any_null {
                    None
                } else {
                    Some(lhs.value(row) $OP rhs.value(row))
                }
            })
            .collect();
        Ok(Arc::new(combined))
    }};
}

/// The binary_bitwise_array_scalar macro only evaluates for integer types
/// like int64, int32.
/// It is used to do bitwise operation on an array with a scalar.
///
/// `$LEFT` is the array operand (downcast to `$ARRAY_TYPE`), `$RIGHT` is the
/// scalar operand (converted to the native type `$TYPE`), and `$OP` is the
/// operator token applied as `array_value $OP scalar_value`.
///
/// The macro evaluates to `Ok(ArrayRef)`:
/// * if the scalar is null, an all-null array with the same data type and
///   length as `$LEFT`;
/// * otherwise an array whose rows are null wherever `$LEFT` is null and
///   `value $OP scalar` everywhere else.
///
/// Panics if `$LEFT` cannot be downcast to `$ARRAY_TYPE`, or if a non-null
/// scalar cannot be converted into `$TYPE`.
macro_rules! binary_bitwise_array_scalar {
    ($LEFT:expr, $RIGHT:expr, $OP:tt, $ARRAY_TYPE:ident, $TYPE:ty) => {{
        let len = $LEFT.len();
        let array = $LEFT.as_any().downcast_ref::<$ARRAY_TYPE>().unwrap();
        let scalar = $RIGHT;
        if scalar.is_null() {
            // A null operand makes every output row null.
            Ok(new_null_array(array.data_type(), len))
        } else {
            let right: $TYPE = scalar.try_into().unwrap();
            let result: $ARRAY_TYPE = (0..len)
                .map(|i| {
                    if array.is_null(i) {
                        None
                    } else {
                        Some(array.value(i) $OP right)
                    }
                })
                .collect();
            Ok(Arc::new(result) as ArrayRef)
        }
    }};
}

/// Computes the row-wise bitwise AND of two arrays.
///
/// Dispatches on the data type of `left`; only `Int8`, `Int16`, `Int32` and
/// `Int64` are handled. Any other type yields a `DataFusionError::Internal`.
/// Null handling is delegated to `binary_bitwise_array_op!`.
// NOTE(review): `right` is downcast to the same array type as `left` inside
// the macro, so callers are presumably expected to have coerced both sides to
// a common type first — confirm at the call site.
fn bitwise_and(left: ArrayRef, right: ArrayRef) -> Result<ArrayRef> {
    let data_type = left.data_type();
    match data_type {
        DataType::Int64 => binary_bitwise_array_op!(left, right, &, Int64Array, i64),
        DataType::Int32 => binary_bitwise_array_op!(left, right, &, Int32Array, i32),
        DataType::Int16 => binary_bitwise_array_op!(left, right, &, Int16Array, i16),
        DataType::Int8 => binary_bitwise_array_op!(left, right, &, Int8Array, i8),
        unsupported => Err(DataFusionError::Internal(format!(
            "Data type {:?} not supported for binary operation '{}' on dyn arrays",
            unsupported,
            Operator::BitwiseAnd
        ))),
    }
}

/// Computes the bitwise AND of every row of `array` with `scalar`.
///
/// Dispatches on the data type of `array`; only `Int8`, `Int16`, `Int32` and
/// `Int64` are handled, and any other type produces a
/// `DataFusionError::Internal` inside the returned `Result`.
///
/// The outer `Option` is always `Some`: this function never asks the caller
/// to fall back to another implementation.
fn bitwise_and_scalar(
    array: &dyn Array,
    scalar: ScalarValue,
) -> Option<Result<ArrayRef>> {
    Some(match array.data_type() {
        DataType::Int64 => {
            binary_bitwise_array_scalar!(array, scalar, &, Int64Array, i64)
        }
        DataType::Int32 => {
            binary_bitwise_array_scalar!(array, scalar, &, Int32Array, i32)
        }
        DataType::Int16 => {
            binary_bitwise_array_scalar!(array, scalar, &, Int16Array, i16)
        }
        DataType::Int8 => {
            binary_bitwise_array_scalar!(array, scalar, &, Int8Array, i8)
        }
        unsupported => Err(DataFusionError::Internal(format!(
            "Data type {:?} not supported for binary operation '{}' on dyn arrays",
            unsupported,
            Operator::BitwiseAnd
        ))),
    })
}

/// Binary expression
#[derive(Debug)]
pub struct BinaryExpr {
Expand Down Expand Up @@ -880,6 +977,8 @@ pub fn binary_operator_data_type(
| Operator::RegexNotIMatch
| Operator::IsDistinctFrom
| Operator::IsNotDistinctFrom => Ok(DataType::Boolean),
// bitwise operations return the common coerced type
Operator::BitwiseAnd => Ok(result_type),
// math operations return the same value as the common coerced type
Operator::Plus
| Operator::Minus
Expand Down Expand Up @@ -1055,6 +1154,7 @@ impl BinaryExpr {
true,
true
),
Operator::BitwiseAnd => bitwise_and_scalar(array, scalar.clone()),
// if scalar operation is not supported - fallback to array implementation
_ => None,
};
Expand Down Expand Up @@ -1143,6 +1243,7 @@ impl BinaryExpr {
Operator::RegexNotIMatch => {
binary_string_array_flag_op!(left, right, regexp_is_match, true, true)
}
Operator::BitwiseAnd => bitwise_and(left, right),
}
}
}
Expand Down Expand Up @@ -1580,6 +1681,18 @@ mod tests {
DataType::Boolean,
vec![false, false, false, false, true]
);
test_coercion!(
Int16Array,
DataType::Int16,
vec![1i16, 2i16, 3i16],
Int64Array,
DataType::Int64,
vec![10i64, 4i64, 5i64],
Operator::BitwiseAnd,
Int64Array,
DataType::Int64,
vec![0i64, 0i64, 1i64]
);
Ok(())
}

Expand Down Expand Up @@ -2954,4 +3067,25 @@ mod tests {

Ok(())
}

#[test]
fn bitwise_array_test() -> Result<()> {
    // 12 & 1 == 0, null & 3 == null, 11 & 7 == 3
    let lhs: ArrayRef = Arc::new(Int32Array::from(vec![Some(12), None, Some(11)]));
    let rhs: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(3), Some(7)]));
    let expected = Int32Array::from(vec![Some(0), None, Some(3)]);

    let actual = bitwise_and(lhs, rhs)?;

    assert_eq!(actual.as_ref(), &expected);
    Ok(())
}

#[test]
fn bitwise_scalar_test() -> Result<()> {
    // 12 & 3 == 0, null & 3 == null, 11 & 3 == 3
    let input: ArrayRef = Arc::new(Int32Array::from(vec![Some(12), None, Some(11)]));
    let mask = ScalarValue::from(3i32);
    let expected = Int32Array::from(vec![Some(0), None, Some(3)]);

    let actual = bitwise_and_scalar(&input, mask).unwrap()?;

    assert_eq!(actual.as_ref(), &expected);
    Ok(())
}
}
1 change: 1 addition & 0 deletions datafusion/src/sql/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1276,6 +1276,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
BinaryOperator::PGRegexIMatch => Ok(Operator::RegexIMatch),
BinaryOperator::PGRegexNotMatch => Ok(Operator::RegexNotMatch),
BinaryOperator::PGRegexNotIMatch => Ok(Operator::RegexNotIMatch),
BinaryOperator::BitwiseAnd => Ok(Operator::BitwiseAnd),
_ => Err(DataFusionError::NotImplemented(format!(
"Unsupported SQL binary operator {:?}",
op
Expand Down