Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add isnan and iszero #7274

Merged
merged 6 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datafusion/core/tests/sql/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ async fn test_mathematical_expressions_with_null() -> Result<()> {
test_expression!("nanvl(NULL, NULL)", "NULL");
test_expression!("nanvl(1, NULL)", "NULL");
test_expression!("nanvl(NULL, 1)", "NULL");
test_expression!("isnan(NULL)", "NULL");
test_expression!("iszero(NULL)", "NULL");
Ok(())
}

Expand Down
14 changes: 13 additions & 1 deletion datafusion/core/tests/sqllogictests/test_files/math.slt
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,16 @@ SELECT atan2(2.0, 1.0), atan2(-2.0, 1.0), atan2(2.0, -1.0), atan2(-2.0, -1.0), a
query RRR
SELECT nanvl(asin(10), 1.0), nanvl(1.0, 2.0), nanvl(asin(10), asin(10))
----
1 1 NaN
1 1 NaN

# isnan
query BBBB
SELECT isnan(1.0), isnan('NaN'::DOUBLE), isnan(-'NaN'::DOUBLE), isnan(NULL)
----
false true true NULL

# iszero
query BBBB
SELECT iszero(1.0), iszero(0.0), iszero(-0.0), iszero(NULL)
----
false true true NULL
46 changes: 46 additions & 0 deletions datafusion/core/tests/sqllogictests/test_files/scalar.slt
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,52 @@ select round(nanvl(asin(f + a), 2), 5), round(nanvl(asin(b + c), 3), 5), round(n
2 -1.11977 4
NULL NULL NULL

## isnan

# isnan scalar function
query BBB
select isnan(10.0), isnan('NaN'::DOUBLE), isnan(-'NaN'::DOUBLE)
----
false true true

# isnan scalar nulls
query B
select isnan(NULL)
----
NULL

# isnan with columns
query BBBB
select isnan(asin(a + b + c)), isnan(-asin(a + b + c)), isnan(asin(d + e + f)), isnan(-asin(d + e + f)) from small_floats;
----
true true false false
false false true true
true true false false
NULL NULL NULL NULL

## iszero

# iszero scalar function
query BBB
select iszero(10.0), iszero(0.0), iszero(-0.0)
----
false true true

# iszero scalar nulls
query B
select iszero(NULL)
----
NULL

# iszero with columns
query BBBB
select iszero(floor(a + b + c)), iszero(-floor(a + b + c)), iszero(floor(d + e + f)), iszero(-floor(d + e + f)) from small_floats;
----
false false false false
true true false false
false false true true
NULL NULL NULL NULL

## pi

# pi scalar function
Expand Down
14 changes: 13 additions & 1 deletion datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ pub enum BuiltinScalarFunction {
Gcd,
/// lcm, Least common multiple
Lcm,
/// isnan
Isnan,
/// iszero
Iszero,
/// ln, Natural logarithm
Ln,
/// log, same as log10
Expand Down Expand Up @@ -333,6 +337,8 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Factorial => Volatility::Immutable,
BuiltinScalarFunction::Floor => Volatility::Immutable,
BuiltinScalarFunction::Gcd => Volatility::Immutable,
BuiltinScalarFunction::Isnan => Volatility::Immutable,
BuiltinScalarFunction::Iszero => Volatility::Immutable,
BuiltinScalarFunction::Lcm => Volatility::Immutable,
BuiltinScalarFunction::Ln => Volatility::Immutable,
BuiltinScalarFunction::Log => Volatility::Immutable,
Expand Down Expand Up @@ -789,6 +795,8 @@ impl BuiltinScalarFunction {
_ => Ok(Float64),
},

BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => Ok(Boolean),

BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),

BuiltinScalarFunction::Abs
Expand Down Expand Up @@ -1186,7 +1194,9 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::Sqrt
| BuiltinScalarFunction::Tan
| BuiltinScalarFunction::Tanh
| BuiltinScalarFunction::Cot => {
| BuiltinScalarFunction::Cot
| BuiltinScalarFunction::Isnan
| BuiltinScalarFunction::Iszero => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not consider Isnan and Iszero to be math expressions like the ones listed here. The reason for giving f64 high priority also does not apply to these two expressions, I think.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the above reasoning, it may be less confusing to have them in a separate pattern.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the late reply.
Yes, f64 doesn't need high priority.
I've changed it.

// math expressions expect 1 argument of type f64 or f32
// priority is given to f64 because e.g. `sqrt(1i32)` is in ℝ (the real numbers) and thus we
// return the best approximation for it (in f64).
Expand Down Expand Up @@ -1223,6 +1233,8 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
BuiltinScalarFunction::Factorial => &["factorial"],
BuiltinScalarFunction::Floor => &["floor"],
BuiltinScalarFunction::Gcd => &["gcd"],
BuiltinScalarFunction::Isnan => &["isnan"],
BuiltinScalarFunction::Iszero => &["iszero"],
BuiltinScalarFunction::Lcm => &["lcm"],
BuiltinScalarFunction::Ln => &["ln"],
BuiltinScalarFunction::Log => &["log"],
Expand Down
14 changes: 14 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,18 @@ scalar_expr!(CurrentDate, current_date, ,"returns current UTC date as a [`DataTy
scalar_expr!(Now, now, ,"returns current timestamp in nanoseconds, using the same value for all instances of now() in same statement");
scalar_expr!(CurrentTime, current_time, , "returns current UTC time as a [`DataType::Time64`] value");
scalar_expr!(Nanvl, nanvl, x y, "returns x if x is not NaN otherwise returns y");
scalar_expr!(
Isnan,
isnan,
num,
"returns true if a given number is +NaN or -NaN otherwise returns false"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't know there are +NaN and -NaN. I thought there was only one type of NaN and that NaNs are unordered, unlike +0 and -0, which are distinct.

Copy link
Contributor

@tustvold tustvold Aug 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any value with an exponent field of all 1s and a non-zero mantissa is a NaN (a zero mantissa encodes infinity), and so there are 2^N - 1 distinct values of NaN, where N is the number of mantissa bits. The same is true of -NaN.

Within arrow-rs we follow the IEEE 754 total order predicate, which establishes an ordering for NaNs (and infinity, etc.).

);
// Declares the `iszero` expression helper with a single argument `num`.
// NOTE(review): `scalar_expr!` is defined outside this view; presumably it
// expands to a documented `pub fn iszero(num)` that builds a scalar-function
// expression for the `Iszero` built-in — confirm against the macro.
scalar_expr!(
Iszero,
iszero,
num,
"returns true if a given number is +0.0 or -0.0 otherwise returns false"
);

scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type");

Expand Down Expand Up @@ -1003,6 +1015,8 @@ mod test {
test_unary_scalar_expr!(Ln, ln);
test_scalar_expr!(Atan2, atan2, y, x);
test_scalar_expr!(Nanvl, nanvl, x, y);
test_scalar_expr!(Isnan, isnan, input);
test_scalar_expr!(Iszero, iszero, input);

test_scalar_expr!(Ascii, ascii, input);
test_scalar_expr!(BitLength, bit_length, string);
Expand Down
6 changes: 6 additions & 0 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,12 @@ pub fn create_physical_fun(
BuiltinScalarFunction::Gcd => {
Arc::new(|args| make_scalar_function(math_expressions::gcd)(args))
}
BuiltinScalarFunction::Isnan => {
Arc::new(|args| make_scalar_function(math_expressions::isnan)(args))
}
BuiltinScalarFunction::Iszero => {
Arc::new(|args| make_scalar_function(math_expressions::iszero)(args))
}
BuiltinScalarFunction::Lcm => {
Arc::new(|args| make_scalar_function(math_expressions::lcm)(args))
}
Expand Down
141 changes: 139 additions & 2 deletions datafusion/physical-expr/src/math_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Math expressions

use arrow::array::ArrayRef;
use arrow::array::{Float32Array, Float64Array, Int64Array};
use arrow::array::{BooleanArray, Float32Array, Float64Array, Int64Array};
use arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_common::ScalarValue::{Float32, Int64};
Expand Down Expand Up @@ -142,6 +142,19 @@ macro_rules! make_function_inputs2 {
}};
}

/// Applies a unary function to every non-null element of a typed array and
/// collects the results into a collection of a (possibly different) type.
///
/// * `$ARG` - array expression; it is handed to `downcast_arg!` together with
///   `$NAME` and `$ARGS_TYPE` to obtain the concrete input array
/// * `$NAME` - argument name forwarded to `downcast_arg!`
/// * `$ARGS_TYPE` - concrete type of the input array
/// * `$RETURN_TYPE` - type the optional results are collected into
/// * `$FUNC` - block evaluating to the callable applied to each present value
///
/// Null slots are passed through as nulls; `$FUNC` is never called for them.
macro_rules! make_function_scalar_inputs_return_type {
    ($ARG: expr, $NAME:expr, $ARGS_TYPE:ident, $RETURN_TYPE:ident, $FUNC: block) => {{
        let typed_input = downcast_arg!($ARG, $NAME, $ARGS_TYPE);

        typed_input
            .iter()
            .map(|slot| slot.map($FUNC))
            .collect::<$RETURN_TYPE>()
    }};
}

math_unary_function!("sqrt", sqrt);
math_unary_function!("cbrt", cbrt);
math_unary_function!("sin", sin);
Expand Down Expand Up @@ -306,6 +319,56 @@ pub fn nanvl(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}

/// Isnan SQL function
pub fn isnan(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float64Array,
BooleanArray,
{ f64::is_nan }
)) as ArrayRef),

DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float32Array,
BooleanArray,
{ f32::is_nan }
)) as ArrayRef),

other => Err(DataFusionError::Internal(format!(
"Unsupported data type {other:?} for function isnan"
))),
}
}

/// Iszero SQL function
pub fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float64Array,
BooleanArray,
{ |x: f64| { x == 0_f64 } }
)) as ArrayRef),

DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float32Array,
BooleanArray,
{ |x: f32| { x == 0_f32 } }
)) as ArrayRef),

other => Err(DataFusionError::Internal(format!(
"Unsupported data type {other:?} for function iszero"
))),
}
}

/// Pi SQL function
pub fn pi(args: &[ColumnarValue]) -> Result<ColumnarValue> {
if !matches!(&args[0], ColumnarValue::Array(_)) {
Expand Down Expand Up @@ -650,7 +713,9 @@ mod tests {

use super::*;
use arrow::array::{Float64Array, NullArray};
use datafusion_common::cast::{as_float32_array, as_float64_array, as_int64_array};
use datafusion_common::cast::{
as_boolean_array, as_float32_array, as_float64_array, as_int64_array,
};

#[test]
fn test_random_expression() {
Expand Down Expand Up @@ -1041,4 +1106,76 @@ mod tests {
assert_eq!(floats.value(2), 3.0);
assert!(floats.value(3).is_nan());
}

/// `isnan` on a Float64 array flags both NaN and negated NaN, and nothing else.
#[test]
fn test_isnan_f64() {
    let input = Float64Array::from(vec![1.0, f64::NAN, 3.0, -f64::NAN]);
    let args: Vec<ArrayRef> = vec![Arc::new(input)];

    let result = isnan(&args).expect("failed to initialize function isnan");
    let booleans =
        as_boolean_array(&result).expect("failed to initialize function isnan");

    let expected = [false, true, false, true];
    assert_eq!(booleans.len(), 4);
    for (row, want) in expected.into_iter().enumerate() {
        assert_eq!(booleans.value(row), want);
    }
}

/// `isnan` on a Float32 array flags both NaN and negated NaN, and nothing else.
#[test]
fn test_isnan_f32() {
    // The last element is a NaN with the sign bit set, mirroring the f64
    // test, so the "-NaN" case is covered for f32 as well.
    let args: Vec<ArrayRef> = vec![Arc::new(Float32Array::from(vec![
        1.0,
        f32::NAN,
        3.0,
        -f32::NAN,
    ]))];

    let result = isnan(&args).expect("failed to initialize function isnan");
    let booleans =
        as_boolean_array(&result).expect("failed to initialize function isnan");

    assert_eq!(booleans.len(), 4);
    assert!(!booleans.value(0));
    assert!(booleans.value(1));
    assert!(!booleans.value(2));
    assert!(booleans.value(3));
}

/// `iszero` on a Float64 array flags both `0.0` and `-0.0`, and nothing else.
#[test]
fn test_iszero_f64() {
    let input = Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]);
    let args: Vec<ArrayRef> = vec![Arc::new(input)];

    let result = iszero(&args).expect("failed to initialize function iszero");
    let booleans =
        as_boolean_array(&result).expect("failed to initialize function iszero");

    let expected = [false, true, false, true];
    assert_eq!(booleans.len(), 4);
    for (row, want) in expected.into_iter().enumerate() {
        assert_eq!(booleans.value(row), want);
    }
}

/// `iszero` on a Float32 array flags both `0.0` and `-0.0`, and nothing else.
#[test]
fn test_iszero_f32() {
    let input = Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]);
    let args: Vec<ArrayRef> = vec![Arc::new(input)];

    let result = iszero(&args).expect("failed to initialize function iszero");
    let booleans =
        as_boolean_array(&result).expect("failed to initialize function iszero");

    let expected = [false, true, false, true];
    assert_eq!(booleans.len(), 4);
    for (row, want) in expected.into_iter().enumerate() {
        assert_eq!(booleans.value(row), want);
    }
}
}
2 changes: 2 additions & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,8 @@ enum ScalarFunction {
ArrayReplaceAll = 110;
Nanvl = 111;
Flatten = 112;
Isnan = 113;
Iszero = 114;
}

message ScalarFunctionNode {
Expand Down
6 changes: 6 additions & 0 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading