Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add isnan and iszero #7274

Merged
merged 6 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datafusion/core/tests/sql/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ async fn test_mathematical_expressions_with_null() -> Result<()> {
test_expression!("nanvl(NULL, NULL)", "NULL");
test_expression!("nanvl(1, NULL)", "NULL");
test_expression!("nanvl(NULL, 1)", "NULL");
test_expression!("isnan(NULL)", "NULL");
test_expression!("iszero(NULL)", "NULL");
Ok(())
}

Expand Down
16 changes: 16 additions & 0 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ pub enum BuiltinScalarFunction {
Gcd,
/// lcm, Least common multiple
Lcm,
/// isnan
Isnan,
/// iszero
Iszero,
/// ln, Natural logarithm
Ln,
/// log, same as log10
Expand Down Expand Up @@ -334,6 +338,8 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Factorial => Volatility::Immutable,
BuiltinScalarFunction::Floor => Volatility::Immutable,
BuiltinScalarFunction::Gcd => Volatility::Immutable,
BuiltinScalarFunction::Isnan => Volatility::Immutable,
BuiltinScalarFunction::Iszero => Volatility::Immutable,
BuiltinScalarFunction::Lcm => Volatility::Immutable,
BuiltinScalarFunction::Ln => Volatility::Immutable,
BuiltinScalarFunction::Log => Volatility::Immutable,
Expand Down Expand Up @@ -774,6 +780,8 @@ impl BuiltinScalarFunction {
_ => Ok(Float64),
},

BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => Ok(Boolean),

BuiltinScalarFunction::ArrowTypeof => Ok(Utf8),

BuiltinScalarFunction::Abs
Expand Down Expand Up @@ -1184,6 +1192,12 @@ impl BuiltinScalarFunction {
| BuiltinScalarFunction::CurrentTime => {
Signature::uniform(0, vec![], self.volatility())
}
BuiltinScalarFunction::Isnan | BuiltinScalarFunction::Iszero => {
Signature::one_of(
vec![Exact(vec![Float32]), Exact(vec![Float64])],
self.volatility(),
)
}
}
}
}
Expand All @@ -1208,6 +1222,8 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
BuiltinScalarFunction::Factorial => &["factorial"],
BuiltinScalarFunction::Floor => &["floor"],
BuiltinScalarFunction::Gcd => &["gcd"],
BuiltinScalarFunction::Isnan => &["isnan"],
BuiltinScalarFunction::Iszero => &["iszero"],
BuiltinScalarFunction::Lcm => &["lcm"],
BuiltinScalarFunction::Ln => &["ln"],
BuiltinScalarFunction::Log => &["log"],
Expand Down
14 changes: 14 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,18 @@ scalar_expr!(CurrentDate, current_date, ,"returns current UTC date as a [`DataTy
scalar_expr!(Now, now, ,"returns current timestamp in nanoseconds, using the same value for all instances of now() in same statement");
scalar_expr!(CurrentTime, current_time, , "returns current UTC time as a [`DataType::Time64`] value");
scalar_expr!(Nanvl, nanvl, x y, "returns x if x is not NaN otherwise returns y");
scalar_expr!(
Isnan,
isnan,
num,
"returns true if a given number is +NaN or -NaN otherwise returns false"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i didn't know there are +NaN and -NaN. i thought there's only one type of NaN and (they) are unordered. unlike +0 and -0 which are distinct.

Copy link
Contributor

@tustvold tustvold Aug 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any value with an exponent field of all 1s is a Nan, and so there are 2^N distinct values of NaN, where N is the number of mantissa bits. The same is true of -NaN

Within arrow-rs we follow the IEEE 754 total order predicate which establishes an ordering for NaNs, (and infinity, etc...)

);
scalar_expr!(
Iszero,
iszero,
num,
"returns true if a given number is +0.0 or -0.0 otherwise returns false"
);

scalar_expr!(ArrowTypeof, arrow_typeof, val, "data type");

Expand Down Expand Up @@ -1003,6 +1015,8 @@ mod test {
test_unary_scalar_expr!(Ln, ln);
test_scalar_expr!(Atan2, atan2, y, x);
test_scalar_expr!(Nanvl, nanvl, x, y);
test_scalar_expr!(Isnan, isnan, input);
test_scalar_expr!(Iszero, iszero, input);

test_scalar_expr!(Ascii, ascii, input);
test_scalar_expr!(BitLength, bit_length, string);
Expand Down
6 changes: 6 additions & 0 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,12 @@ pub fn create_physical_fun(
BuiltinScalarFunction::Gcd => {
Arc::new(|args| make_scalar_function(math_expressions::gcd)(args))
}
BuiltinScalarFunction::Isnan => {
Arc::new(|args| make_scalar_function(math_expressions::isnan)(args))
}
BuiltinScalarFunction::Iszero => {
Arc::new(|args| make_scalar_function(math_expressions::iszero)(args))
}
BuiltinScalarFunction::Lcm => {
Arc::new(|args| make_scalar_function(math_expressions::lcm)(args))
}
Expand Down
141 changes: 139 additions & 2 deletions datafusion/physical-expr/src/math_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Math expressions

use arrow::array::ArrayRef;
use arrow::array::{Float32Array, Float64Array, Int64Array};
use arrow::array::{BooleanArray, Float32Array, Float64Array, Int64Array};
use arrow::datatypes::DataType;
use datafusion_common::ScalarValue;
use datafusion_common::ScalarValue::{Float32, Int64};
Expand Down Expand Up @@ -142,6 +142,19 @@ macro_rules! make_function_inputs2 {
}};
}

macro_rules! make_function_scalar_inputs_return_type {
($ARG: expr, $NAME:expr, $ARGS_TYPE:ident, $RETURN_TYPE:ident, $FUNC: block) => {{
let arg = downcast_arg!($ARG, $NAME, $ARGS_TYPE);

arg.iter()
.map(|a| match a {
Some(a) => Some($FUNC(a)),
_ => None,
})
.collect::<$RETURN_TYPE>()
}};
}

math_unary_function!("sqrt", sqrt);
math_unary_function!("cbrt", cbrt);
math_unary_function!("sin", sin);
Expand Down Expand Up @@ -306,6 +319,56 @@ pub fn nanvl(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}

/// Isnan SQL function
pub fn isnan(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float64Array,
BooleanArray,
{ f64::is_nan }
)) as ArrayRef),

DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float32Array,
BooleanArray,
{ f32::is_nan }
)) as ArrayRef),

other => Err(DataFusionError::Internal(format!(
"Unsupported data type {other:?} for function isnan"
))),
}
}

/// Iszero SQL function
pub fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
match args[0].data_type() {
DataType::Float64 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float64Array,
BooleanArray,
{ |x: f64| { x == 0_f64 } }
)) as ArrayRef),

DataType::Float32 => Ok(Arc::new(make_function_scalar_inputs_return_type!(
&args[0],
"x",
Float32Array,
BooleanArray,
{ |x: f32| { x == 0_f32 } }
)) as ArrayRef),

other => Err(DataFusionError::Internal(format!(
"Unsupported data type {other:?} for function iszero"
))),
}
}

/// Pi SQL function
pub fn pi(args: &[ColumnarValue]) -> Result<ColumnarValue> {
if !matches!(&args[0], ColumnarValue::Array(_)) {
Expand Down Expand Up @@ -650,7 +713,9 @@ mod tests {

use super::*;
use arrow::array::{Float64Array, NullArray};
use datafusion_common::cast::{as_float32_array, as_float64_array, as_int64_array};
use datafusion_common::cast::{
as_boolean_array, as_float32_array, as_float64_array, as_int64_array,
};

#[test]
fn test_random_expression() {
Expand Down Expand Up @@ -1041,4 +1106,76 @@ mod tests {
assert_eq!(floats.value(2), 3.0);
assert!(floats.value(3).is_nan());
}

#[test]
fn test_isnan_f64() {
let args: Vec<ArrayRef> = vec![Arc::new(Float64Array::from(vec![
1.0,
f64::NAN,
3.0,
-f64::NAN,
]))];

let result = isnan(&args).expect("failed to initialize function isnan");
let booleans =
as_boolean_array(&result).expect("failed to initialize function isnan");

assert_eq!(booleans.len(), 4);
assert!(!booleans.value(0));
assert!(booleans.value(1));
assert!(!booleans.value(2));
assert!(booleans.value(3));
}

#[test]
fn test_isnan_f32() {
let args: Vec<ArrayRef> = vec![Arc::new(Float32Array::from(vec![
1.0,
f32::NAN,
3.0,
f32::NAN,
]))];

let result = isnan(&args).expect("failed to initialize function isnan");
let booleans =
as_boolean_array(&result).expect("failed to initialize function isnan");

assert_eq!(booleans.len(), 4);
assert!(!booleans.value(0));
assert!(booleans.value(1));
assert!(!booleans.value(2));
assert!(booleans.value(3));
}

#[test]
fn test_iszero_f64() {
let args: Vec<ArrayRef> =
vec![Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]))];

let result = iszero(&args).expect("failed to initialize function iszero");
let booleans =
as_boolean_array(&result).expect("failed to initialize function iszero");

assert_eq!(booleans.len(), 4);
assert!(!booleans.value(0));
assert!(booleans.value(1));
assert!(!booleans.value(2));
assert!(booleans.value(3));
}

#[test]
fn test_iszero_f32() {
let args: Vec<ArrayRef> =
vec![Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]))];

let result = iszero(&args).expect("failed to initialize function iszero");
let booleans =
as_boolean_array(&result).expect("failed to initialize function iszero");

assert_eq!(booleans.len(), 4);
assert!(!booleans.value(0));
assert!(booleans.value(1));
assert!(!booleans.value(2));
assert!(booleans.value(3));
}
}
2 changes: 2 additions & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,8 @@ enum ScalarFunction {
ArrayReplaceAll = 110;
Nanvl = 111;
Flatten = 112;
Isnan = 113;
Iszero = 114;
}

message ScalarFunctionNode {
Expand Down
6 changes: 6 additions & 0 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion datafusion/proto/src/logical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use datafusion_expr::{
concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, date_part,
date_trunc, degrees, digest, exp,
expr::{self, InList, Sort, WindowFunction},
factorial, floor, from_unixtime, gcd, lcm, left, ln, log, log10, log2,
factorial, floor, from_unixtime, gcd, isnan, iszero, lcm, left, ln, log, log10, log2,
logical_plan::{PlanType, StringifiedPlan},
lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, pi, power, radians,
random, regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad,
Expand Down Expand Up @@ -525,6 +525,8 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::FromUnixtime => Self::FromUnixtime,
ScalarFunction::Atan2 => Self::Atan2,
ScalarFunction::Nanvl => Self::Nanvl,
ScalarFunction::Isnan => Self::Isnan,
ScalarFunction::Iszero => Self::Iszero,
ScalarFunction::ArrowTypeof => Self::ArrowTypeof,
}
}
Expand Down Expand Up @@ -1577,6 +1579,8 @@ pub fn parse_expr(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
ScalarFunction::Isnan => Ok(isnan(parse_expr(&args[0], registry)?)),
ScalarFunction::Iszero => Ok(iszero(parse_expr(&args[0], registry)?)),
_ => Err(proto_error(
"Protobuf deserialization error: Unsupported scalar function",
)),
Expand Down
2 changes: 2 additions & 0 deletions datafusion/proto/src/logical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1524,6 +1524,8 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::FromUnixtime => Self::FromUnixtime,
BuiltinScalarFunction::Atan2 => Self::Atan2,
BuiltinScalarFunction::Nanvl => Self::Nanvl,
BuiltinScalarFunction::Isnan => Self::Isnan,
BuiltinScalarFunction::Iszero => Self::Iszero,
BuiltinScalarFunction::ArrowTypeof => Self::ArrowTypeof,
};

Expand Down
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/math.slt
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,15 @@ query RRR
SELECT nanvl(asin(10), 1.0), nanvl(1.0, 2.0), nanvl(asin(10), asin(10))
----
1 1 NaN

# isnan
query BBBB
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

SELECT isnan(1.0), isnan('NaN'::DOUBLE), isnan(-'NaN'::DOUBLE), isnan(NULL)
----
false true true NULL

# iszero
query BBBB
SELECT iszero(1.0), iszero(0.0), iszero(-0.0), iszero(NULL)
----
false true true NULL
Loading