diff --git a/datafusion/functions/src/math/gcd.rs b/datafusion/functions/src/math/gcd.rs index ace4937d6d6f..95a559c5d103 100644 --- a/datafusion/functions/src/math/gcd.rs +++ b/datafusion/functions/src/math/gcd.rs @@ -93,7 +93,7 @@ fn gcd(args: &[ArrayRef]) -> Result { } /// Computes gcd of two unsigned integers using Binary GCD algorithm. -fn unsigned_gcd(mut a: u64, mut b: u64) -> u64 { +pub(super) fn unsigned_gcd(mut a: u64, mut b: u64) -> u64 { if a == 0 { return b; } diff --git a/datafusion/functions/src/math/lcm.rs b/datafusion/functions/src/math/lcm.rs index d58bc5ef0bf5..21c201657e90 100644 --- a/datafusion/functions/src/math/lcm.rs +++ b/datafusion/functions/src/math/lcm.rs @@ -27,7 +27,7 @@ use datafusion_common::{arrow_datafusion_err, exec_err, DataFusionError, Result} use datafusion_expr::ColumnarValue; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -use crate::math::gcd::compute_gcd; +use super::gcd::unsigned_gcd; use crate::utils::make_scalar_function; #[derive(Debug)] @@ -75,19 +75,23 @@ impl ScalarUDFImpl for LcmFunc { /// Lcm SQL function fn lcm(args: &[ArrayRef]) -> Result { let compute_lcm = |x: i64, y: i64| { - let a = x.wrapping_abs(); - let b = y.wrapping_abs(); - - if a == 0 || b == 0 { + if x == 0 || y == 0 { return Ok(0); } - match compute_gcd(a, b) { - Ok(gcd) => Ok(a / gcd * b), - // change the error string to use LCM instead of GCD - Err(_) => Err(arrow_datafusion_err!(ArrowError::ComputeError(format!( - "Signed integer overflow in LCM({x}, {y})" - )))), - } + + // lcm(x, y) = |x| * |y| / gcd(|x|, |y|) + let a = x.unsigned_abs(); + let b = y.unsigned_abs(); + let gcd = unsigned_gcd(a, b); + // gcd is not zero since both a and b are not zero, so the division is safe. + (a / gcd) + .checked_mul(b) + .and_then(|v| i64::try_from(v).ok()) + .ok_or_else(|| { + arrow_datafusion_err!(ArrowError::ComputeError(format!( + "Signed integer overflow in LCM({x}, {y})" + ))) + }) }; match args[0].data_type() { diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt index 19de6560c26f..ec1b0cfd93f8 100644 --- a/datafusion/sqllogictest/test_files/math.slt +++ b/datafusion/sqllogictest/test_files/math.slt @@ -565,6 +565,21 @@ SELECT c1%0 FROM test_non_nullable_decimal statement ok drop table test_non_nullable_decimal +statement ok +CREATE TABLE signed_integers( + a INT, + b INT, + c INT, + d INT, + e INT, + f INT +) as VALUES + (-1, 100, -567, 1024, -4, 10), + (2, -1000, 123, -256, 5, -11), + (-3, 10000, -978, 2048, -6, 12), + (4, NULL, NULL, -512, NULL, NULL) +; + query II select gcd(20, 1000), lcm(20, 1000); ---- @@ -587,12 +602,54 @@ select gcd(0, 0), gcd(-100, 0), gcd(0, -100); ---- 0 100 100 + +## lcm + +# Basic cases +query IIIII +select lcm(0, 0), lcm(0, 2), lcm(3, 0), lcm(2, 3), lcm(15, 10); +---- +0 0 0 6 30 + +# Test lcm with negative numbers +query IIIII +select lcm(0, -2), lcm(-3, 0), lcm(-2, 3), lcm(15, -10), lcm(-15, -10) +---- +0 0 6 30 30 + +# Test lcm with Nulls +query III +select lcm(null, 64), lcm(16, null), lcm(null, null) +---- +NULL NULL NULL + +# Test lcm with columns +query III rowsort +select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers; +---- +100 580608 20 +1000 31488 55 +30000 1001472 12 +NULL NULL NULL + +# Result cannot fit in i64 query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(\-9223372036854775808, \-9223372036854775808\) select lcm(-9223372036854775808, -9223372036854775808); +query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(1, \-9223372036854775808\) +select lcm(1, -9223372036854775808); + +# Overflow on multiplication +query error DataFusion error: Arrow error: Compute error: Signed integer overflow in LCM\(2, 9223372036854775803\) +select lcm(2, 9223372036854775803); + + query error DataFusion error: Arrow error: Compute error: Overflow happened on: 2107754225 \^ 1221660777 select power(2107754225, 1221660777); # factorial overflow query error DataFusion error: Arrow error: Compute error: Overflow happened on FACTORIAL\(350943270\) select FACTORIAL(350943270); + +statement ok +drop table signed_integers \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 551c50e0a17b..a68d1cc7a770 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -493,41 +493,6 @@ select gcd(a, b), gcd(c, d), gcd(e, f) from signed_integers; 2 1 1 NULL NULL NULL -## lcm - -# lcm scalar function -query III rowsort -select lcm(0, 0), lcm(2, 3), lcm(15, 10); ----- -0 6 30 - -# lcm scalar nulls -query I rowsort -select lcm(null, 64); ----- -NULL - -# lcm scalar nulls #1 -query I rowsort -select lcm(2, null); ----- -NULL - -# lcm scalar nulls #2 -query I rowsort -select lcm(null, null); ----- -NULL - -# lcm with columns -query III rowsort -select lcm(a, b), lcm(c, d), lcm(e, f) from signed_integers; ----- -100 580608 20 -1000 31488 55 -30000 1001472 12 -NULL NULL NULL - ## ln # ln scalar function