Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add missing scalar math functions #465

Merged
merged 1 commit into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion datafusion/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import math

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -85,12 +86,15 @@ def test_math_functions():
ctx = SessionContext()
# create a RecordBatch and a new DataFrame from it
batch = pa.RecordBatch.from_arrays(
[pa.array([0.1, -0.7, 0.55])], names=["value"]
[pa.array([0.1, -0.7, 0.55]), pa.array([float("nan"), 0, 2.0])],
names=["value", "na_value"],
)
df = ctx.create_dataframe([[batch]])

values = np.array([0.1, -0.7, 0.55])
na_values = np.array([np.nan, 0, 2.0])
col_v = column("value")
col_nav = column("na_value")
df = df.select(
f.abs(col_v),
f.sin(col_v),
Expand All @@ -113,6 +117,20 @@ def test_math_functions():
f.sqrt(col_v),
f.signum(col_v),
f.trunc(col_v),
f.asinh(col_v),
f.acosh(col_v),
f.atanh(col_v),
f.cbrt(col_v),
f.cosh(col_v),
f.degrees(col_v),
f.gcd(literal(9), literal(3)),
f.lcm(literal(6), literal(4)),
f.nanvl(col_nav, literal(5)),
f.pi(),
f.radians(col_v),
f.sinh(col_v),
f.tanh(col_v),
f.factorial(literal(6)),
)
batches = df.collect()
assert len(batches) == 1
Expand Down Expand Up @@ -151,6 +169,22 @@ def test_math_functions():
np.testing.assert_array_almost_equal(result.column(18), np.sqrt(values))
np.testing.assert_array_almost_equal(result.column(19), np.sign(values))
np.testing.assert_array_almost_equal(result.column(20), np.trunc(values))
np.testing.assert_array_almost_equal(result.column(21), np.arcsinh(values))
np.testing.assert_array_almost_equal(result.column(22), np.arccosh(values))
np.testing.assert_array_almost_equal(result.column(23), np.arctanh(values))
np.testing.assert_array_almost_equal(result.column(24), np.cbrt(values))
np.testing.assert_array_almost_equal(result.column(25), np.cosh(values))
np.testing.assert_array_almost_equal(result.column(26), np.degrees(values))
np.testing.assert_array_almost_equal(result.column(27), np.gcd(9, 3))
np.testing.assert_array_almost_equal(result.column(28), np.lcm(6, 4))
np.testing.assert_array_almost_equal(
result.column(29), np.where(np.isnan(na_values), 5, na_values)
)
np.testing.assert_array_almost_equal(result.column(30), np.pi)
np.testing.assert_array_almost_equal(result.column(31), np.radians(values))
np.testing.assert_array_almost_equal(result.column(32), np.sinh(values))
np.testing.assert_array_almost_equal(result.column(33), np.tanh(values))
np.testing.assert_array_almost_equal(result.column(34), math.factorial(6))


def test_string_functions(df):
Expand Down
32 changes: 32 additions & 0 deletions src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,20 @@ macro_rules! aggregate_function {

scalar_function!(abs, Abs);
scalar_function!(acos, Acos);
scalar_function!(acosh, Acosh);
scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character.");
scalar_function!(asin, Asin);
scalar_function!(asinh, Asinh);
scalar_function!(atan, Atan);
scalar_function!(atanh, Atanh);
scalar_function!(atan2, Atan2);
scalar_function!(
bit_length,
BitLength,
"Returns number of bits in the string (8 times the octet_length)."
);
scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string.");
scalar_function!(cbrt, Cbrt);
scalar_function!(ceil, Ceil);
scalar_function!(
character_length,
Expand All @@ -219,9 +223,14 @@ scalar_function!(char_length, CharacterLength);
scalar_function!(chr, Chr, "Returns the character with the given code.");
scalar_function!(coalesce, Coalesce);
scalar_function!(cos, Cos);
scalar_function!(cosh, Cosh);
scalar_function!(degrees, Degrees);
scalar_function!(exp, Exp);
scalar_function!(factorial, Factorial);
scalar_function!(floor, Floor);
scalar_function!(gcd, Gcd);
scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.");
scalar_function!(lcm, Lcm);
scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters.");
scalar_function!(ln, Ln);
scalar_function!(log, Log);
Expand All @@ -235,9 +244,16 @@ scalar_function!(
MD5,
"Computes the MD5 hash of the argument, with the result written in hexadecimal."
);
// NaN fallback: the accompanying Python test expects `nanvl(x, y)` to yield `y`
// wherever `x` is NaN and `x` everywhere else.
scalar_function!(
    nanvl,
    Nanvl,
    "Returns the first argument if it is not NaN; otherwise returns the second argument."
);
scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces.");
scalar_function!(pi, Pi);
scalar_function!(power, Power);
scalar_function!(pow, Power);
scalar_function!(radians, Radians);
scalar_function!(regexp_match, RegexpMatch);
scalar_function!(
regexp_replace,
Expand Down Expand Up @@ -269,6 +285,7 @@ scalar_function!(sha384, SHA384);
scalar_function!(sha512, SHA512);
scalar_function!(signum, Signum);
scalar_function!(sin, Sin);
scalar_function!(sinh, Sinh);
scalar_function!(
split_part,
SplitPart,
Expand All @@ -283,6 +300,7 @@ scalar_function!(
scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)");
scalar_function!(substr, Substr);
scalar_function!(tan, Tan);
scalar_function!(tanh, Tanh);
scalar_function!(
to_hex,
ToHex,
Expand Down Expand Up @@ -343,6 +361,7 @@ aggregate_function!(var_samp, Variance);
pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(abs))?;
m.add_wrapped(wrap_pyfunction!(acos))?;
m.add_wrapped(wrap_pyfunction!(acosh))?;
m.add_wrapped(wrap_pyfunction!(approx_distinct))?;
m.add_wrapped(wrap_pyfunction!(alias))?;
m.add_wrapped(wrap_pyfunction!(approx_median))?;
Expand All @@ -353,11 +372,14 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(arrow_typeof))?;
m.add_wrapped(wrap_pyfunction!(ascii))?;
m.add_wrapped(wrap_pyfunction!(asin))?;
m.add_wrapped(wrap_pyfunction!(asinh))?;
m.add_wrapped(wrap_pyfunction!(atan))?;
m.add_wrapped(wrap_pyfunction!(atanh))?;
m.add_wrapped(wrap_pyfunction!(atan2))?;
m.add_wrapped(wrap_pyfunction!(avg))?;
m.add_wrapped(wrap_pyfunction!(bit_length))?;
m.add_wrapped(wrap_pyfunction!(btrim))?;
m.add_wrapped(wrap_pyfunction!(cbrt))?;
m.add_wrapped(wrap_pyfunction!(ceil))?;
m.add_wrapped(wrap_pyfunction!(character_length))?;
m.add_wrapped(wrap_pyfunction!(chr))?;
Expand All @@ -369,25 +391,30 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(concat))?;
m.add_wrapped(wrap_pyfunction!(corr))?;
m.add_wrapped(wrap_pyfunction!(cos))?;
m.add_wrapped(wrap_pyfunction!(cosh))?;
m.add_wrapped(wrap_pyfunction!(count))?;
m.add_wrapped(wrap_pyfunction!(count_star))?;
m.add_wrapped(wrap_pyfunction!(covar))?;
m.add_wrapped(wrap_pyfunction!(covar_pop))?;
m.add_wrapped(wrap_pyfunction!(covar_samp))?;
m.add_wrapped(wrap_pyfunction!(current_date))?;
m.add_wrapped(wrap_pyfunction!(current_time))?;
m.add_wrapped(wrap_pyfunction!(degrees))?;
m.add_wrapped(wrap_pyfunction!(date_bin))?;
m.add_wrapped(wrap_pyfunction!(datepart))?;
m.add_wrapped(wrap_pyfunction!(date_part))?;
m.add_wrapped(wrap_pyfunction!(datetrunc))?;
m.add_wrapped(wrap_pyfunction!(date_trunc))?;
m.add_wrapped(wrap_pyfunction!(digest))?;
m.add_wrapped(wrap_pyfunction!(exp))?;
m.add_wrapped(wrap_pyfunction!(factorial))?;
m.add_wrapped(wrap_pyfunction!(floor))?;
m.add_wrapped(wrap_pyfunction!(from_unixtime))?;
m.add_wrapped(wrap_pyfunction!(gcd))?;
m.add_wrapped(wrap_pyfunction!(grouping))?;
m.add_wrapped(wrap_pyfunction!(in_list))?;
m.add_wrapped(wrap_pyfunction!(initcap))?;
m.add_wrapped(wrap_pyfunction!(lcm))?;
m.add_wrapped(wrap_pyfunction!(left))?;
m.add_wrapped(wrap_pyfunction!(length))?;
m.add_wrapped(wrap_pyfunction!(ln))?;
Expand All @@ -403,12 +430,15 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(mean))?;
m.add_wrapped(wrap_pyfunction!(median))?;
m.add_wrapped(wrap_pyfunction!(min))?;
m.add_wrapped(wrap_pyfunction!(nanvl))?;
m.add_wrapped(wrap_pyfunction!(now))?;
m.add_wrapped(wrap_pyfunction!(nullif))?;
m.add_wrapped(wrap_pyfunction!(octet_length))?;
m.add_wrapped(wrap_pyfunction!(order_by))?;
m.add_wrapped(wrap_pyfunction!(pi))?;
m.add_wrapped(wrap_pyfunction!(power))?;
m.add_wrapped(wrap_pyfunction!(pow))?;
m.add_wrapped(wrap_pyfunction!(radians))?;
m.add_wrapped(wrap_pyfunction!(random))?;
m.add_wrapped(wrap_pyfunction!(regexp_match))?;
m.add_wrapped(wrap_pyfunction!(regexp_replace))?;
Expand All @@ -425,6 +455,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(sha512))?;
m.add_wrapped(wrap_pyfunction!(signum))?;
m.add_wrapped(wrap_pyfunction!(sin))?;
m.add_wrapped(wrap_pyfunction!(sinh))?;
m.add_wrapped(wrap_pyfunction!(split_part))?;
m.add_wrapped(wrap_pyfunction!(sqrt))?;
m.add_wrapped(wrap_pyfunction!(starts_with))?;
Expand All @@ -436,6 +467,7 @@ pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(substr))?;
m.add_wrapped(wrap_pyfunction!(sum))?;
m.add_wrapped(wrap_pyfunction!(tan))?;
m.add_wrapped(wrap_pyfunction!(tanh))?;
m.add_wrapped(wrap_pyfunction!(to_hex))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
Expand Down