From e60f6a196ad88d880885c64484ae4cedb5035899 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Tue, 14 Sep 2021 11:44:38 +0900 Subject: [PATCH 01/14] Add new built-in SQL functions: SEC and CSC --- python/pyspark/sql/functions.py | 34 ++++++++++++++ python/pyspark/sql/functions.pyi | 2 + .../catalyst/analysis/FunctionRegistry.scala | 2 + .../expressions/mathExpressions.scala | 46 +++++++++++++++++++ .../org/apache/spark/sql/functions.scala | 18 ++++++++ .../resources/sql-tests/inputs/operators.sql | 8 ++++ 6 files changed, 110 insertions(+) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index e418c0d11f8f..4bf9f1919dd6 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -394,6 +394,23 @@ def cot(col): return _invoke_function_over_column("cot", col) +def csc(col): + """ + .. versionadded:: 3.3.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + Angle in radians + + Returns + ------- + :class:`~pyspark.sql.Column` + Cosecant of the angle. + """ + return _invoke_function_over_column("csc", col) + + @since(1.4) def exp(col): """ @@ -451,6 +468,23 @@ def rint(col): return _invoke_function_over_column("rint", col) +def sec(col): + """ + .. versionadded:: 3.3.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + Angle in radians + + Returns + ------- + :class:`~pyspark.sql.Column` + Secant of the angle. + """ + return _invoke_function_over_column("sec", col) + + @since(1.4) def signum(col): """ diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 143fa133f4fe..2717bd5d6b6a 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -299,6 +299,7 @@ def column(col: str) -> Column: ... def cos(col: ColumnOrName) -> Column: ... def cosh(col: ColumnOrName) -> Column: ... def cot(col: ColumnOrName) -> Column: ... +def csc(col: ColumnOrName) -> Column: ... def count(col: ColumnOrName) -> Column: ... def cume_dist() -> Column: ... def degrees(col: ColumnOrName) -> Column: ... @@ -337,6 +338,7 @@ def rank() -> Column: ... def rint(col: ColumnOrName) -> Column: ... def row_number() -> Column: ... def rtrim(col: ColumnOrName) -> Column: ... +def sec(col: ColumnOrName) -> Column: ... def signum(col: ColumnOrName) -> Column: ... def sin(col: ColumnOrName) -> Column: ... def sinh(col: ColumnOrName) -> Column: ... diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index c9a3a5888ade..f019f012b7db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -361,6 +361,7 @@ object FunctionRegistry { expression[Ceil]("ceil"), expression[Ceil]("ceiling", true), expression[Cos]("cos"), + expression[Sec]("sec"), expression[Cosh]("cosh"), expression[Conv]("conv"), expression[ToDegrees]("degrees"), @@ -392,6 +393,7 @@ object FunctionRegistry { expression[Signum]("sign", true), expression[Signum]("signum"), expression[Sin]("sin"), + expression[Csc]("csc"), expression[Sinh]("sinh"), expression[StringToMap]("str_to_map"), expression[Sqrt]("sqrt"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index 2466940c798c..af044c5a8431 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -299,6 +299,29 @@ case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS") { override protected def withNewChildInternal(newChild: Expression): Cos = copy(child = newChild) } +@ExpressionDescription( + usage = """ + _FUNC_(expr) - Returns the secant of `expr`, as if computed by `1/java.lang.Math.cos`. + """, + arguments = """ + Arguments: + * expr - angle in radians + """, + examples = """ + Examples: + > SELECT _FUNC_(0); + 1.0 + """, + since = "3.3.0", + group = "math_funcs") +case class Sec(child: Expression) + extends UnaryMathExpression((x: Double) => 1 / math.cos(x), "SEC") { + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + defineCodeGen(ctx, ev, c => s"${ev.value} = 1 / java.lang.Math.cos($c);") + } + override protected def withNewChildInternal(newChild: Expression): Sec = copy(child = newChild) +} + @ExpressionDescription( usage = """ _FUNC_(expr) - Returns the hyperbolic cosine of `expr`, as if computed by @@ -655,6 +678,29 @@ case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN") { override protected def withNewChildInternal(newChild: Expression): Sin = copy(child = newChild) } +@ExpressionDescription( + usage = """ + _FUNC_(expr) - Returns the cosecant of `expr`, as if computed by `1/java.lang.Math.sin`. + """, + arguments = """ + Arguments: + * expr - angle in radians + """, + examples = """ + Examples: + > SELECT _FUNC_(1); + 1.1883951057781212 + """, + since = "3.3.0", + group = "math_funcs") +case class Csc(child: Expression) + extends UnaryMathExpression((x: Double) => 1 / math.sin(x), "CSC") { + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + defineCodeGen(ctx, ev, c => s"${ev.value} = 1 / java.lang.Math.sin($c);") + } + override protected def withNewChildInternal(newChild: Expression): Csc = copy(child = newChild) +} + @ExpressionDescription( usage = """ _FUNC_(expr) - Returns hyperbolic sine of `expr`, as if computed by `java.lang.Math._FUNC_`. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 781a2dd5649e..7e916c55004a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1809,6 +1809,15 @@ object functions { */ def cot(e: Column): Column = withExpr { Cot(e.expr) } + /** + * @param e angle in radians + * @return cosecant of the angle + * + * @group math_funcs + * @since 3.3.0 + */ + def csc(e: Column): Column = withExpr { Csc(e.expr) } + /** * Computes the exponential of the given value. * @@ -2197,6 +2206,15 @@ object functions { */ def bround(e: Column, scale: Int): Column = withExpr { BRound(e.expr, Literal(scale)) } + /** + * @param e angle in radians + * @return secant of the angle + * + * @group math_funcs + * @since 3.3.0 + */ + def sec(e: Column): Column = withExpr { Sec(e.expr) } + /** * Shift the given value numBits left. If the given value is a long value, this function * will return a long value else it will return an integer value. diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql index c296fa5f7e87..b5673f287e70 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -40,6 +40,14 @@ select 5 % 3; select pmod(-7, 3); -- math functions +select sec(1); +select sec(null); +select sec(0); +select sec(-1); +select csc(1); +select csc(null); +select csc(0); +select csc(-1);: select cot(1); select cot(null); select cot(0); From 48b32475d42da5145b45da04d6221938a0e48a41 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Tue, 14 Sep 2021 13:08:01 +0900 Subject: [PATCH 02/14] Add tests to MathExpressionSuite.scala --- .../expressions/MathExpressionsSuite.scala | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index a17a8ba22bd6..678a257383df 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -187,6 +187,20 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(Sin, DoubleType) } + test("csc") { + def f: (Double) => Double = (x: Double) => 1 / math.sin(x) + testUnary(Csc, f) + checkConsistencyBetweenInterpretedAndCodegen(Csc, DoubleType) + val nullLit = Literal.create(null, NullType) + val intNullLit = Literal.create(null, IntegerType) + val intLit = Literal.create(1, IntegerType) + checkEvaluation(checkDataTypeAndCast(Csc(nullLit)), null, EmptyRow) + checkEvaluation(checkDataTypeAndCast(Csc(intNullLit)), null, EmptyRow) + checkEvaluation(checkDataTypeAndCast(Csc(intLit)), 1 / math.sin(1), EmptyRow) + checkEvaluation(checkDataTypeAndCast(Csc(-intLit)), 1 / math.sin(-1), EmptyRow) + checkEvaluation(checkDataTypeAndCast(Csc(0)), 1 / math.sin(0), EmptyRow) + } + test("asin") { testUnary(Asin, math.asin, (-10 to 10).map(_ * 0.1)) testUnary(Asin, math.asin, (11 to 20).map(_ * 0.1), expectNaN = true) @@ -215,6 +229,20 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen(Cos, DoubleType) } + test("sec") { + def f: (Double) => Double = (x: Double) => 1 / math.cos(x) + testUnary(Sec, f) + checkConsistencyBetweenInterpretedAndCodegen(Sec, DoubleType) + val nullLit = Literal.create(null, NullType) + val intNullLit = Literal.create(null, IntegerType) + val intLit = Literal.create(1, IntegerType) + checkEvaluation(checkDataTypeAndCast(Sec(nullLit)), null, EmptyRow) + checkEvaluation(checkDataTypeAndCast(Sec(intNullLit)), null, EmptyRow) + checkEvaluation(checkDataTypeAndCast(Sec(intLit)), 1 / math.cos(1), EmptyRow) + checkEvaluation(checkDataTypeAndCast(Sec(-intLit)), 1 / math.cos(-1), EmptyRow) + checkEvaluation(checkDataTypeAndCast(Sec(0)), 1 / math.cos(0), EmptyRow) + } + test("acos") { testUnary(Acos, math.acos, (-10 to 10).map(_ * 0.1)) testUnary(Acos, math.acos, (11 to 20).map(_ * 0.1), expectNaN = true) From 9ce80cbc9a82996ea25cbb03cb74e9778489046c Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Tue, 14 Sep 2021 14:13:00 +0900 Subject: [PATCH 03/14] add test results in operators.sql.out --- .../sql-tests/results/operators.sql.out | 78 ++++++++++++++++++- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 3af92bf93571..cd68a08c04ed 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 69 +-- Number of queries: 77 -- !query @@ -259,11 +259,83 @@ struct -- !query +select sec(1) +-- !query schema +struct +-- !query output +1.8508157176809255 + + +-- !query +select sec(null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select sec(0) +-- !query schema +struct +-- !query output +1.0 + + +-- !query +select sec(-1) +-- !query schema +struct +-- !query output +1.8508157176809255 + + +-- !query +select csc(1) +-- !query schema +struct +-- !query output +1.1883951057781212 + + +-- !query +select csc(null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select csc(0) +-- !query schema +struct +-- !query output +Infinity + + +-- !query +select csc(-1) +-- !query schema +struct +-- !query output +-1.1883951057781212 + + +-- !query +: select cot(1) -- !query schema -struct +struct<> -- !query output -0.6420926159343306 +org.apache.spark.sql.catalyst.parser.ParseException + +extraneous input ':' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) + +== SQL == +: +^^^ +select cot(1) -- !query From 068432b449a59a47d8e44f089b7d0b882509060d Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Wed, 15 Sep 2021 11:32:12 +0900 Subject: [PATCH 04/14] added test to MathFunctionsSuite and fixed operators.sql --- .../test/resources/sql-tests/inputs/operators.sql | 2 +- .../org/apache/spark/sql/MathFunctionsSuite.scala | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql index b5673f287e70..33274665f55c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -47,7 +47,7 @@ select sec(-1); select csc(1); select csc(null); select csc(0); -select csc(-1);: +select csc(-1); select cot(1); select cot(null); select cot(0); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index 1e7e8c1c5e51..842b0f8c6ba8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -117,6 +117,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(sin, math.sin) } + test("csc") { + testOneToOneMathFunction(csc, + (x: Double) => 1 / math.sin) + } + test("asin") { testOneToOneMathFunction(asin, math.asin) } @@ -134,6 +139,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(cos, math.cos) } + test("sec") { + testOneToOneMathFunction(sec, + (x: Double) => 1 / math.cos) + } + test("acos") { testOneToOneMathFunction(acos, math.acos) } @@ -151,6 +161,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(tan, math.tan) } + test("cot") { + testOneToOneMathFunction(cot, + (x: Double) => 1 / math.tan) + } + test("atan") { testOneToOneMathFunction(atan, math.atan) } From abc64b6ef5f313cac281a05f95f6e663931c0a36 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Wed, 15 Sep 2021 13:24:43 +0900 Subject: [PATCH 05/14] re-generate operators.sql.out and fix MathFunctionSuite --- .../resources/sql-tests/results/operators.sql.out | 12 ++---------- .../org/apache/spark/sql/MathFunctionsSuite.scala | 6 +++--- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index cd68a08c04ed..a2ed7d0b5472 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -323,19 +323,11 @@ struct -- !query -: select cot(1) -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -extraneous input ':' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -: -^^^ -select cot(1) +0.6420926159343306 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index 842b0f8c6ba8..3512e5c03bdd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -119,7 +119,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("csc") { testOneToOneMathFunction(csc, - (x: Double) => 1 / math.sin) + (x: Double) => (1 / math.sin(x)) ) } test("asin") { @@ -141,7 +141,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("sec") { testOneToOneMathFunction(sec, - (x: Double) => 1 / math.cos) + (x: Double) => (1 / math.cos(x)) ) } test("acos") { @@ -163,7 +163,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("cot") { testOneToOneMathFunction(cot, - (x: Double) => 1 / math.tan) + (x: Double) => (1 / math.tan(x)) ) } test("atan") { From e0e90a18b6983f5be1bcee373151b307159738dd Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Thu, 16 Sep 2021 13:49:26 +0900 Subject: [PATCH 06/14] add unit test in test_functions.py and generate golden result file --- python/pyspark/sql/tests/test_functions.py | 31 +++++++++++++++++-- .../sql-functions/sql-expression-schema.md | 4 ++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 00a2660492a0..5d3c90abf31f 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -18,12 +18,14 @@ import datetime from itertools import chain import re +import math from py4j.protocol import Py4JJavaError -from pyspark.sql import Row, Window +from pyspark.sql import Row, Window, types from pyspark.sql.functions import udf, input_file_name, col, percentile_approx, \ lit, assert_true, sum_distinct, sumDistinct, shiftleft, shiftLeft, shiftRight, \ - shiftright, shiftrightunsigned, shiftRightUnsigned, octet_length, bit_length + shiftright, shiftrightunsigned, shiftRightUnsigned, octet_length, bit_length, \ + sec, csc, cot from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -157,6 +159,31 @@ def test_inverse_trig_functions(self): for c in cols: self.assertIn(f"{alias}(a)", repr(f(c))) + def test_reciprocal_trig_functions(self): + list = [0.0, math.pi/6, math.pi/4, math.pi/3, math.pi/2, + math.pi, 3 * math.pi / 2, 2 * math.pi] + + df = self.spark.createDataFrame(list, types.DoubleType()) + + def to_reciprocal_trig(func): + return [1.0 / func(i) if func(i) != 0 else math.inf for i in list] + + def get_values(l): + return [j[0] for j in l] + + def assert_close(a, b): + c = get_values(b) + diff = [abs(v - c[k]) < 1e-6 if v != math.inf else v == c[k] + for k, v in enumerate(a)] + return sum(diff) == len(a) + + assert_close(to_reciprocal_trig(math.cos), + df.select(sec(df.value)).collect()) + assert_close(to_reciprocal_trig(math.sin), + df.select(csc(df.value)).collect()) + assert_close(to_reciprocal_trig(math.tan), + df.select(cot(df.value)).collect()) + def test_rand_functions(self): df = self.df from pyspark.sql import functions diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index c7743bc39558..fc5134764905 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 362 + - Number of queries: 364 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -82,6 +82,7 @@ | org.apache.spark.sql.catalyst.expressions.CreateMap | map | SELECT map(1.0, '2', 3.0, '4') | struct> | | org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | named_struct | SELECT named_struct("a", 1, "b", 2, "c", 3) | struct> | | org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | struct | SELECT struct(1, 2, 3) | struct> | +| org.apache.spark.sql.catalyst.expressions.Csc | csc | SELECT csc(1) | struct | | org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct> | | org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.CurrentCatalog | current_catalog | SELECT current_catalog() | struct | @@ -241,6 +242,7 @@ | org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct | | org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct | +| org.apache.spark.sql.catalyst.expressions.Sec | sec | SELECT sec(0) | struct | | org.apache.spark.sql.catalyst.expressions.Second | second | SELECT second('2009-07-30 12:58:59') | struct | | org.apache.spark.sql.catalyst.expressions.SecondsToTimestamp | timestamp_seconds | SELECT timestamp_seconds(1230219000) | struct | | org.apache.spark.sql.catalyst.expressions.Sentences | sentences | SELECT sentences('Hi there! Good morning.') | struct>> | From 47f8fe238202a4d5fe8c765d11831fe44fba66fd Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Thu, 16 Sep 2021 14:15:13 +0900 Subject: [PATCH 07/14] add JIRA reference to test_functions.py --- python/pyspark/sql/tests/test_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 5d3c90abf31f..833faa50f348 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -160,6 +160,7 @@ def test_inverse_trig_functions(self): self.assertIn(f"{alias}(a)", repr(f(c))) def test_reciprocal_trig_functions(self): + # SPARK-36683: Add new built-in SQL functions: SEC and CSC list = [0.0, math.pi/6, math.pi/4, math.pi/3, math.pi/2, math.pi, 3 * math.pi / 2, 2 * math.pi] From 6454b2c5c3ccd1bfc33d89ea00875a4118510e6e Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Fri, 17 Sep 2021 14:04:04 +0900 Subject: [PATCH 08/14] Moved duplicated functions from test_functions.py to sqlutils.py --- python/docs/source/reference/pyspark.sql.rst | 2 + python/pyspark/sql/functions.pyi | 2 +- python/pyspark/sql/tests/test_functions.py | 53 +++++++------------- python/pyspark/testing/sqlutils.py | 7 +++ 4 files changed, 28 insertions(+), 36 deletions(-) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 326b83b7627b..f5a8357e0df2 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -394,6 +394,7 @@ Functions covar_samp crc32 create_map + csc cume_dist current_date current_timestamp @@ -511,6 +512,7 @@ Functions rtrim schema_of_csv schema_of_json + sec second sentences sequence diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index b1e6d70c4db8..a147210f3474 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -301,8 +301,8 @@ def column(col: str) -> Column: ... def cos(col: ColumnOrName) -> Column: ... def cosh(col: ColumnOrName) -> Column: ... def cot(col: ColumnOrName) -> Column: ... -def csc(col: ColumnOrName) -> Column: ... def count(col: ColumnOrName) -> Column: ... +def csc(col: ColumnOrName) -> Column: ... def cume_dist() -> Column: ... def degrees(col: ColumnOrName) -> Column: ... def dense_rank() -> Column: ... diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 833faa50f348..06a5276214cf 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -113,35 +113,27 @@ def test_math_functions(self): from pyspark.sql import functions import math - def get_values(l): - return [j[0] for j in l] - - def assert_close(a, b): - c = get_values(b) - diff = [abs(v - c[k]) < 1e-6 for k, v in enumerate(a)] - return sum(diff) == len(a) - - assert_close([math.cos(i) for i in range(10)], + self.assert_close([math.cos(i) for i in range(10)], df.select(functions.cos(df.a)).collect()) - assert_close([math.cos(i) for i in range(10)], + self.assert_close([math.cos(i) for i in range(10)], df.select(functions.cos("a")).collect()) - assert_close([math.sin(i) for i in range(10)], + self.assert_close([math.sin(i) for i in range(10)], df.select(functions.sin(df.a)).collect()) - assert_close([math.sin(i) for i in range(10)], + self.assert_close([math.sin(i) for i in range(10)], df.select(functions.sin(df['a'])).collect()) - assert_close([math.pow(i, 2 * i) for i in range(10)], + self.assert_close([math.pow(i, 2 * i) for i in range(10)], df.select(functions.pow(df.a, df.b)).collect()) - assert_close([math.pow(i, 2) for i in range(10)], + self.assert_close([math.pow(i, 2) for i in range(10)], df.select(functions.pow(df.a, 2)).collect()) - assert_close([math.pow(i, 2) for i in range(10)], + self.assert_close([math.pow(i, 2) for i in range(10)], df.select(functions.pow(df.a, 2.0)).collect()) - assert_close([math.hypot(i, 2 * i) for i in range(10)], + self.assert_close([math.hypot(i, 2 * i) for i in range(10)], df.select(functions.hypot(df.a, df.b)).collect()) - assert_close([math.hypot(i, 2 * i) for i in range(10)], + self.assert_close([math.hypot(i, 2 * i) for i in range(10)], df.select(functions.hypot("a", u"b")).collect()) - assert_close([math.hypot(i, 2) for i in range(10)], + self.assert_close([math.hypot(i, 2) for i in range(10)], df.select(functions.hypot("a", 2)).collect()) - assert_close([math.hypot(i, 2) for i in range(10)], + self.assert_close([math.hypot(i, 2) for i in range(10)], df.select(functions.hypot(df.a, 2)).collect()) def test_inverse_trig_functions(self): @@ -160,29 +152,20 @@ def test_inverse_trig_functions(self): self.assertIn(f"{alias}(a)", repr(f(c))) def test_reciprocal_trig_functions(self): - # SPARK-36683: Add new built-in SQL functions: SEC and CSC - list = [0.0, math.pi/6, math.pi/4, math.pi/3, math.pi/2, + # SPARK-36683: Tests for reciprocal trig functions (SEC, CSC and COT) + l = [0.0, math.pi / 6, math.pi / 4, math.pi / 3, math.pi / 2, math.pi, 3 * math.pi / 2, 2 * math.pi] - df = self.spark.createDataFrame(list, types.DoubleType()) + df = self.spark.createDataFrame(l, types.DoubleType()) def to_reciprocal_trig(func): - return [1.0 / func(i) if func(i) != 0 else math.inf for i in list] - - def get_values(l): - return [j[0] for j in l] - - def assert_close(a, b): - c = get_values(b) - diff = [abs(v - c[k]) < 1e-6 if v != math.inf else v == c[k] - for k, v in enumerate(a)] - return sum(diff) == len(a) + return [1.0 / func(i) if func(i) != 0 else math.inf for i in l] - assert_close(to_reciprocal_trig(math.cos), + self.assert_close(to_reciprocal_trig(math.cos), df.select(sec(df.value)).collect()) - assert_close(to_reciprocal_trig(math.sin), + self.assert_close(to_reciprocal_trig(math.sin), df.select(csc(df.value)).collect()) - assert_close(to_reciprocal_trig(math.tan), + self.assert_close(to_reciprocal_trig(math.tan), df.select(cot(df.value)).collect()) def test_rand_functions(self): diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py index a394e8eecc69..e6d3c3da157e 100644 --- a/python/pyspark/testing/sqlutils.py +++ b/python/pyspark/testing/sqlutils.py @@ -243,6 +243,13 @@ def function(self, *functions): for f in functions: self.spark.sql("DROP FUNCTION IF EXISTS %s" % f) + @staticmethod + def assert_close(a, b): + c = [j[0] for j in b] + diff = [abs(v - c[k]) < 1e-6 if v != math.inf else v == c[k] + for k, v in enumerate(a)] + return sum(diff) == len(a) + class ReusedSQLTestCase(ReusedPySparkTestCase, SQLTestUtils): @classmethod From 9bc8d5f3c83cb0ab037d03cd550f06ad2cb77609 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Fri, 17 Sep 2021 14:32:45 +0900 Subject: [PATCH 09/14] fixed test_functions.py --- python/pyspark/sql/tests/test_functions.py | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 06a5276214cf..87c4ab20933e 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -113,27 +113,27 @@ def test_math_functions(self): from pyspark.sql import functions import math - self.assert_close([math.cos(i) for i in range(10)], + assert_close([math.cos(i) for i in range(10)], df.select(functions.cos(df.a)).collect()) - self.assert_close([math.cos(i) for i in range(10)], + assert_close([math.cos(i) for i in range(10)], df.select(functions.cos("a")).collect()) - self.assert_close([math.sin(i) for i in range(10)], + assert_close([math.sin(i) for i in range(10)], df.select(functions.sin(df.a)).collect()) - self.assert_close([math.sin(i) for i in range(10)], + assert_close([math.sin(i) for i in range(10)], df.select(functions.sin(df['a'])).collect()) - self.assert_close([math.pow(i, 2 * i) for i in range(10)], + assert_close([math.pow(i, 2 * i) for i in range(10)], df.select(functions.pow(df.a, df.b)).collect()) - self.assert_close([math.pow(i, 2) for i in range(10)], + assert_close([math.pow(i, 2) for i in range(10)], df.select(functions.pow(df.a, 2)).collect()) - self.assert_close([math.pow(i, 2) for i in range(10)], + assert_close([math.pow(i, 2) for i in range(10)], df.select(functions.pow(df.a, 2.0)).collect()) - self.assert_close([math.hypot(i, 2 * i) for i in range(10)], + assert_close([math.hypot(i, 2 * i) for i in range(10)], df.select(functions.hypot(df.a, df.b)).collect()) - self.assert_close([math.hypot(i, 2 * i) for i in range(10)], + assert_close([math.hypot(i, 2 * i) for i in range(10)], df.select(functions.hypot("a", u"b")).collect()) - self.assert_close([math.hypot(i, 2) for i in range(10)], + assert_close([math.hypot(i, 2) for i in range(10)], df.select(functions.hypot("a", 2)).collect()) - self.assert_close([math.hypot(i, 2) for i in range(10)], + assert_close([math.hypot(i, 2) for i in range(10)], df.select(functions.hypot(df.a, 2)).collect()) def test_inverse_trig_functions(self): @@ -161,11 +161,11 @@ def test_reciprocal_trig_functions(self): def to_reciprocal_trig(func): return [1.0 / func(i) if func(i) != 0 else math.inf for i in l] - self.assert_close(to_reciprocal_trig(math.cos), + assert_close(to_reciprocal_trig(math.cos), df.select(sec(df.value)).collect()) - self.assert_close(to_reciprocal_trig(math.sin), + assert_close(to_reciprocal_trig(math.sin), df.select(csc(df.value)).collect()) - self.assert_close(to_reciprocal_trig(math.tan), + assert_close(to_reciprocal_trig(math.tan), df.select(cot(df.value)).collect()) def test_rand_functions(self): From f0b0887eaa5f268c543a605bd3e17a2c6bef428e Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Fri, 17 Sep 2021 15:14:10 +0900 Subject: [PATCH 10/14] add short descriptions in functions.py --- python/pyspark/sql/functions.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 4e85ebfe729f..9c09a7ce1e25 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -253,6 +253,8 @@ def product(col): def acos(col): """ + Computes inverse cosine of the input column. + .. versionadded:: 1.4.0 Returns @@ -278,6 +280,8 @@ def acosh(col): def asin(col): """ + Computes inverse sine of the input column. + .. versionadded:: 1.3.0 @@ -304,6 +308,8 @@ def asinh(col): def atan(col): """ + Compute inverse tangent of the input column. + .. versionadded:: 1.4.0 Returns @@ -345,6 +351,8 @@ def ceil(col): def cos(col): """ + Computes cosine of the input column. + .. versionadded:: 1.4.0 Parameters @@ -362,6 +370,8 @@ def cos(col): def cosh(col): """ + Computes hyperbolic cosine of the input column. + .. versionadded:: 1.4.0 Parameters @@ -379,6 +389,8 @@ def cosh(col): def cot(col): """ + Computes cotangent of the input column. + .. versionadded:: 3.3.0 Parameters @@ -396,6 +408,8 @@ def cot(col): def csc(col): """ + Computes cosecant of the input column. + .. versionadded:: 3.3.0 Parameters @@ -470,6 +484,8 @@ def rint(col): def sec(col): """ + Computes secant of the input column. + .. versionadded:: 3.3.0 Parameters @@ -495,6 +511,8 @@ def signum(col): def sin(col): """ + Computes sine of the input column. + .. versionadded:: 1.4.0 Parameters @@ -511,6 +529,8 @@ def sin(col): def sinh(col): """ + Computes hyperbolic sine of the input column. + .. versionadded:: 1.4.0 Parameters @@ -529,6 +549,8 @@ def sinh(col): def tan(col): """ + Computes tangent of the input column. + .. versionadded:: 1.4.0 Parameters @@ -546,6 +568,8 @@ def tan(col): def tanh(col): """ + Computes hyperbolic tangent of the input column. + .. versionadded:: 1.4.0 Parameters @@ -5186,9 +5210,9 @@ def _test(): from pyspark.sql import Row, SparkSession import pyspark.sql.functions globs = pyspark.sql.functions.__dict__.copy() - spark = SparkSession.builder\ - .master("local[4]")\ - .appName("sql.functions tests")\ + spark = SparkSession.builder \ + .master("local[4]") \ + .appName("sql.functions tests") \ .getOrCreate() sc = spark.sparkContext globs['sc'] = sc From 2df5825d383236f4e9c2c28496d1dcac5ead6a0f Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Fri, 17 Sep 2021 16:21:53 +0900 Subject: [PATCH 11/14] fixed lint error and wrong method calling --- python/pyspark/sql/tests/test_functions.py | 69 +++++++++++----------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 87c4ab20933e..61225ab0d2f4 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -26,7 +26,7 @@ lit, assert_true, sum_distinct, sumDistinct, shiftleft, shiftLeft, shiftRight, \ shiftright, shiftrightunsigned, shiftRightUnsigned, octet_length, bit_length, \ sec, csc, cot -from pyspark.testing.sqlutils import ReusedSQLTestCase +from pyspark.testing.sqlutils import ReusedSQLTestCase, SQLTestUtils class FunctionsTests(ReusedSQLTestCase): @@ -111,30 +111,29 @@ def test_crosstab(self): def test_math_functions(self): df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF() from pyspark.sql import functions - import math - assert_close([math.cos(i) for i in range(10)], - df.select(functions.cos(df.a)).collect()) - assert_close([math.cos(i) for i in range(10)], - df.select(functions.cos("a")).collect()) - assert_close([math.sin(i) for i in range(10)], - df.select(functions.sin(df.a)).collect()) - assert_close([math.sin(i) for i in range(10)], - df.select(functions.sin(df['a'])).collect()) - assert_close([math.pow(i, 2 * i) for i in range(10)], - df.select(functions.pow(df.a, df.b)).collect()) - assert_close([math.pow(i, 2) for i in range(10)], - df.select(functions.pow(df.a, 2)).collect()) - assert_close([math.pow(i, 2) for i in range(10)], - df.select(functions.pow(df.a, 2.0)).collect()) - assert_close([math.hypot(i, 2 * i) for i in range(10)], - df.select(functions.hypot(df.a, df.b)).collect()) - assert_close([math.hypot(i, 2 * i) for i in range(10)], - df.select(functions.hypot("a", u"b")).collect()) - assert_close([math.hypot(i, 2) for i in range(10)], - df.select(functions.hypot("a", 2)).collect()) - assert_close([math.hypot(i, 2) for i in range(10)], - df.select(functions.hypot(df.a, 2)).collect()) + SQLTestUtils.assert_close([math.cos(i) for i in range(10)], + df.select(functions.cos(df.a)).collect()) + SQLTestUtils.assert_close([math.cos(i) for i in range(10)], + df.select(functions.cos("a")).collect()) + SQLTestUtils.assert_close([math.sin(i) for i in range(10)], + df.select(functions.sin(df.a)).collect()) + SQLTestUtils.assert_close([math.sin(i) for i in range(10)], + df.select(functions.sin(df['a'])).collect()) + SQLTestUtils.assert_close([math.pow(i, 2 * i) for i in range(10)], + df.select(functions.pow(df.a, df.b)).collect()) + SQLTestUtils.assert_close([math.pow(i, 2) for i in range(10)], + df.select(functions.pow(df.a, 2)).collect()) + SQLTestUtils.assert_close([math.pow(i, 2) for i in range(10)], + df.select(functions.pow(df.a, 2.0)).collect()) + SQLTestUtils.assert_close([math.hypot(i, 2 * i) for i in range(10)], + df.select(functions.hypot(df.a, df.b)).collect()) + SQLTestUtils.assert_close([math.hypot(i, 2 * i) for i in range(10)], + df.select(functions.hypot("a", u"b")).collect()) + SQLTestUtils.assert_close([math.hypot(i, 2) for i in range(10)], + df.select(functions.hypot("a", 2)).collect()) + SQLTestUtils.assert_close([math.hypot(i, 2) for i in range(10)], + df.select(functions.hypot(df.a, 2)).collect()) def test_inverse_trig_functions(self): from pyspark.sql import functions @@ -153,20 +152,20 @@ def test_inverse_trig_functions(self): def test_reciprocal_trig_functions(self): # SPARK-36683: Tests for reciprocal trig functions (SEC, CSC and COT) - l = [0.0, math.pi / 6, math.pi / 4, math.pi / 3, math.pi / 2, - math.pi, 3 * math.pi / 2, 2 * math.pi] + lst = [0.0, math.pi / 6, math.pi / 4, math.pi / 3, math.pi / 2, + math.pi, 3 * math.pi / 2, 2 * math.pi] - df = self.spark.createDataFrame(l, types.DoubleType()) + df = self.spark.createDataFrame(lst, types.DoubleType()) def to_reciprocal_trig(func): - return [1.0 / func(i) if func(i) != 0 else math.inf for i in l] - - assert_close(to_reciprocal_trig(math.cos), - df.select(sec(df.value)).collect()) - assert_close(to_reciprocal_trig(math.sin), - df.select(csc(df.value)).collect()) - assert_close(to_reciprocal_trig(math.tan), - df.select(cot(df.value)).collect()) + return [1.0 / func(i) if func(i) != 0 else math.inf for i in lst] + + SQLTestUtils.assert_close(to_reciprocal_trig(math.cos), + df.select(sec(df.value)).collect()) + SQLTestUtils.assert_close(to_reciprocal_trig(math.sin), + df.select(csc(df.value)).collect()) + SQLTestUtils.assert_close(to_reciprocal_trig(math.tan), + df.select(cot(df.value)).collect()) def test_rand_functions(self): df = self.df From bf2179a13ead4c61c090082e336a1306cdfb183a Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Fri, 17 Sep 2021 16:47:46 +0900 Subject: [PATCH 12/14] removed unnecessary spaces --- python/pyspark/sql/functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 9c09a7ce1e25..8c084b284ece 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -5210,9 +5210,9 @@ def _test(): from pyspark.sql import Row, SparkSession import pyspark.sql.functions globs = pyspark.sql.functions.__dict__.copy() - spark = SparkSession.builder \ - .master("local[4]") \ - .appName("sql.functions tests") \ + spark = SparkSession.builder\ + .master("local[4]")\ + .appName("sql.functions tests")\ .getOrCreate() sc = spark.sparkContext globs['sc'] = sc From a67b93ffada4a0d897636faa6ea44868740ba43e Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Fri, 17 Sep 2021 17:47:20 +0900 Subject: [PATCH 13/14] edit assert_close in sqlutils.py: support negative infinity --- python/pyspark/testing/sqlutils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py index e6d3c3da157e..64077bf05131 100644 --- a/python/pyspark/testing/sqlutils.py +++ b/python/pyspark/testing/sqlutils.py @@ -16,6 +16,7 @@ # import datetime +import math import os import shutil import tempfile @@ -246,7 +247,7 @@ def function(self, *functions): @staticmethod def assert_close(a, b): c = [j[0] for j in b] - diff = [abs(v - c[k]) < 1e-6 if v != math.inf else v == c[k] + diff = [abs(v - c[k]) < 1e-6 if not math.isinf(v) else v == c[k] for k, v in enumerate(a)] return sum(diff) == len(a) From 057814038b37d3c844853e116ffa767b62d9c158 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Fri, 17 Sep 2021 17:58:27 +0900 Subject: [PATCH 14/14] "not math.isinf" to "math.isfinite" in assert_close --- python/pyspark/testing/sqlutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py index 64077bf05131..eb1cacf71568 100644 --- a/python/pyspark/testing/sqlutils.py +++ b/python/pyspark/testing/sqlutils.py @@ -247,7 +247,7 @@ def function(self, *functions): @staticmethod def assert_close(a, b): c = [j[0] for j in b] - diff = [abs(v - c[k]) < 1e-6 if not math.isinf(v) else v == c[k] + diff = [abs(v - c[k]) < 1e-6 if math.isfinite(v) else v == c[k] for k, v in enumerate(a)] return sum(diff) == len(a)