From 5e79bfde70fa63188ff7abe19023192985509025 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Fri, 18 Aug 2023 16:31:42 +0800 Subject: [PATCH] [SPARK-44834][PYTHON][SQL][TESTS][FOLLOW-UP] Update the analyzer results of the udtf tests ### What changes were proposed in this pull request? This is a follow up for https://github.com/apache/spark/pull/42517. We need to re-generate the analyzer results for udtf tests after https://github.com/apache/spark/pull/42519 is merged. Also updated PythonUDTFSuite after https://github.com/apache/spark/pull/42520 is merged. ### Why are the changes needed? To fix test failures ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Test only change Closes #42543 from allisonwang-db/spark-44834-fix. Authored-by: allisonwang-db Signed-off-by: Yuming Wang --- .../analyzer-results/udtf/udtf.sql.out | 51 +++---------------- .../execution/python/PythonUDTFSuite.scala | 17 +------ 2 files changed, 10 insertions(+), 58 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udtf/udtf.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udtf/udtf.sql.out index acf96794378e1..b46a1f230a856 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udtf/udtf.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udtf/udtf.sql.out @@ -10,84 +10,49 @@ CreateViewCommand `t1`, VALUES (0, 1), (1, 2) t(c1, c2), false, true, LocalTempV -- !query SELECT * FROM udtf(1, 2) -- !query analysis -Project [x#x, y#x] -+- Generate TestUDTF(1, 2)#x, false, [x#x, y#x] - +- OneRowRelation +[Analyzer test output redacted due to nondeterminism] -- !query SELECT * FROM udtf(-1, 0) -- !query analysis -Project [x#x, y#x] -+- Generate TestUDTF(-1, 0)#x, false, [x#x, y#x] - +- OneRowRelation +[Analyzer test output redacted due to nondeterminism] -- !query SELECT * FROM udtf(0, -1) -- !query analysis -Project [x#x, y#x] -+- Generate TestUDTF(0, -1)#x, false, [x#x, y#x] - +- OneRowRelation +[Analyzer test output redacted due to nondeterminism] -- !query SELECT * FROM udtf(0, 0) -- !query analysis -Project [x#x, y#x] -+- Generate TestUDTF(0, 0)#x, false, [x#x, y#x] - +- OneRowRelation +[Analyzer test output redacted due to nondeterminism] -- !query SELECT a, b FROM udtf(1, 2) t(a, b) -- !query analysis -Project [a#x, b#x] -+- SubqueryAlias t - +- Project [x#x AS a#x, y#x AS b#x] - +- Generate TestUDTF(1, 2)#x, false, [x#x, y#x] - +- OneRowRelation +[Analyzer test output redacted due to nondeterminism] -- !query SELECT * FROM t1, LATERAL udtf(c1, c2) -- !query analysis -Project [c1#x, c2#x, x#x, y#x] -+- LateralJoin lateral-subquery#x [c1#x && c2#x], Inner - : +- Generate TestUDTF(outer(c1#x), outer(c2#x))#x, false, [x#x, y#x] - : +- OneRowRelation - +- SubqueryAlias t1 - +- View (`t1`, [c1#x,c2#x]) - +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] - +- SubqueryAlias t - +- LocalRelation [c1#x, c2#x] +[Analyzer test output redacted due to nondeterminism] -- !query SELECT * FROM t1 LEFT JOIN LATERAL udtf(c1, c2) -- !query analysis -Project [c1#x, c2#x, x#x, y#x] -+- LateralJoin lateral-subquery#x [c1#x && c2#x], LeftOuter - : +- Generate TestUDTF(outer(c1#x), outer(c2#x))#x, false, [x#x, y#x] - : +- OneRowRelation - +- SubqueryAlias t1 - +- View (`t1`, [c1#x,c2#x]) - +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] - +- SubqueryAlias t - +- LocalRelation [c1#x, c2#x] +[Analyzer test output redacted due to nondeterminism] -- !query SELECT * FROM udtf(1, 2) t(c1, c2), LATERAL udtf(c1, c2) -- !query analysis -Project [c1#x, c2#x, x#x, y#x] -+- LateralJoin lateral-subquery#x [c1#x && c2#x], Inner - : +- Generate TestUDTF(outer(c1#x), outer(c2#x))#x, false, [x#x, y#x] - : +- OneRowRelation - +- SubqueryAlias t - +- Project [x#x AS c1#x, y#x AS c2#x] - +- Generate TestUDTF(1, 2)#x, false, [x#x, y#x] - +- OneRowRelation +[Analyzer test output redacted due to nondeterminism] -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala index 8abcb0a6ce15e..4c17e3f5392c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala @@ -41,19 +41,6 @@ class PythonUDTFSuite extends QueryTest with SharedSparkSession { | yield a, b, b - a |""".stripMargin - private val arrowPythonScript: String = - """ - |import pandas as pd - |class VectorizedUDTF: - | def eval(self, a: pd.Series, b: pd.Series): - | data = [ - | [a, b, a + b], - | [a, b, a - b], - | [a, b, b - a], - | ] - | yield pd.DataFrame(data) - |""".stripMargin - private val returnType: StructType = StructType.fromDDL("a int, b int, c int") private val pythonUDTF: UserDefinedPythonTableFunction = @@ -61,8 +48,8 @@ class PythonUDTFSuite extends QueryTest with SharedSparkSession { private val arrowPythonUDTF: UserDefinedPythonTableFunction = createUserDefinedPythonTableFunction( - "VectorizedUDTF", - arrowPythonScript, + "SimpleUDTF", + pythonScript, returnType, evalType = PythonEvalType.SQL_ARROW_TABLE_UDF)