Skip to content

Conversation

@imback82
Copy link
Contributor

@imback82 imback82 commented Jul 11, 2019

What changes were proposed in this pull request?

This PR adds some tests converted from inline-table.sql to test UDFs. Please see contribution guide of this umbrella ticket - SPARK-27921.

Diff comparing to 'inline-table.sql'

diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
index 4e80f0bda5..2cf24e50c8 100644
--- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
@@ -3,33 +3,33 @@
 
 
 -- !query 0
-select * from values ("one", 1)
+select udf(col1), udf(col2) from values ("one", 1)
 -- !query 0 schema
-struct<col1:string,col2:int>
+struct<CAST(udf(cast(col1 as string)) AS STRING):string,CAST(udf(cast(col2 as string)) AS INT):int>
 -- !query 0 output
 one	1
 
 
 -- !query 1
-select * from values ("one", 1) as data
+select udf(col1), udf(udf(col2)) from values ("one", 1) as data
 -- !query 1 schema
-struct<col1:string,col2:int>
+struct<CAST(udf(cast(col1 as string)) AS STRING):string,CAST(udf(cast(cast(udf(cast(col2 as string)) as int) as string)) AS INT):int>
 -- !query 1 output
 one	1
 
 
 -- !query 2
-select * from values ("one", 1) as data(a, b)
+select udf(a), b from values ("one", 1) as data(a, b)
 -- !query 2 schema
-struct<a:string,b:int>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:int>
 -- !query 2 output
 one	1
 
 
 -- !query 3
-select * from values 1, 2, 3 as data(a)
+select udf(a) from values 1, 2, 3 as data(a)
 -- !query 3 schema
-struct<a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int>
 -- !query 3 output
 1
 2
@@ -37,9 +37,9 @@ struct<a:int>
 
 
 -- !query 4
-select * from values ("one", 1), ("two", 2), ("three", null) as data(a, b)
+select udf(a), b from values ("one", 1), ("two", 2), ("three", null) as data(a, b)
 -- !query 4 schema
-struct<a:string,b:int>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:int>
 -- !query 4 output
 one	1
 three	NULL
@@ -47,107 +47,107 @@ two	2
 
 
 -- !query 5
-select * from values ("one", null), ("two", null) as data(a, b)
+select udf(a), b from values ("one", null), ("two", null) as data(a, b)
 -- !query 5 schema
-struct<a:string,b:null>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:null>
 -- !query 5 output
 one	NULL
 two	NULL
 
 
 -- !query 6
-select * from values ("one", 1), ("two", 2L) as data(a, b)
+select udf(a), b from values ("one", 1), ("two", 2L) as data(a, b)
 -- !query 6 schema
-struct<a:string,b:bigint>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:bigint>
 -- !query 6 output
 one	1
 two	2
 
 
 -- !query 7
-select * from values ("one", 1 + 0), ("two", 1 + 3L) as data(a, b)
+select udf(udf(a)), udf(b) from values ("one", 1 + 0), ("two", 1 + 3L) as data(a, b)
 -- !query 7 schema
-struct<a:string,b:bigint>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as string) as string)) AS STRING):string,CAST(udf(cast(b as string)) AS BIGINT):bigint>
 -- !query 7 output
 one	1
 two	4
 
 
 -- !query 8
-select * from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b)
+select udf(a), b from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b)
 -- !query 8 schema
-struct<a:string,b:array<int>>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:array<int>>
 -- !query 8 output
 one	[0,1]
 two	[2,3]
 
 
 -- !query 9
-select * from values ("one", 2.0), ("two", 3.0D) as data(a, b)
+select udf(a), b from values ("one", 2.0), ("two", 3.0D) as data(a, b)
 -- !query 9 schema
-struct<a:string,b:double>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:double>
 -- !query 9 output
 one	2.0
 two	3.0
 
 
 -- !query 10
-select * from values ("one", rand(5)), ("two", 3.0D) as data(a, b)
+select udf(a), b from values ("one", rand(5)), ("two", 3.0D) as data(a, b)
 -- !query 10 schema
 struct<>
 -- !query 10 output
 org.apache.spark.sql.AnalysisException
-cannot evaluate expression rand(5) in inline table definition; line 1 pos 29
+cannot evaluate expression rand(5) in inline table definition; line 1 pos 37
 
 
 -- !query 11
-select * from values ("one", 2.0), ("two") as data(a, b)
+select udf(a), udf(b) from values ("one", 2.0), ("two") as data(a, b)
 -- !query 11 schema
 struct<>
 -- !query 11 output
 org.apache.spark.sql.AnalysisException
-expected 2 columns but found 1 columns in row 1; line 1 pos 14
+expected 2 columns but found 1 columns in row 1; line 1 pos 27
 
 
 -- !query 12
-select * from values ("one", array(0, 1)), ("two", struct(1, 2)) as data(a, b)
+select udf(a), udf(b) from values ("one", array(0, 1)), ("two", struct(1, 2)) as data(a, b)
 -- !query 12 schema
 struct<>
 -- !query 12 output
 org.apache.spark.sql.AnalysisException
-incompatible types found in column b for inline table; line 1 pos 14
+incompatible types found in column b for inline table; line 1 pos 27
 
 
 -- !query 13
-select * from values ("one"), ("two") as data(a, b)
+select udf(a), udf(b) from values ("one"), ("two") as data(a, b)
 -- !query 13 schema
 struct<>
 -- !query 13 output
 org.apache.spark.sql.AnalysisException
-expected 2 columns but found 1 columns in row 0; line 1 pos 14
+expected 2 columns but found 1 columns in row 0; line 1 pos 27
 
 
 -- !query 14
-select * from values ("one", random_not_exist_func(1)), ("two", 2) as data(a, b)
+select udf(a), udf(b) from values ("one", random_not_exist_func(1)), ("two", 2) as data(a, b)
 -- !query 14 schema
 struct<>
 -- !query 14 output
 org.apache.spark.sql.AnalysisException
-Undefined function: 'random_not_exist_func'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 29
+Undefined function: 'random_not_exist_func'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 42
 
 
 -- !query 15
-select * from values ("one", count(1)), ("two", 2) as data(a, b)
+select udf(a), udf(b) from values ("one", count(1)), ("two", 2) as data(a, b)
 -- !query 15 schema
 struct<>
 -- !query 15 output
 org.apache.spark.sql.AnalysisException
-cannot evaluate expression count(1) in inline table definition; line 1 pos 29
+cannot evaluate expression count(1) in inline table definition; line 1 pos 42
 
 
 -- !query 16
-select * from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991-12-06 01:00:00.0'), timestamp('1991-12-06 12:00:00.0'))) as data(a, b)
+select udf(a), b from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991-12-06 01:00:00.0'), timestamp('1991-12-06 12:00:00.0'))) as data(a, b)
 -- !query 16 schema
-struct<a:timestamp,b:array<timestamp>>
+struct<CAST(udf(cast(a as string)) AS TIMESTAMP):timestamp,b:array<timestamp>>
 -- !query 16 output
 1991-12-06 00:00:00	[1991-12-06 01:00:00.0,1991-12-06 12:00:00.0]

How was this patch tested?

Tested as guided in SPARK-27921.

@SparkQA
Copy link

SparkQA commented Jul 12, 2019

Test build #107557 has finished for PR 25124 at commit 89212b7.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@HyukjinKwon
Copy link
Member

retest this please

@HyukjinKwon
Copy link
Member

Looks fine in general otherwise.

@SparkQA
Copy link

SparkQA commented Jul 18, 2019

Test build #107823 has finished for PR 25124 at commit 89212b7.

  • This patch fails Spark unit tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@imback82
Copy link
Contributor Author

@HyukjinKwon, this is ready for review. Thanks!

@SparkQA
Copy link

SparkQA commented Jul 19, 2019

Test build #107862 has finished for PR 25124 at commit bd7aea8.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@SparkQA
Copy link

SparkQA commented Jul 19, 2019

Test build #107880 has finished for PR 25124 at commit 0562cc9.

  • This patch fails due to an unknown error code, -9.
  • This patch merges cleanly.
  • This patch adds no public classes.

@HyukjinKwon
Copy link
Member

retest this please

@SparkQA
Copy link

SparkQA commented Jul 19, 2019

Test build #107890 has finished for PR 25124 at commit 0562cc9.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@@ -0,0 +1,53 @@
-- This test file was converted from inline-table.sql.
-- [SPARK-28291] UDFs cannot be evaluated within inline table definition
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@imback82, can we add a todo that says we should add UDFs in VALUES clause when SPARK-28291 is resolved? It's kind of sad because I actually intended to test such cases here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g.:

-- TODO: we should ...

@SparkQA
Copy link

SparkQA commented Jul 20, 2019

Test build #107927 has finished for PR 25124 at commit 1ba0886.

  • This patch passes all tests.
  • This patch merges cleanly.
  • This patch adds no public classes.

@HyukjinKwon
Copy link
Member

Merged to master.

yiheng pushed a commit to yiheng/spark that referenced this pull request Jul 24, 2019
… into UDF test base

## What changes were proposed in this pull request?

This PR adds some tests converted from `inline-table.sql` to test UDFs. Please see contribution guide of this umbrella ticket - [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).

<details><summary>Diff comparing to 'inline-table.sql'</summary>
<p>

```diff
diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
index 4e80f0b..2cf24e5 100644
--- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out
 -3,33 +3,33

 -- !query 0
-select * from values ("one", 1)
+select udf(col1), udf(col2) from values ("one", 1)
 -- !query 0 schema
-struct<col1:string,col2:int>
+struct<CAST(udf(cast(col1 as string)) AS STRING):string,CAST(udf(cast(col2 as string)) AS INT):int>
 -- !query 0 output
 one	1

 -- !query 1
-select * from values ("one", 1) as data
+select udf(col1), udf(udf(col2)) from values ("one", 1) as data
 -- !query 1 schema
-struct<col1:string,col2:int>
+struct<CAST(udf(cast(col1 as string)) AS STRING):string,CAST(udf(cast(cast(udf(cast(col2 as string)) as int) as string)) AS INT):int>
 -- !query 1 output
 one	1

 -- !query 2
-select * from values ("one", 1) as data(a, b)
+select udf(a), b from values ("one", 1) as data(a, b)
 -- !query 2 schema
-struct<a:string,b:int>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:int>
 -- !query 2 output
 one	1

 -- !query 3
-select * from values 1, 2, 3 as data(a)
+select udf(a) from values 1, 2, 3 as data(a)
 -- !query 3 schema
-struct<a:int>
+struct<CAST(udf(cast(a as string)) AS INT):int>
 -- !query 3 output
 1
 2
 -37,9 +37,9  struct<a:int>

 -- !query 4
-select * from values ("one", 1), ("two", 2), ("three", null) as data(a, b)
+select udf(a), b from values ("one", 1), ("two", 2), ("three", null) as data(a, b)
 -- !query 4 schema
-struct<a:string,b:int>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:int>
 -- !query 4 output
 one	1
 three	NULL
 -47,107 +47,107  two	2

 -- !query 5
-select * from values ("one", null), ("two", null) as data(a, b)
+select udf(a), b from values ("one", null), ("two", null) as data(a, b)
 -- !query 5 schema
-struct<a:string,b:null>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:null>
 -- !query 5 output
 one	NULL
 two	NULL

 -- !query 6
-select * from values ("one", 1), ("two", 2L) as data(a, b)
+select udf(a), b from values ("one", 1), ("two", 2L) as data(a, b)
 -- !query 6 schema
-struct<a:string,b:bigint>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:bigint>
 -- !query 6 output
 one	1
 two	2

 -- !query 7
-select * from values ("one", 1 + 0), ("two", 1 + 3L) as data(a, b)
+select udf(udf(a)), udf(b) from values ("one", 1 + 0), ("two", 1 + 3L) as data(a, b)
 -- !query 7 schema
-struct<a:string,b:bigint>
+struct<CAST(udf(cast(cast(udf(cast(a as string)) as string) as string)) AS STRING):string,CAST(udf(cast(b as string)) AS BIGINT):bigint>
 -- !query 7 output
 one	1
 two	4

 -- !query 8
-select * from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b)
+select udf(a), b from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b)
 -- !query 8 schema
-struct<a:string,b:array<int>>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:array<int>>
 -- !query 8 output
 one	[0,1]
 two	[2,3]

 -- !query 9
-select * from values ("one", 2.0), ("two", 3.0D) as data(a, b)
+select udf(a), b from values ("one", 2.0), ("two", 3.0D) as data(a, b)
 -- !query 9 schema
-struct<a:string,b:double>
+struct<CAST(udf(cast(a as string)) AS STRING):string,b:double>
 -- !query 9 output
 one	2.0
 two	3.0

 -- !query 10
-select * from values ("one", rand(5)), ("two", 3.0D) as data(a, b)
+select udf(a), b from values ("one", rand(5)), ("two", 3.0D) as data(a, b)
 -- !query 10 schema
 struct<>
 -- !query 10 output
 org.apache.spark.sql.AnalysisException
-cannot evaluate expression rand(5) in inline table definition; line 1 pos 29
+cannot evaluate expression rand(5) in inline table definition; line 1 pos 37

 -- !query 11
-select * from values ("one", 2.0), ("two") as data(a, b)
+select udf(a), udf(b) from values ("one", 2.0), ("two") as data(a, b)
 -- !query 11 schema
 struct<>
 -- !query 11 output
 org.apache.spark.sql.AnalysisException
-expected 2 columns but found 1 columns in row 1; line 1 pos 14
+expected 2 columns but found 1 columns in row 1; line 1 pos 27

 -- !query 12
-select * from values ("one", array(0, 1)), ("two", struct(1, 2)) as data(a, b)
+select udf(a), udf(b) from values ("one", array(0, 1)), ("two", struct(1, 2)) as data(a, b)
 -- !query 12 schema
 struct<>
 -- !query 12 output
 org.apache.spark.sql.AnalysisException
-incompatible types found in column b for inline table; line 1 pos 14
+incompatible types found in column b for inline table; line 1 pos 27

 -- !query 13
-select * from values ("one"), ("two") as data(a, b)
+select udf(a), udf(b) from values ("one"), ("two") as data(a, b)
 -- !query 13 schema
 struct<>
 -- !query 13 output
 org.apache.spark.sql.AnalysisException
-expected 2 columns but found 1 columns in row 0; line 1 pos 14
+expected 2 columns but found 1 columns in row 0; line 1 pos 27

 -- !query 14
-select * from values ("one", random_not_exist_func(1)), ("two", 2) as data(a, b)
+select udf(a), udf(b) from values ("one", random_not_exist_func(1)), ("two", 2) as data(a, b)
 -- !query 14 schema
 struct<>
 -- !query 14 output
 org.apache.spark.sql.AnalysisException
-Undefined function: 'random_not_exist_func'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 29
+Undefined function: 'random_not_exist_func'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 42

 -- !query 15
-select * from values ("one", count(1)), ("two", 2) as data(a, b)
+select udf(a), udf(b) from values ("one", count(1)), ("two", 2) as data(a, b)
 -- !query 15 schema
 struct<>
 -- !query 15 output
 org.apache.spark.sql.AnalysisException
-cannot evaluate expression count(1) in inline table definition; line 1 pos 29
+cannot evaluate expression count(1) in inline table definition; line 1 pos 42

 -- !query 16
-select * from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991-12-06 01:00:00.0'), timestamp('1991-12-06 12:00:00.0'))) as data(a, b)
+select udf(a), b from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991-12-06 01:00:00.0'), timestamp('1991-12-06 12:00:00.0'))) as data(a, b)
 -- !query 16 schema
-struct<a:timestamp,b:array<timestamp>>
+struct<CAST(udf(cast(a as string)) AS TIMESTAMP):timestamp,b:array<timestamp>>
 -- !query 16 output
 1991-12-06 00:00:00	[1991-12-06 01:00:00.0,1991-12-06 12:00:00.0]

```
</p>
</details>

## How was this patch tested?

Tested as guided in [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).

Closes apache#25124 from imback82/inline-table-sql.

Authored-by: Terry Kim <yuminkim@gmail.com>
Signed-off-by: HyukjinKwon <gurwls223@apache.org>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants