Commit 12ee9d6

Merge branch 'SPARK-28158' of github.com:uncleGen/spark into SPARK-28158
2 parents a8e2fb3 + 9cadbe4

File tree

4 files changed: +53, -3 lines

mllib/pom.xml

Lines changed: 13 additions & 0 deletions
@@ -74,6 +74,19 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-hive_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-hive_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-graphx_${scala.binary.version}</artifactId>
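
Both additions are test-scoped: the regular spark-hive artifact puts the Hive integration classes on mllib's test classpath, and the test-jar is what supplies org.apache.spark.sql.hive.test.TestHive, which the updated suite below depends on. As a minimal sketch of what this unlocks, assuming a suite under mllib's test sources (illustrative only; the real usage is in VectorUDTSuite below):

  import org.apache.spark.sql.SparkSession
  import org.apache.spark.sql.hive.test.TestHive

  // With the spark-hive test-jar on the test classpath, an mllib suite can
  // reuse the shared Hive-enabled session instead of bootstrapping its own
  // metastore and warehouse.
  val spark: SparkSession = TestHive.sparkSession
  spark.sql("SHOW FUNCTIONS").show()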
TestLogRegUDF.jar (the binary referenced by the new test below)

1.75 KB
Binary file not shown.
test-data/libsvm/sample_libsvm_data.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+0 128:51 129:159 130:253 131:159 132:50 155:48 156:238 157:252 158:252 159:252 160:237 182:54 183:227 184:253 185:252 186:239 187:233 188:252 189:57 190:6 208:10 209:60 210:224 211:252 212:253 213:252 214:202 215:84 216:252 217:253 218:122 236:163 237:252 238:252 239:252 240:253 241:252 242:252 243:96 244:189 245:253 246:167 263:51 264:238 265:253 266:253 267:190 268:114 269:253 270:228 271:47 272:79 273:255 274:168 290:48 291:238 292:252 293:252 294:179 295:12 296:75 297:121 298:21 301:253 302:243 303:50 317:38 318:165 319:253 320:233 321:208 322:84 329:253 330:252 331:165 344:7 345:178 346:252 347:240 348:71 349:19 350:28 357:253 358:252 359:195 372:57 373:252 374:252 375:63 385:253 386:252 387:195 400:198 401:253 402:190 413:255 414:253 415:196 427:76 428:246 429:252 430:112 441:253 442:252 443:148 455:85 456:252 457:230 458:25 467:7 468:135 469:253 470:186 471:12 483:85 484:252 485:223 494:7 495:131 496:252 497:225 498:71 511:85 512:252 513:145 521:48 522:165 523:252 524:173 539:86 540:253 541:225 548:114 549:238 550:253 551:162 567:85 568:252 569:249 570:146 571:48 572:29 573:85 574:178 575:225 576:253 577:223 578:167 579:56 595:85 596:252 597:252 598:252 599:229 600:215 601:252 602:252 603:252 604:196 605:130 623:28 624:199 625:252 626:252 627:253 628:252 629:252 630:233 631:145 652:25 653:128 654:252 655:253 656:252 657:141 658:37
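
The single added record is in libsvm format: a label (0) followed by sparse index:value feature pairs; the indices here look like pixel positions of a rasterized digit image. As a minimal sketch, assuming a SparkSession named spark is in scope and with an illustrative path, this is how Spark's built-in libsvm data source parses such a file, using the same vectorType option as the new test:

  // Each libsvm line is parsed as `label index:value index:value ...`.
  // With "vectorType" -> "dense", the features column holds DenseVector
  // values (typed through VectorUDT).
  val df = spark.read
    .format("libsvm")
    .option("vectorType", "dense")
    .load("/path/to/sample_libsvm_data.txt")  // illustrative path

  df.printSchema()  // label: double, features: vector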

mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala

Lines changed: 39 additions & 3 deletions
@@ -17,12 +17,13 @@
 
 package org.apache.spark.ml.linalg
 
-import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.feature.LabeledPoint
-import org.apache.spark.sql.catalyst.JavaTypeInference
+import org.apache.spark.sql.{QueryTest, Row, SparkSession}
+import org.apache.spark.sql.catalyst.{FunctionIdentifier, JavaTypeInference}
+import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.types._
 
-class VectorUDTSuite extends SparkFunSuite {
+class VectorUDTSuite extends QueryTest {
 
   test("preloaded VectorUDT") {
     val dv1 = Vectors.dense(Array.empty[Double])
@@ -44,4 +45,39 @@ class VectorUDTSuite extends SparkFunSuite {
     assert(dataType.asInstanceOf[StructType].fields.map(_.dataType)
       === Seq(new VectorUDT, DoubleType))
   }
+
+  test("SPARK-28158 Hive UDFs supports UDT type") {
+    val functionName = "Logistic_Regression"
+    val sql = spark.sql _
+    try {
+      val df = spark.read.format("libsvm").options(Map("vectorType" -> "dense"))
+        .load(TestHive.getHiveFile("test-data/libsvm/sample_libsvm_data.txt").getPath)
+      df.createOrReplaceTempView("src")
+
+      // `Logistic_Regression` accepts features (with Vector type), and returns the
+      // prediction value. To simplify the UDF implementation, the `Logistic_Regression`
+      // will return 0.95d directly.
+      sql(
+        s"""
+           |CREATE FUNCTION Logistic_Regression
+           |AS 'org.apache.spark.sql.hive.LogisticRegressionUDF'
+           |USING JAR '${TestHive.getHiveFile("TestLogRegUDF.jar").toURI}'
+         """.stripMargin)
+
+      checkAnswer(
+        sql("SELECT Logistic_Regression(features) FROM src"),
+        Row(0.95) :: Nil)
+    } catch {
+      case cause: Throwable => throw cause
+    } finally {
+      // If the test failed part way, we don't want to mask the failure by failing to remove
+      // temp tables that never got created.
+      spark.sql(s"DROP FUNCTION IF EXISTS $functionName")
+      assert(
+        !spark.sessionState.catalog.functionExists(FunctionIdentifier(functionName)),
+        s"Function $functionName should have been dropped. But, it still exists.")
+    }
+  }
+
+  override protected val spark: SparkSession = TestHive.sparkSession
 }
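
The class named in CREATE FUNCTION ... USING JAR ships inside TestLogRegUDF.jar, whose source is not part of this diff. A hypothetical sketch of what org.apache.spark.sql.hive.LogisticRegressionUDF could look like, assuming Hive's reflection-based simple UDF API (the signature in the actual jar may differ):

  package org.apache.spark.sql.hive

  import org.apache.hadoop.hive.ql.exec.UDF

  // Hypothetical reconstruction of the packaged test UDF: per the comment in
  // the test above, it ignores the VectorUDT-typed features argument and
  // returns a fixed "prediction" of 0.95 instead of running a real model.
  class LogisticRegressionUDF extends UDF {
    def evaluate(features: AnyRef): Double = 0.95
  }

Whatever its exact shape, the point the test exercises is that a UDT-typed column (VectorUDT here) can be passed through a Hive UDF call end to end, which is what SPARK-28158 adds.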
