Skip to content

Commit 93e07db

Browse files
author
Marcelo Vanzin
committed
Fix HiveCommandSuite.
The test needs to make a copy of the input file when using "LOAD DATA" (as opposed to "LOAD DATA LOCAL"), since Hive moves the input file rather than copying it in the non-LOCAL case.
1 parent 11f11ca commit 93e07db

File tree

1 file changed

+33
-5
lines changed

1 file changed

+33
-5
lines changed

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717

1818
package org.apache.spark.sql.hive.execution
1919

20+
import java.io.File
21+
22+
import com.google.common.io.Files
23+
2024
import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
2125
import org.apache.spark.sql.catalyst.TableIdentifier
2226
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
@@ -232,31 +236,40 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
232236
sql("""LOAD DATA LOCAL INPATH "/non-existing/data.txt" INTO TABLE non_part_table""")
233237
}
234238

235-
val testData = hiveContext.getHiveFile("data/files/employee.dat").getCanonicalPath
239+
val testData = hiveContext.getHiveFile("data/files/employee.dat")
236240

237241
// Non-local inpath: without URI Scheme and Authority
238-
sql(s"""LOAD DATA INPATH "$testData" INTO TABLE non_part_table""")
242+
withCopy(testData) { tmp =>
243+
sql(s"""LOAD DATA INPATH "${tmp.getCanonicalPath()}" INTO TABLE non_part_table""")
244+
}
245+
239246
checkAnswer(
240247
sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
241248
Row(16, "john") :: Nil)
242249

243250
// Use URI as LOCAL inpath:
244251
// file:/path/to/data/files/employee.dat
245-
val uri = "file:" + testData
252+
val uri = "file:" + testData.getCanonicalPath()
246253
sql(s"""LOAD DATA LOCAL INPATH "$uri" INTO TABLE non_part_table""")
247254

248255
checkAnswer(
249256
sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
250257
Row(16, "john") :: Row(16, "john") :: Nil)
251258

252259
// Use URI as non-LOCAL inpath
253-
sql(s"""LOAD DATA INPATH "$uri" INTO TABLE non_part_table""")
260+
withCopy(testData) { tmp =>
261+
val tmpUri = "file:" + tmp.getCanonicalPath()
262+
sql(s"""LOAD DATA INPATH "$tmpUri" INTO TABLE non_part_table""")
263+
}
254264

255265
checkAnswer(
256266
sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
257267
Row(16, "john") :: Row(16, "john") :: Row(16, "john") :: Nil)
258268

259-
sql(s"""LOAD DATA INPATH "$uri" OVERWRITE INTO TABLE non_part_table""")
269+
withCopy(testData) { tmp =>
270+
val tmpUri = "file:" + tmp.getCanonicalPath()
271+
sql(s"""LOAD DATA INPATH "$tmpUri" OVERWRITE INTO TABLE non_part_table""")
272+
}
260273

261274
checkAnswer(
262275
sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
@@ -418,4 +431,19 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
418431
assert(sql("SHOW PARTITIONS part_datasrc").count() == 3)
419432
}
420433
}
434+
435+
/**
 * Run a function with a temporary copy of the input file, deleting the copy afterwards.
 *
 * Use this for tests that use "LOAD DATA" (instead of "LOAD DATA LOCAL") since, according to
 * Hive's semantics, files are moved into the target location in that case, and we need the
 * original file to be preserved.
 *
 * @param source file whose contents are copied; only the copy is handed to `fn`
 * @param fn function invoked with the temporary copy
 */
private def withCopy(source: File)(fn: File => Unit): Unit = {
  val tmp = File.createTempFile(source.getName(), ".tmp")
  try {
    // Copy inside the try block so the temp file is cleaned up even if the copy itself fails.
    Files.copy(source, tmp)
    fn(tmp)
  } finally {
    // Best-effort cleanup: the copy may already have been moved away by the command under
    // test, in which case delete() simply returns false.
    tmp.delete()
  }
}
421449
}

0 commit comments

Comments
 (0)