@@ -1010,9 +1010,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
.orElse(Some("org.apache.hadoop.mapred.TextInputFormat")),
outputFormat = defaultHiveSerde.flatMap(_.outputFormat)
.orElse(Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")),
- // Note: Keep this unspecified because we use the presence of the serde to decide
- // whether to convert a table created by CTAS to a datasource table.
- serde = None,
+ serde = defaultHiveSerde.flatMap(_.serde),
compressed = false,
properties = Map())
}
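The comment removed above describes the earlier behavior: the serde was deliberately left unset so that the CTAS-conversion rule could key off its absence. The sketch below is an illustration only, not the actual Spark rule; once the parser always fills in a default serde, absence of a serde alone can no longer be the signal, which is what the CTAS test further down exercises.

object CtasConversionSketch {
  // Illustration only (not Spark's real code): per the removed comment, with
  // spark.sql.hive.convertCTAS enabled, a CTAS whose storage carried no serde was
  // converted into a data source table; a present serde kept it a Hive-serde table.
  def shouldConvertToDataSource(convertCTAS: Boolean, serde: Option[String]): Boolean =
    convertCTAS && serde.isEmpty

  def main(args: Array[String]): Unit = {
    println(shouldConvertToDataSource(convertCTAS = true, serde = None))                       // true
    println(shouldConvertToDataSource(convertCTAS = true, serde = Some("LazySimpleSerDe")))    // false
  }
}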
@@ -30,10 +30,12 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Generate, ScriptTransformation}
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.CreateTable
- import org.apache.spark.sql.hive.test.TestHive
+ import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType

- class HiveDDLCommandSuite extends PlanTest {
+ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingleton {
val parser = TestHive.sessionState.sqlParser

private def extractTableDesc(sql: String): (CatalogTable, Boolean) = {
@@ -556,4 +558,24 @@ class HiveDDLCommandSuite extends PlanTest {
assert(partition2.get.apply("c") == "1" && partition2.get.apply("d") == "2")
}

test("Test the default fileformat for Hive-serde tables") {
withSQLConf("hive.default.fileformat" -> "orc") {
val (desc, exists) = extractTableDesc("CREATE TABLE IF NOT EXISTS fileformat_test (id int)")
assert(exists)
assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"))
assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"))
assert(desc.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde"))
}

withSQLConf("hive.default.fileformat" -> "parquet") {
val (desc, exists) = extractTableDesc("CREATE TABLE IF NOT EXISTS fileformat_test (id int)")
assert(exists)
val input = desc.storage.inputFormat
val output = desc.storage.outputFormat
val serde = desc.storage.serde
assert(input == Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"))
assert(output == Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
assert(serde == Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
}
}
}
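As a usage-level illustration of what the test above asserts, a spark-shell session along these lines should show the ORC formats and serde being picked up for a plain CREATE TABLE. This snippet is hypothetical (it assumes a Hive-enabled SparkSession named `spark` and a demo table name), not part of the PR.

// Hypothetical spark-shell snippet; table name and session are assumptions.
spark.sql("SET hive.default.fileformat=orc")
spark.sql("CREATE TABLE IF NOT EXISTS fileformat_demo (id INT)")
// DESCRIBE FORMATTED should list OrcInputFormat / OrcOutputFormat and OrcSerde,
// mirroring the assertions in the test above.
spark.sql("DESCRIBE FORMATTED fileformat_demo").show(100, truncate = false)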
@@ -492,7 +492,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {

def checkRelation(
tableName: String,
- isDataSourceParquet: Boolean,
+ isDataSourceTable: Boolean,
format: String,
userSpecifiedLocation: Option[String] = None): Unit = {
val relation = EliminateSubqueryAliases(
@@ -501,7 +501,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
relation match {
case LogicalRelation(r: HadoopFsRelation, _, _) =>
- if (!isDataSourceParquet) {
+ if (!isDataSourceTable) {
fail(
s"${classOf[MetastoreRelation].getCanonicalName} is expected, but found " +
s"${HadoopFsRelation.getClass.getCanonicalName}.")
@@ -514,7 +514,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
assert(catalogTable.provider.get === format)

case r: MetastoreRelation =>
- if (isDataSourceParquet) {
+ if (isDataSourceTable) {
fail(
s"${HadoopFsRelation.getClass.getCanonicalName} is expected, but found " +
s"${classOf[MetastoreRelation].getCanonicalName}.")
@@ -524,8 +524,15 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
assert(r.catalogTable.storage.locationUri.get === location)
case None => // OK.
}
- // Also make sure that the format is the desired format.
+ // Also make sure that the format and serde are as desired.
assert(catalogTable.storage.inputFormat.get.toLowerCase.contains(format))
assert(catalogTable.storage.outputFormat.get.toLowerCase.contains(format))
val serde = catalogTable.storage.serde.get
format match {
case "sequence" | "text" => assert(serde.contains("LazySimpleSerDe"))
case "rcfile" => assert(serde.contains("LazyBinaryColumnarSerDe"))
case _ => assert(serde.toLowerCase.contains(format))
}
}

// When a user-specified location is defined, the table type needs to be EXTERNAL.
@@ -587,6 +594,30 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}

test("CTAS with default fileformat") {
val table = "ctas1"
val ctas = s"CREATE TABLE IF NOT EXISTS $table SELECT key k, value FROM src"
withSQLConf(SQLConf.CONVERT_CTAS.key -> "true") {
withSQLConf("hive.default.fileformat" -> "textfile") {
withTable(table) {
sql(ctas)
// We should get parquet here, as that is the default data source file format,
// controlled by the `spark.sql.sources.default` configuration. This test case
// verifies that setting `hive.default.fileformat` has no impact on the target
// table's file format for CTAS.
assert(sessionState.conf.defaultDataSourceName === "parquet")
checkRelation(tableName = table, isDataSourceTable = true, format = "parquet")
Contributor Author (@dilipbiswal):
In Scala 2.10, we need to name all the fields when we use named parameters. This is the reason for the build failure when building with Scala 2.10.

Member:
Hmm, I think there is an error because, in your mix of named and positional arguments, the positional arguments are not a prefix of the argument list. I.e., the compilation error in

checkRelation(table, isDataSourceTable = true, "parquet")

should be fixed by

checkRelation(table, isDataSourceTable = true, format = "parquet")

Contributor Author (@dilipbiswal, Oct 15, 2016):
@viirya So what I have should work, right? I have also named the first field for code readability, even though it is not strictly necessary to fix the compilation issue. I also had a question: do you know how to trigger a test run for Scala 2.10? I would like to run against 2.10 if possible. I have run it against my local environment, though.

Member:
As far as I know, we can't trigger it. Maybe @yhuai will know? You can compile it with Scala 2.10 locally to make sure it passes.

Member:
Yeah, I think it is no problem to have all named arguments for them.

Contributor:
I think we cannot trigger a Scala 2.10 build for a PR.

(A minimal sketch of this named-versus-positional argument rule follows the test below.)
}
}
withSQLConf("spark.sql.sources.default" -> "orc") {
withTable(table) {
sql(ctas)
checkRelation(tableName = table, isDataSourceTable = true, format = "orc")
}
}
}
}
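The named-versus-positional rule from the review thread above, as a minimal self-contained sketch. The checkRelation signature mirrors the one earlier in this diff; which calls compile on Scala 2.10 versus 2.11 is taken from the reviewers' statements and has not been re-verified here.

object NamedArgumentsSketch {
  // Signature mirroring checkRelation in SQLQuerySuite above.
  def checkRelation(
      tableName: String,
      isDataSourceTable: Boolean,
      format: String,
      userSpecifiedLocation: Option[String] = None): Unit = ()

  def main(args: Array[String]): Unit = {
    // A positional argument after a named one -- the form the reviewers say broke the
    // Scala 2.10 build (left commented out so this sketch compiles everywhere):
    // checkRelation("ctas1", isDataSourceTable = true, "parquet")

    // Naming every argument, as the PR does, is accepted by both 2.10 and 2.11:
    checkRelation(tableName = "ctas1", isDataSourceTable = true, format = "parquet")
  }
}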

test("CTAS without serde with location") {
withSQLConf(SQLConf.CONVERT_CTAS.key -> "true") {
withTempDir { dir =>