@@ -139,7 +139,7 @@ object PartitioningUtils {
           "root directory of the table. If there are multiple root directories, " +
           "please load them separately and then union them.")
 
-      val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues)
+      val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues, timeZone)
 
       // Creates the StructType which represents the partition columns.
       val fields = {
@@ -318,7 +318,8 @@ object PartitioningUtils {
    *   }}}
    */
   def resolvePartitions(
-      pathsWithPartitionValues: Seq[(Path, PartitionValues)]): Seq[PartitionValues] = {
+      pathsWithPartitionValues: Seq[(Path, PartitionValues)],
+      timeZone: TimeZone): Seq[PartitionValues] = {
     if (pathsWithPartitionValues.isEmpty) {
       Seq.empty
     } else {
@@ -333,7 +334,7 @@
       val values = pathsWithPartitionValues.map(_._2)
       val columnCount = values.head.columnNames.size
       val resolvedValues = (0 until columnCount).map { i =>
-        resolveTypeConflicts(values.map(_.literals(i)))
+        resolveTypeConflicts(values.map(_.literals(i)), timeZone)
       }
 
       // Fills resolved literals back to each partition
@@ -470,15 +471,15 @@ object PartitioningUtils {
    * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower"
    * types.
    */
-  private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = {
+  private def resolveTypeConflicts(literals: Seq[Literal], timeZone: TimeZone): Seq[Literal] = {
     val desiredType = {
       val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_))

Contributor commented:

thanks for such a quick fix!

But don't we also need to update upCastingOrder? It seems this happens to work because upCastingOrder.indexOf(TimestampType) = -1, but I don't think that compares correctly against NullType. If you add this to ParquetPartitionDiscoverySuite's test("parse partitions"), it fails:

    check(Seq(
      s"hdfs://host:9000/path/a=$defaultPartitionName/b=blah",
      s"hdfs://host:9000/path/a=2014-01-01 00%3A00%3A00.0/b=foo"),
      PartitionSpec(
        StructType(Seq(
          StructField("a", TimestampType),
          StructField("b", StringType))),
        Seq(
          Partition(InternalRow(null, "blah"),
            s"hdfs://host:9000/path/a=$defaultPartitionName/b=blah"),
          Partition(InternalRow(Timestamp.valueOf("2014-01-01 00:00:00.0"), "foo"),
            s"hdfs://host:9000/path/a=2014-01-01 00%3A00%3A00.0/b=foo"))))

(I have to admit, I don't totally understand what the ramifications of that failure are -- the behavior in the resulting DataFrame seems fine to me, but I figure there is probably some case this would mess up ...)
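
A minimal sketch of the ordering problem described here, assuming upCastingOrder is Seq(NullType, IntegerType, LongType, FloatType, DoubleType, StringType) (TimestampType is not in the list, so indexOf returns -1):

```scala
import org.apache.spark.sql.types._

// Assumed to mirror PartitioningUtils.upCastingOrder; TimestampType is absent.
val upCastingOrder: Seq[DataType] =
  Seq(NullType, IntegerType, LongType, FloatType, DoubleType, StringType)

// Column "a" above is inferred as NullType for the default-partition path and
// TimestampType for the other path.
val columnTypes: Seq[DataType] = Seq(NullType, TimestampType)

// maxBy prefers NullType (index 0) over TimestampType (index -1), so the column
// falls back to StringType instead of resolving to TimestampType as the test expects.
val topType = columnTypes.maxBy(upCastingOrder.indexOf(_))
val resolved = if (topType == NullType) StringType else topType  // StringType
```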

@HyukjinKwon (Member, author) replied on Sep 26, 2017:

I see. Actually, I believe this PR fixes only the (corner-case) regression between 2.1.0 and 2.2.0: if I understood correctly, we started to support inferring timestamps in partition columns in 2.1.0 (SPARK-17388), and this corner-case regression was introduced in 2.2.0 by SPARK-18939 (not tested).

For the issue you described above, which I believe has existed since 2.1.0 (not tested), I think the fix should also cover DecimalType, DateType and TimestampType, whose inference was introduced by SPARK-17388. It is closely related to this issue, SPARK-22109, but orthogonal.

For completeness, I guess we should port this logic:

/**
 * Case 2 type widening (see the classdoc comment above for TypeCoercion).
 *
 * i.e. the main difference with [[findTightestCommonType]] is that here we allow some
 * loss of precision when widening decimal and double, and promotion to string.
 */
private[analysis] def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = {
  findTightestCommonType(t1, t2)
    .orElse(findWiderTypeForDecimal(t1, t2))
    .orElse(stringPromotion(t1, t2))
    .orElse((t1, t2) match {
      case (ArrayType(et1, containsNull1), ArrayType(et2, containsNull2)) =>
        findWiderTypeForTwo(et1, et2).map(ArrayType(_, containsNull1 || containsNull2))
      case _ => None
    })
}

because the problem is that TimestampType, DateType and DecimalType can't be upcast to other types simply by comparing numeric precedence. Some special handling is needed, of course, because we currently infer decimals only when the scale is <= 0 (e.g., not 1.1) and the value is castable to a decimal before trying a double:

val decimalTry = Try {
  // `BigDecimal` conversion can fail when the `field` is not a form of number.
  val bigDecimal = new JBigDecimal(raw)
  // It reduces the cases for decimals by disallowing values having scale (eg. `1.1`).
  require(bigDecimal.scale <= 0)
  // `DecimalType` conversion can fail when
  //   1. The precision is bigger than 38.
  //   2. scale is bigger than precision.
  Literal(bigDecimal)
}

This could actually also be a problem between DateType and TimestampType, which should be upcastable (from date to timestamp) but might currently end up resolved as DateType.

Let me take a closer look and probably make a fix soon.
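
For illustration, a rough sketch (the helper name and rules here are assumptions, not Spark's actual implementation) of how a findWiderTypeForTwo-style resolution would handle the partition-value types discussed above:

```scala
import org.apache.spark.sql.types._

// Hypothetical widening for inferred partition-column types: nulls adopt the other
// type, date widens to timestamp, and anything else falls back to string promotion.
def widenPartitionTypes(t1: DataType, t2: DataType): DataType = (t1, t2) match {
  case (t, NullType) => t
  case (NullType, t) => t
  case (a, b) if a == b => a
  case (DateType, TimestampType) | (TimestampType, DateType) => TimestampType
  case _ => StringType
}

widenPartitionTypes(TimestampType, NullType)    // TimestampType, not StringType
widenPartitionTypes(DateType, TimestampType)    // TimestampType
widenPartitionTypes(TimestampType, StringType)  // StringType
```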

       // Falls back to string if all values of this column are null or empty string
       if (topType == NullType) StringType else topType
     }
 
     literals.map { case l @ Literal(_, dataType) =>
-      Literal.create(Cast(l, desiredType).eval(), desiredType)
+      Literal.create(Cast(l, desiredType, Some(timeZone.getID)).eval(), desiredType)
     }
   }
 }
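
The timeZone threaded through above matters because casting between StringType and TimestampType is time-zone dependent. A minimal sketch (not part of the diff; the values and imports are illustrative) of the cast performed by resolveTypeConflicts once a "blah"-style value forces the desired type to StringType:

```scala
import java.util.TimeZone
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.types.{StringType, TimestampType}

val tz = Some(TimeZone.getDefault.getID)

// A partition value such as "2015-01-01 00%3A00%3A00" is first inferred as a timestamp literal.
val tsValue = Cast(Literal("2015-01-01 00:00:00"), TimestampType, tz).eval()
val tsLiteral = Literal.create(tsValue, TimestampType)

// When another path contributes a plain string (e.g. "blah"), the resolved type is StringType,
// so the timestamp literal has to be cast back to a string -- which needs the time zone id.
val widened = Literal.create(Cast(tsLiteral, StringType, tz).eval(), StringType)
```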
@@ -1055,4 +1055,16 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
       }
     }
   }
+
+  test("SPARK-22109: Resolve type conflicts between strings and timestamps in partition column") {
+    val df = Seq(
+      (1, "2015-01-01 00:00:00"),
+      (2, "2014-01-01 00:00:00"),
+      (3, "blah")).toDF("i", "str")
+
+    withTempPath { path =>
+      df.write.format("parquet").partitionBy("str").save(path.getAbsolutePath)
+      checkAnswer(spark.read.load(path.getAbsolutePath), df)
+    }
+  }
 }
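
As a rough usage note (the directory names below are illustrative of how partitionBy escapes special characters, not copied from the PR): writing the DataFrame above produces partition directories such as str=2015-01-01 00%3A00%3A00 and str=blah. On read, partition discovery infers TimestampType for the timestamp-like values and StringType for "blah", and resolveTypeConflicts widens the column to StringType:

```scala
// Reading the directory back (path as in the test above); the partition column comes
// back as a string column, and the original values, including "blah", are preserved.
val reread = spark.read.load(path.getAbsolutePath)
reread.printSchema()            // str is resolved as string, not timestamp
reread.show(truncate = false)
```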