24 changes: 17 additions & 7 deletions sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -36,6 +36,8 @@ import org.apache.spark.sql.execution.datasources.parquet.ParquetRelation
 import org.apache.spark.sql.execution.datasources.{LogicalRelation, ResolvedDataSource}
 import org.apache.spark.sql.types.StructType
 
+import scala.util.{Success, Try}
+
 /**
  * :: Experimental ::
  * Interface used to load a [[DataFrame]] from external storage systems (e.g. file systems,
@@ -306,19 +308,27 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
     if (paths.isEmpty) {
       sqlContext.emptyDataFrame
     } else {
-      val globbedPaths = paths.flatMap { path =>
+      val globbedPaths = paths.map { path =>
         val hdfsPath = new Path(path)
         val fs = hdfsPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
         val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
-        SparkHadoopUtil.get.globPathIfNecessary(qualified)
-      }.toArray
-
-      sqlContext.baseRelationToDataFrame(
-        new ParquetRelation(
-          globbedPaths.map(_.toString), userSpecifiedSchema, None, extraOptions.toMap)(sqlContext))
+        Try(SparkHadoopUtil.get.globPathIfNecessary(qualified))
+      }.collect { case Success(s) => s }.flatten.toArray
+
+      if (globbedPaths.isEmpty) {
+        sqlContext.emptyDataFrame
+      } else {
+        sqlContext.baseRelationToDataFrame(
+          new ParquetRelation(
+            globbedPaths.map(_.toString),
+            userSpecifiedSchema,
+            None,
+            extraOptions.toMap)(sqlContext))
+      }
     }
   }
 
+
   /**
    * Loads an ORC file and returns the result as a [[DataFrame]].
    *
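The core of the change is that each glob expansion is now wrapped in scala.util.Try, so a path whose expansion fails (for example, a partition directory that does not exist) is dropped rather than aborting the whole read; only successful expansions are kept, and if nothing survives the reader falls back to an empty DataFrame instead of building a ParquetRelation over no files. Below is a minimal, self-contained sketch of that Try/collect pattern; the expand function is a hypothetical stand-in for SparkHadoopUtil.get.globPathIfNecessary, and the paths are illustrative only.

    import scala.util.{Success, Try}

    object GlobFilterSketch {
      // Hypothetical stand-in for SparkHadoopUtil.get.globPathIfNecessary:
      // expands a glob pattern, or throws when the directory is missing.
      def expand(pattern: String): Seq[String] =
        if (pattern.contains("missing")) throw new java.io.FileNotFoundException(pattern)
        else Seq(pattern.replace("*", "part-00000"))

      def main(args: Array[String]): Unit = {
        val patterns = Seq("/data/year=2012/*", "/data/missing/year=2015/*")

        // Same shape as the DataFrameReader change: wrap each expansion in Try,
        // keep only the successful results, and flatten them into one array.
        val globbed = patterns
          .map(p => Try(expand(p)))
          .collect { case Success(paths) => paths }
          .flatten
          .toArray

        println(globbed.mkString(", "))  // only the existing path survives
      }
    }

Note that Try swallows any exception raised during expansion, not just a missing directory; that is the trade-off the empty-DataFrame fallback relies on.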
11 changes: 11 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -523,6 +523,17 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  test("Missing parquet files (SPARK-12369)") {
+    withTempPath { path =>
+      Seq((2012, "a", "b")).toDF("year", "vala", "valb")
+        .write.partitionBy("year", "vala").parquet(path.getAbsolutePath)
+      val df = sqlContext.read.parquet(s"${path.getAbsolutePath}/year=2015/*/*.parquet")
+      assert(df.inputFiles.isEmpty)
+      val df1 = sqlContext.read.parquet(s"${path.getAbsolutePath}/year=2012/*/*.parquet")
+      assert(df1.inputFiles.nonEmpty)
+    }
+  }
+
   ignore("show") {
     // This test case is intended ignored, but to make sure it compiles correctly
     testData.select($"*").show()
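From the caller's side, the new test pins down the intended behavior: a parquet read whose glob matches no files now yields an empty DataFrame instead of failing. A rough usage sketch follows, under the assumption of a local SparkContext/SQLContext and an illustrative /tmp output directory; the object name and paths are hypothetical and simply mirror the test above.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext

    object MissingParquetFilesExample {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(
          new SparkConf().setMaster("local[2]").setAppName("spark-12369-example"))
        val sqlContext = new SQLContext(sc)
        import sqlContext.implicits._

        // Assumes this directory does not already exist.
        val base = "/tmp/spark-12369-example"
        Seq((2012, "a", "b")).toDF("year", "vala", "valb")
          .write.partitionBy("year", "vala").parquet(base)

        // Glob that matches no partition: with this change the read returns
        // an empty DataFrame instead of failing while resolving the relation.
        val missing = sqlContext.read.parquet(s"$base/year=2015/*/*.parquet")
        println(missing.inputFiles.isEmpty)   // true

        // Glob that matches the written partition still reads normally.
        val present = sqlContext.read.parquet(s"$base/year=2012/*/*.parquet")
        println(present.inputFiles.nonEmpty)  // true

        sc.stop()
      }
    }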