diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 3ed1e55adec6..82ec5f06f028 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -36,6 +36,8 @@ import org.apache.spark.sql.execution.datasources.parquet.ParquetRelation
 import org.apache.spark.sql.execution.datasources.{LogicalRelation, ResolvedDataSource}
 import org.apache.spark.sql.types.StructType
 
+import scala.util.{Success, Try}
+
 /**
  * :: Experimental ::
  * Interface used to load a [[DataFrame]] from external storage systems (e.g. file systems,
@@ -306,19 +308,27 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging {
     if (paths.isEmpty) {
       sqlContext.emptyDataFrame
     } else {
-      val globbedPaths = paths.flatMap { path =>
+      val globbedPaths = paths.map { path =>
         val hdfsPath = new Path(path)
         val fs = hdfsPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
         val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
-        SparkHadoopUtil.get.globPathIfNecessary(qualified)
-      }.toArray
-
-      sqlContext.baseRelationToDataFrame(
-        new ParquetRelation(
-          globbedPaths.map(_.toString), userSpecifiedSchema, None, extraOptions.toMap)(sqlContext))
+        Try(SparkHadoopUtil.get.globPathIfNecessary(qualified))
+      }.collect { case Success(s) => s }.flatten.toArray
+
+      if (globbedPaths.isEmpty) {
+        sqlContext.emptyDataFrame
+      } else {
+        sqlContext.baseRelationToDataFrame(
+          new ParquetRelation(
+            globbedPaths.map(_.toString),
+            userSpecifiedSchema,
+            None,
+            extraOptions.toMap)(sqlContext))
+      }
     }
   }
 
+
   /**
    * Loads an ORC file and returns the result as a [[DataFrame]].
    *
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 0644bdaaa35c..0000039c52e9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -523,6 +523,17 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  test("SPARK-12369: Missing parquet files") {
+    withTempPath { path =>
+      Seq((2012, "a", "b")).toDF("year", "vala", "valb")
+        .write.partitionBy("year", "vala").parquet(path.getAbsolutePath)
+      val df = sqlContext.read.parquet(s"${path.getAbsolutePath}/year=2015/*/*.parquet")
+      assert(df.inputFiles.isEmpty)
+      val df1 = sqlContext.read.parquet(s"${path.getAbsolutePath}/year=2012/*/*.parquet")
+      assert(df1.inputFiles.nonEmpty)
+    }
+  }
+
   ignore("show") {
     // This test case is intended ignored, but to make sure it compiles correctly
     testData.select($"*").show()
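
For reference, a minimal standalone sketch of the Try-based glob filtering the patch introduces: each path's glob resolution is wrapped in Try, only the successes are kept, and an empty result signals the caller to fall back to an empty DataFrame. The resolveGlob function here is a hypothetical stand-in for SparkHadoopUtil.get.globPathIfNecessary, so this runs without a Spark or Hadoop dependency.

import scala.util.{Success, Try}

object GlobFilterSketch {
  // Hypothetical resolver: throws for paths that cannot be resolved,
  // mirroring a glob that matches no existing directory.
  def resolveGlob(path: String): Seq[String] =
    if (path.contains("missing")) throw new java.io.FileNotFoundException(path)
    else Seq(s"$path/part-00000.parquet")

  def main(args: Array[String]): Unit = {
    val paths = Seq("/data/year=2012", "/data/missing/year=2015")

    // Same shape as the patched DataFrameReader.parquet: map each path to a
    // Try, keep only the successes, then flatten into one array of paths.
    val globbedPaths = paths
      .map(path => Try(resolveGlob(path)))
      .collect { case Success(s) => s }
      .flatten
      .toArray

    // Only the resolvable path survives; the failing one is silently dropped
    // instead of aborting the whole read.
    println(globbedPaths.mkString(", "))  // /data/year=2012/part-00000.parquet
  }
}

Note the behavioral choice this encodes: a path that fails to resolve is treated the same as a glob with no matches, which is what lets the new test read year=2015/*/*.parquet from a dataset containing only year=2012 and get an empty DataFrame rather than an exception.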