Commit 9f2851b

Address comments here first

1 parent 45ac9c7 commit 9f2851b

2 files changed: +25 -30 lines

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala

Lines changed: 2 additions & 1 deletion

@@ -243,7 +243,8 @@ private[parquet] object ParquetFilters {
    * Note that, this is a hacky workaround to allow dots in column names. Currently, column APIs
    * in Parquet's `FilterApi` only allows dot-separated names so here we resemble those columns
    * but only allow single column path that allows dots in the names as we don't currently push
-   * down filters with nested fields.
+   * down filters with nested fields. The functions in this object are based on
+   * the codes in `org.apache.parquet.filter2.predicate`.
    */
   private[parquet] object ParquetColumns {
     def intColumn(columnPath: String): Column[Integer] with SupportsLtGt = {
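
For context on why the workaround is needed: Parquet's stock `FilterApi` column factories parse their string argument as a dot-separated path. A minimal sketch of the difference, using only the public parquet-mr APIs `FilterApi.intColumn` and `ColumnPath.get` (the `col.dots` name is just an illustration, not code from this commit):

    import org.apache.parquet.filter2.predicate.FilterApi
    import org.apache.parquet.hadoop.metadata.ColumnPath

    // FilterApi.intColumn parses "col.dots" as a nested two-segment path,
    // ["col", "dots"], i.e. a field `dots` inside a group `col`, so a filter
    // built from it cannot match a flat column literally named "col.dots".
    val nested = FilterApi.intColumn("col.dots")

    // ColumnPath.get takes explicit segments, so a single segment may itself
    // contain dots -- the shape the ParquetColumns helpers above construct.
    val flat = ColumnPath.get("col.dots")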

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala

Lines changed: 23 additions & 29 deletions

@@ -545,35 +545,29 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex
 
     Seq(true, false).foreach { vectorized =>
       withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
-        withTempPath { path =>
-          Seq(Some(1), None).toDF("col.dots").write.parquet(path.getAbsolutePath)
-          assert(spark.read.parquet(path.getAbsolutePath).where("`col.dots` > 0").count() == 1)
-        }
-
-        withTempPath { path =>
-          Seq(Some(1L), None).toDF("col.dots").write.parquet(path.getAbsolutePath)
-          assert(spark.read.parquet(path.getAbsolutePath).where("`col.dots` >= 1L").count() == 1)
-        }
-
-        withTempPath { path =>
-          Seq(Some(1.0F), None).toDF("col.dots").write.parquet(path.getAbsolutePath)
-          assert(spark.read.parquet(path.getAbsolutePath).where("`col.dots` < 2.0").count() == 1)
-        }
-
-        withTempPath { path =>
-          Seq(Some(1.0D), None).toDF("col.dots").write.parquet(path.getAbsolutePath)
-          assert(spark.read.parquet(path.getAbsolutePath).where("`col.dots` <= 1.0D").count() == 1)
-        }
-
-        withTempPath { path =>
-          Seq(true, false).toDF("col.dots").write.parquet(path.getAbsolutePath)
-          assert(spark.read.parquet(path.getAbsolutePath).where("`col.dots` == true").count() == 1)
-        }
-
-        withTempPath { path =>
-          Seq("apple", null).toDF("col.dots").write.parquet(path.getAbsolutePath)
-          assert(
-            spark.read.parquet(path.getAbsolutePath).where("`col.dots` IS NOT NULL").count() == 1)
+        val dfs = Seq(
+          Seq(Some(1), None).toDF("col.dots"),
+          Seq(Some(1L), None).toDF("col.dots"),
+          Seq(Some(1.0F), None).toDF("col.dots"),
+          Seq(Some(1.0D), None).toDF("col.dots"),
+          Seq(true, false).toDF("col.dots"),
+          Seq("apple", null).toDF("col.dots")
+        )
+
+        val predicates = Seq(
+          "`col.dots` > 0",
+          "`col.dots` >= 1L",
+          "`col.dots` < 2.0",
+          "`col.dots` <= 1.0D",
+          "`col.dots` == true",
+          "`col.dots` IS NOT NULL"
+        )
+
+        dfs.zip(predicates).foreach { case (df, predicate) =>
+          withTempPath { path =>
+            df.write.parquet(path.getAbsolutePath)
+            assert(spark.read.parquet(path.getAbsolutePath).where(predicate).count() == 1)
+          }
         }
       }
     }
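
A note on the refactored test: `dfs.zip(predicates)` pairs the two Seqs positionally, so they must stay in the same order and of equal length. An equivalent layout that keeps each DataFrame next to its predicate is sketched below (hypothetical `cases` name; assumes the suite's test implicits are in scope for `toDF`, as in the test above):

    // Hypothetical alternative layout: a Seq of (DataFrame, predicate) pairs
    // removes the positional coupling between two parallel Seqs.
    val cases = Seq(
      Seq(Some(1), None).toDF("col.dots") -> "`col.dots` > 0",
      Seq(Some(1L), None).toDF("col.dots") -> "`col.dots` >= 1L"
    )
    cases.foreach { case (df, predicate) =>
      withTempPath { path =>
        df.write.parquet(path.getAbsolutePath)
        assert(spark.read.parquet(path.getAbsolutePath).where(predicate).count() == 1)
      }
    }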
