From 8cca76b7ec31c1c0cff1bd5b6772b43b49c30147 Mon Sep 17 00:00:00 2001
From: Michael Allman <msa@allman.ms>
Date: Tue, 11 Sep 2018 11:10:08 -0400
Subject: [PATCH 1/3] For ParquetSchemaPruningSuite.scala, move calls to
 `withSQLConf` inside calls to `test`

---
 .../parquet/ParquetSchemaPruningSuite.scala   | 44 ++++++++++---------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
index eb99654fa78f..fccea66150d9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
@@ -156,20 +156,24 @@ class ParquetSchemaPruningSuite
   }
 
   private def testSchemaPruning(testName: String)(testThunk: => Unit) {
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
-      test(s"Spark vectorized reader - without partition data column - $testName") {
+    test(s"Spark vectorized reader - without partition data column - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
         withContacts(testThunk)
       }
-      test(s"Spark vectorized reader - with partition data column - $testName") {
+    }
+    test(s"Spark vectorized reader - with partition data column - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
         withContactsWithDataPartitionColumn(testThunk)
       }
     }
 
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
-      test(s"Parquet-mr reader - without partition data column - $testName") {
+    test(s"Parquet-mr reader - without partition data column - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
         withContacts(testThunk)
       }
-      test(s"Parquet-mr reader - with partition data column - $testName") {
+    }
+    test(s"Parquet-mr reader - with partition data column - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
         withContactsWithDataPartitionColumn(testThunk)
       }
     }
@@ -246,27 +250,27 @@ class ParquetSchemaPruningSuite
   }
 
   private def testMixedCasePruning(testName: String)(testThunk: => Unit) {
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
-      SQLConf.CASE_SENSITIVE.key -> "true") {
-      test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") {
-          withMixedCaseData(testThunk)
+    test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
+        SQLConf.CASE_SENSITIVE.key -> "true") {
+        withMixedCaseData(testThunk)
       }
     }
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
-      SQLConf.CASE_SENSITIVE.key -> "false") {
-      test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
+    test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
+        SQLConf.CASE_SENSITIVE.key -> "false") {
         withMixedCaseData(testThunk)
       }
     }
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
-      SQLConf.CASE_SENSITIVE.key -> "false") {
-      test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") {
-          withMixedCaseData(testThunk)
+    test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
+        SQLConf.CASE_SENSITIVE.key -> "false") {
+        withMixedCaseData(testThunk)
       }
     }
-    withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
-      SQLConf.CASE_SENSITIVE.key -> "true") {
-      test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") {
+    test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
+        SQLConf.CASE_SENSITIVE.key -> "true") {
         withMixedCaseData(testThunk)
       }
     }

From c759aeabc8b3fb3c426e432bff794deddef3e05e Mon Sep 17 00:00:00 2001
From: Michael Allman <msa@allman.ms>
Date: Tue, 11 Sep 2018 11:33:47 -0400
Subject: [PATCH 2/3] We shouldn't expect queries selecting columns using
 different case to succeed when using a case-sensitive query parser

---
 .../parquet/ParquetSchemaPruningSuite.scala   | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
index fccea66150d9..d4ca82220178 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
@@ -213,7 +213,7 @@ class ParquetSchemaPruningSuite
     MixedCase(1, "r1c1", MixedCaseColumn("123", 2)) ::
     Nil
 
-  testMixedCasePruning("select with exact column names") {
+  testExactCasePruning("select with exact column names") {
     val query = sql("select CoL1, coL2.B from mixedcase")
     checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
     checkAnswer(query.orderBy("id"),
@@ -249,13 +249,23 @@ class ParquetSchemaPruningSuite
     checkAnswer(query.orderBy("id"), Row(1) :: Nil)
   }
 
-  private def testMixedCasePruning(testName: String)(testThunk: => Unit) {
+  private def testExactCasePruning(testName: String)(testThunk: => Unit) {
     test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") {
       withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
         SQLConf.CASE_SENSITIVE.key -> "true") {
         withMixedCaseData(testThunk)
       }
     }
+    test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
+        SQLConf.CASE_SENSITIVE.key -> "true") {
+        withMixedCaseData(testThunk)
+      }
+    }
+    testMixedCasePruning(testName)(testThunk)
+  }
+
+  private def testMixedCasePruning(testName: String)(testThunk: => Unit) {
     test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
       withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
         SQLConf.CASE_SENSITIVE.key -> "false") {
@@ -268,12 +278,6 @@ class ParquetSchemaPruningSuite
         withMixedCaseData(testThunk)
       }
     }
-    test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") {
-      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
-        SQLConf.CASE_SENSITIVE.key -> "true") {
-        withMixedCaseData(testThunk)
-      }
-    }
   }
 
   private def withMixedCaseData(testThunk: => Unit) {

From 1c8963702f53c9d25cb741ef9c11bceef2c39188 Mon Sep 17 00:00:00 2001
From: Michael Allman <msa@allman.ms>
Date: Wed, 12 Sep 2018 10:49:14 -0400
Subject: [PATCH 3/3] Clarify the intent of the test methods for
 case-insensitive pruning testing

---
 .../parquet/ParquetSchemaPruningSuite.scala   | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
index d4ca82220178..5a4c96c7caf9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala
@@ -213,7 +213,7 @@ class ParquetSchemaPruningSuite
     MixedCase(1, "r1c1", MixedCaseColumn("123", 2)) ::
     Nil
 
-  testExactCasePruning("select with exact column names") {
+  testExactCaseQueryPruning("select with exact column names") {
     val query = sql("select CoL1, coL2.B from mixedcase")
     checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
     checkAnswer(query.orderBy("id"),
@@ -222,7 +222,7 @@ class ParquetSchemaPruningSuite
       Nil)
   }
 
-  testMixedCasePruning("select with lowercase column names") {
+  testMixedCaseQueryPruning("select with lowercase column names") {
     val query = sql("select col1, col2.b from mixedcase")
     checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
     checkAnswer(query.orderBy("id"),
@@ -231,7 +231,7 @@ class ParquetSchemaPruningSuite
       Nil)
   }
 
-  testMixedCasePruning("select with different-case column names") {
+  testMixedCaseQueryPruning("select with different-case column names") {
     val query = sql("select cOL1, cOl2.b from mixedcase")
     checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
     checkAnswer(query.orderBy("id"),
@@ -240,7 +240,7 @@ class ParquetSchemaPruningSuite
       Nil)
   }
 
-  testMixedCasePruning("filter with different-case column names") {
+  testMixedCaseQueryPruning("filter with different-case column names") {
     val query = sql("select id from mixedcase where Col2.b = 2")
     // Pruning with filters is currently unsupported. As-is, the file reader will read the id column
     // and the entire coL2 struct. Once pruning with filters has been implemented we can uncomment
@@ -249,7 +249,10 @@ class ParquetSchemaPruningSuite
     checkAnswer(query.orderBy("id"), Row(1) :: Nil)
   }
 
-  private def testExactCasePruning(testName: String)(testThunk: => Unit) {
+  // Tests schema pruning for a query whose column and field names are exactly the same as the table
+  // schema's column and field names. N.B. this implies that `testThunk` must pass with both a
+  // case-sensitive and a case-insensitive query parser.
+  private def testExactCaseQueryPruning(testName: String)(testThunk: => Unit) {
     test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") {
       withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
         SQLConf.CASE_SENSITIVE.key -> "true") {
@@ -262,18 +265,20 @@ class ParquetSchemaPruningSuite
         withMixedCaseData(testThunk)
       }
     }
-    testMixedCasePruning(testName)(testThunk)
+    testMixedCaseQueryPruning(testName)(testThunk)
   }
 
-  private def testMixedCasePruning(testName: String)(testThunk: => Unit) {
-    test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
-      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
+  // Tests schema pruning for a query whose column and field names may differ in case from the table
+  // schema's column and field names, so it is run only with a case-insensitive query parser.
+  private def testMixedCaseQueryPruning(testName: String)(testThunk: => Unit) {
+    test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
         SQLConf.CASE_SENSITIVE.key -> "false") {
         withMixedCaseData(testThunk)
       }
     }
-    test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") {
-      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
+    test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
         SQLConf.CASE_SENSITIVE.key -> "false") {
         withMixedCaseData(testThunk)
       }