From 8cca76b7ec31c1c0cff1bd5b6772b43b49c30147 Mon Sep 17 00:00:00 2001 From: Michael Allman Date: Tue, 11 Sep 2018 11:10:08 -0400 Subject: [PATCH 1/3] For ParquetSchemaPruningSuite.scala, move calls to `withSQLConf` inside calls to `test` --- .../parquet/ParquetSchemaPruningSuite.scala | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index eb99654fa78f..fccea66150d9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -156,20 +156,24 @@ class ParquetSchemaPruningSuite } private def testSchemaPruning(testName: String)(testThunk: => Unit) { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") { - test(s"Spark vectorized reader - without partition data column - $testName") { + test(s"Spark vectorized reader - without partition data column - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") { withContacts(testThunk) } - test(s"Spark vectorized reader - with partition data column - $testName") { + } + test(s"Spark vectorized reader - with partition data column - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") { withContactsWithDataPartitionColumn(testThunk) } } - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - test(s"Parquet-mr reader - without partition data column - $testName") { + test(s"Parquet-mr reader - without partition data column - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { withContacts(testThunk) } - test(s"Parquet-mr reader - with partition data column - $testName") { + } + test(s"Parquet-mr reader - with partition data column - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { withContactsWithDataPartitionColumn(testThunk) } } @@ -246,27 +250,27 @@ class ParquetSchemaPruningSuite } private def testMixedCasePruning(testName: String)(testThunk: => Unit) { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", - SQLConf.CASE_SENSITIVE.key -> "true") { - test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") { - withMixedCaseData(testThunk) + test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", + SQLConf.CASE_SENSITIVE.key -> "true") { + withMixedCaseData(testThunk) } } - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", - SQLConf.CASE_SENSITIVE.key -> "false") { - test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") { + test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", + SQLConf.CASE_SENSITIVE.key -> "false") { withMixedCaseData(testThunk) } } - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", - SQLConf.CASE_SENSITIVE.key -> "false") { - test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") { - withMixedCaseData(testThunk) + test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", + SQLConf.CASE_SENSITIVE.key -> "false") { + withMixedCaseData(testThunk) } } - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", - SQLConf.CASE_SENSITIVE.key -> "true") { - test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") { + test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", + SQLConf.CASE_SENSITIVE.key -> "true") { withMixedCaseData(testThunk) } } From c759aeabc8b3fb3c426e432bff794deddef3e05e Mon Sep 17 00:00:00 2001 From: Michael Allman Date: Tue, 11 Sep 2018 11:33:47 -0400 Subject: [PATCH 2/3] We shouldn't expect queries selecting columns using different case to succeed when using a case-sensitive query parser --- .../parquet/ParquetSchemaPruningSuite.scala | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index fccea66150d9..d4ca82220178 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -213,7 +213,7 @@ class ParquetSchemaPruningSuite MixedCase(1, "r1c1", MixedCaseColumn("123", 2)) :: Nil - testMixedCasePruning("select with exact column names") { + testExactCasePruning("select with exact column names") { val query = sql("select CoL1, coL2.B from mixedcase") checkScan(query, "struct>") checkAnswer(query.orderBy("id"), @@ -249,13 +249,23 @@ class ParquetSchemaPruningSuite checkAnswer(query.orderBy("id"), Row(1) :: Nil) } - private def testMixedCasePruning(testName: String)(testThunk: => Unit) { + private def testExactCasePruning(testName: String)(testThunk: => Unit) { test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") { withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", SQLConf.CASE_SENSITIVE.key -> "true") { withMixedCaseData(testThunk) } } + test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", + SQLConf.CASE_SENSITIVE.key -> "true") { + withMixedCaseData(testThunk) + } + } + testMixedCasePruning(testName)(testThunk) + } + + private def testMixedCasePruning(testName: String)(testThunk: => Unit) { test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") { withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", SQLConf.CASE_SENSITIVE.key -> "false") { @@ -268,12 +278,6 @@ class ParquetSchemaPruningSuite withMixedCaseData(testThunk) } } - test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", - SQLConf.CASE_SENSITIVE.key -> "true") { - withMixedCaseData(testThunk) - } - } } private def withMixedCaseData(testThunk: => Unit) { From 1c8963702f53c9d25cb741ef9c11bceef2c39188 Mon Sep 17 00:00:00 2001 From: Michael Allman Date: Wed, 12 Sep 2018 10:49:14 -0400 Subject: [PATCH 3/3] Clarify the intent of the test methods for case-insensitive pruning testing --- .../parquet/ParquetSchemaPruningSuite.scala | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index d4ca82220178..5a4c96c7caf9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -213,7 +213,7 @@ class ParquetSchemaPruningSuite MixedCase(1, "r1c1", MixedCaseColumn("123", 2)) :: Nil - testExactCasePruning("select with exact column names") { + testExactCaseQueryPruning("select with exact column names") { val query = sql("select CoL1, coL2.B from mixedcase") checkScan(query, "struct>") checkAnswer(query.orderBy("id"), @@ -222,7 +222,7 @@ class ParquetSchemaPruningSuite Nil) } - testMixedCasePruning("select with lowercase column names") { + testMixedCaseQueryPruning("select with lowercase column names") { val query = sql("select col1, col2.b from mixedcase") checkScan(query, "struct>") checkAnswer(query.orderBy("id"), @@ -231,7 +231,7 @@ class ParquetSchemaPruningSuite Nil) } - testMixedCasePruning("select with different-case column names") { + testMixedCaseQueryPruning("select with different-case column names") { val query = sql("select cOL1, cOl2.b from mixedcase") checkScan(query, "struct>") checkAnswer(query.orderBy("id"), @@ -240,7 +240,7 @@ class ParquetSchemaPruningSuite Nil) } - testMixedCasePruning("filter with different-case column names") { + testMixedCaseQueryPruning("filter with different-case column names") { val query = sql("select id from mixedcase where Col2.b = 2") // Pruning with filters is currently unsupported. As-is, the file reader will read the id column // and the entire coL2 struct. Once pruning with filters has been implemented we can uncomment @@ -249,7 +249,10 @@ class ParquetSchemaPruningSuite checkAnswer(query.orderBy("id"), Row(1) :: Nil) } - private def testExactCasePruning(testName: String)(testThunk: => Unit) { + // Tests schema pruning for a query whose column and field names are exactly the same as the table + // schema's column and field names. N.B. this implies that `testThunk` should pass using either a + // case-sensitive or case-insensitive query parser + private def testExactCaseQueryPruning(testName: String)(testThunk: => Unit) { test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") { withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", SQLConf.CASE_SENSITIVE.key -> "true") { @@ -262,18 +265,20 @@ class ParquetSchemaPruningSuite withMixedCaseData(testThunk) } } - testMixedCasePruning(testName)(testThunk) + testMixedCaseQueryPruning(testName)(testThunk) } - private def testMixedCasePruning(testName: String)(testThunk: => Unit) { - test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", + // Tests schema pruning for a query whose column and field names may differ in case from the table + // schema's column and field names + private def testMixedCaseQueryPruning(testName: String)(testThunk: => Unit) { + test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", SQLConf.CASE_SENSITIVE.key -> "false") { withMixedCaseData(testThunk) } } - test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", + test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", SQLConf.CASE_SENSITIVE.key -> "false") { withMixedCaseData(testThunk) }