From e1c79cc9ce5fc57c36074364acf53d7932b55767 Mon Sep 17 00:00:00 2001
From: avulanov
Date: Wed, 22 Jun 2016 20:07:19 -0700
Subject: [PATCH 01/10] Fix the construction of the file scheme

---
 .../scala/org/apache/spark/sql/internal/SQLConf.scala | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 2286919f7aad..296bd1a5d22c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -17,6 +17,7 @@
 package org.apache.spark.sql.internal
 
+import java.io.File
 import java.util.{NoSuchElementException, Properties}
 import java.util.concurrent.TimeUnit
 
@@ -55,7 +56,7 @@ object SQLConf {
   val WAREHOUSE_PATH = SQLConfigBuilder("spark.sql.warehouse.dir")
     .doc("The default location for managed databases and tables.")
     .stringConf
-    .createWithDefault("file:${system:user.dir}/spark-warehouse")
+    .createWithDefault("${system:user.dir}/spark-warehouse")
 
   val OPTIMIZER_MAX_ITERATIONS = SQLConfigBuilder("spark.sql.optimizer.maxIterations")
     .internal()
@@ -679,7 +680,10 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
 
   def variableSubstituteDepth: Int = getConf(VARIABLE_SUBSTITUTE_DEPTH)
 
-  def warehousePath: String = getConf(WAREHOUSE_PATH)
+  def warehousePath: String = {
+    getConf(WAREHOUSE_PATH).replace("${system:user.dir}",
+      new File(System.getProperty("user.dir")).toURI.toString)
+  }
 
   override def orderByOrdinal: Boolean = getConf(ORDER_BY_ORDINAL)
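Background on PATCH 01: the old default glued a literal "file:" prefix onto a substituted native path, which breaks on Windows where user.dir contains a drive letter and backslashes. java.io.File#toURI builds a valid, escaped file URI instead. A minimal sketch of the difference (the Windows directory shown is a hypothetical example, not taken from the patch):

    import java.io.File

    // Hand-built scheme: with user.dir = "C:\Users\alex" the old default expands to
    // "file:C:\Users\alex/spark-warehouse" -- mixed separators and an unescaped,
    // authority-less string that is not a usable file URI.
    val broken = "file:" + "C:\\Users\\alex" + "/spark-warehouse"

    // File#toURI normalizes separators and escaping; on Windows it yields
    // something like "file:/C:/Users/alex/spark-warehouse".
    val ok = new File("C:\\Users\\alex", "spark-warehouse").toURI.toString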
From 23540ff32cb0f1701b95d5f4793f07c0246e8582 Mon Sep 17 00:00:00 2001
From: avulanov
Date: Thu, 23 Jun 2016 11:03:37 -0700
Subject: [PATCH 02/10] Fix SQLConf test

---
 .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++--
 .../scala/org/apache/spark/sql/internal/SQLConfSuite.scala | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 296bd1a5d22c..e4ec02cc8c87 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -681,8 +681,8 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
   def variableSubstituteDepth: Int = getConf(VARIABLE_SUBSTITUTE_DEPTH)
 
   def warehousePath: String = {
-    getConf(WAREHOUSE_PATH).replace("${system:user.dir}",
-      new File(System.getProperty("user.dir")).toURI.toString)
+    new File(getConf(WAREHOUSE_PATH).replace("${system:user.dir}",
+      System.getProperty("user.dir"))).toURI.toString
   }
 
   override def orderByOrdinal: Boolean = getConf(ORDER_BY_ORDINAL)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
index 7424e177c5d3..5bf9cceefcd6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
@@ -17,6 +17,8 @@
 package org.apache.spark.sql.internal
 
+import java.io.File
+
 import org.apache.spark.sql.{QueryTest, Row, SparkSession, SQLContext}
 import org.apache.spark.sql.execution.WholeStageCodegenExec
 import org.apache.spark.sql.test.{SharedSQLContext, TestSQLContext}
@@ -214,7 +216,7 @@ class SQLConfSuite extends QueryTest with SharedSQLContext {
       // to get the default value, always unset it
       spark.conf.unset(SQLConf.WAREHOUSE_PATH.key)
       assert(spark.sessionState.conf.warehousePath
-        === s"file:${System.getProperty("user.dir")}/spark-warehouse")
+        === new File(s"${System.getProperty("user.dir")}/spark-warehouse").toURI.toString)
     } finally {
       sql(s"set ${SQLConf.WAREHOUSE_PATH}=$original")
     }
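A subtlety behind the updated assertion in PATCH 02: File#toURI appends a trailing "/" only when the path denotes an existing directory, so expected values have to be built through the same File call as the production code rather than by string concatenation. A hedged sketch:

    import java.io.File

    val tmp = new File(System.getProperty("java.io.tmpdir"))
    // An existing directory gets a trailing slash, e.g. "file:/tmp/".
    assert(tmp.toURI.toString.endsWith("/"))

    // A nonexistent path gets none, e.g. "file:/tmp/spark-warehouse-..." --
    // the shape the warehousePath assertion now expects.
    val missing = new File(tmp, "spark-warehouse-" + System.nanoTime())
    assert(!missing.toURI.toString.endsWith("/"))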
From 330f068c7c10b9adaffbce52864a101e4c019e94 Mon Sep 17 00:00:00 2001
From: avulanov
Date: Fri, 24 Jun 2016 18:36:05 -0700
Subject: [PATCH 03/10] Fix DDLSuite path construction and path equality tests

---
 .../sql/execution/command/DDLSuite.scala | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 564fc73ee702..1869c60103c0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -122,18 +122,20 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       val path = tmpDir.toString
       // The generated temp path is not qualified.
       assert(!path.startsWith("file:/"))
-      sql(s"CREATE DATABASE db1 LOCATION '$path'")
+      val pathWithForwardSlash = path.replace("\\", "/")
+      sql(s"CREATE DATABASE db1 LOCATION '$pathWithForwardSlash'")
       val pathInCatalog = new Path(catalog.getDatabaseMetadata("db1").locationUri).toUri
       assert("file" === pathInCatalog.getScheme)
-      val expectedPath = if (path.endsWith(File.separator)) path.dropRight(1) else path
+      val uriPath = new File(path).toURI.getPath
+      val expectedPath = uriPath.substring(0, uriPath.length - 1)
       assert(expectedPath === pathInCatalog.getPath)
 
       withSQLConf(SQLConf.WAREHOUSE_PATH.key -> path) {
         sql(s"CREATE DATABASE db2")
-        val pathInCatalog = new Path(catalog.getDatabaseMetadata("db2").locationUri).toUri
-        assert("file" === pathInCatalog.getScheme)
-        val expectedPath = appendTrailingSlash(spark.sessionState.conf.warehousePath) + "db2.db"
-        assert(expectedPath === pathInCatalog.getPath)
+        val pathInCatalog2 = new Path(catalog.getDatabaseMetadata("db2").locationUri).toUri
+        assert("file" === pathInCatalog2.getScheme)
+        val expectedPath2 = new Path(spark.sessionState.conf.warehousePath + "db2.db").toUri
+        assert(expectedPath2 === pathInCatalog2)
       }
 
       sql("DROP DATABASE db1")
@@ -154,8 +156,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           sql(s"CREATE DATABASE $dbName")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
-          val expectedLocation =
-            "file:" + appendTrailingSlash(path) + s"$dbNameWithoutBackTicks.db"
+          val expectedLocation = new File(path).toURI.toString + s"$dbNameWithoutBackTicks.db"
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
@@ -181,8 +182,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
         sql(s"CREATE DATABASE $dbName")
         val db1 = catalog.getDatabaseMetadata(dbName)
         val expectedLocation =
-          "file:" + appendTrailingSlash(System.getProperty("user.dir")) +
-            s"spark-warehouse/$dbName.db"
+          new File(s"${System.getProperty("user.dir")}/spark-warehouse").toURI.toString +
+            s"$dbName.db"
         assert(db1 == CatalogDatabase(
           dbName,
           "",
@@ -200,8 +201,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     val catalog = spark.sessionState.catalog
     val databaseNames = Seq("db1", "`database`")
     withTempDir { tmpDir =>
-      val path = tmpDir.toString
-      val dbPath = "file:" + path
+      val path = new File(tmpDir.toString).toURI.toString
       databaseNames.foreach { dbName =>
         try {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
@@ -210,7 +210,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
-            if (dbPath.endsWith(File.separator)) dbPath.dropRight(1) else dbPath,
+            // TODO: the original path was with trailing "/", but catalog returns it without it
+            if (path.endsWith("/")) path.substring(0, path.length() - 1) else path,
             Map.empty))
           sql(s"DROP DATABASE $dbName CASCADE")
           assert(!catalog.databaseExists(dbNameWithoutBackTicks))
@@ -233,8 +234,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
           sql(s"CREATE DATABASE $dbName")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
-          val expectedLocation =
-            "file:" + appendTrailingSlash(path) + s"$dbNameWithoutBackTicks.db"
+          val expectedLocation = new File(path).toURI.toString + s"$dbNameWithoutBackTicks.db"
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
@@ -474,7 +474,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       databaseNames.foreach { dbName =>
         try {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
-          val location = "file:" + appendTrailingSlash(path) + s"$dbNameWithoutBackTicks.db"
+          val location = new File(path).toURI.toString + s"$dbNameWithoutBackTicks.db"
 
           sql(s"CREATE DATABASE $dbName")
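Context for the pathWithForwardSlash workaround in PATCH 03: on Windows a temp directory renders with backslashes, and a backslash inside a single-quoted SQL LOCATION literal is not a portable path separator. Replacing it with "/" keeps the DDL valid on both platforms. A sketch under the assumption of a Windows-style value:

    // Illustrative Windows temp path; on Linux tmpDir.toString has no backslashes.
    val path = "C:\\Temp\\spark-1234"
    val pathWithForwardSlash = path.replace("\\", "/")   // "C:/Temp/spark-1234"
    val ddl = s"CREATE DATABASE db1 LOCATION '$pathWithForwardSlash'"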
From 552273609751eb68955eb247e13254dbaf8861b9 Mon Sep 17 00:00:00 2001
From: avulanov
Date: Fri, 24 Jun 2016 19:07:30 -0700
Subject: [PATCH 04/10] SQLConf path construction with hadoop Path

---
 .../apache/spark/sql/internal/SQLConf.scala | 6 +++---
 .../spark/sql/execution/command/DDLSuite.scala | 18 ++++++++----------
 .../spark/sql/internal/SQLConfSuite.scala | 4 ++--
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index e4ec02cc8c87..c6e407cdd812 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -17,13 +17,13 @@
 package org.apache.spark.sql.internal
 
-import java.io.File
 import java.util.{NoSuchElementException, Properties}
 import java.util.concurrent.TimeUnit
 
 import scala.collection.JavaConverters._
 import scala.collection.immutable
 
+import org.apache.hadoop.fs.Path
 import org.apache.parquet.hadoop.ParquetOutputCommitter
 
 import org.apache.spark.internal.Logging
@@ -681,8 +681,8 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
   def variableSubstituteDepth: Int = getConf(VARIABLE_SUBSTITUTE_DEPTH)
 
   def warehousePath: String = {
-    new File(getConf(WAREHOUSE_PATH).replace("${system:user.dir}",
-      System.getProperty("user.dir"))).toURI.toString
+    new Path(getConf(WAREHOUSE_PATH).replace("${system:user.dir}",
+      System.getProperty("user.dir"))).toUri.toString
   }
 
   override def orderByOrdinal: Boolean = getConf(ORDER_BY_ORDINAL)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 1869c60103c0..79ef4f61929e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -111,10 +111,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     catalog.createPartitions(tableName, Seq(part), ignoreIfExists = false)
   }
 
-  private def appendTrailingSlash(path: String): String = {
-    if (!path.endsWith(File.separator)) path + File.separator else path
-  }
-
   test("the qualified path of a database is stored in the catalog") {
     val catalog = spark.sessionState.catalog
 
@@ -126,16 +122,17 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       sql(s"CREATE DATABASE db1 LOCATION '$pathWithForwardSlash'")
       val pathInCatalog = new Path(catalog.getDatabaseMetadata("db1").locationUri).toUri
       assert("file" === pathInCatalog.getScheme)
-      val uriPath = new File(path).toURI.getPath
-      val expectedPath = uriPath.substring(0, uriPath.length - 1)
+      val expectedPath = new Path(path).toUri.toString
       assert(expectedPath === pathInCatalog.getPath)
 
       withSQLConf(SQLConf.WAREHOUSE_PATH.key -> path) {
         sql(s"CREATE DATABASE db2")
         val pathInCatalog2 = new Path(catalog.getDatabaseMetadata("db2").locationUri).toUri
         assert("file" === pathInCatalog2.getScheme)
-        val expectedPath2 = new Path(spark.sessionState.conf.warehousePath + "db2.db").toUri
-        assert(expectedPath2 === pathInCatalog2)
+        val expectedPath2 = new Path(spark.sessionState.conf.warehousePath + "/" + "db2.db")
+          .toUri
+          .toString
+        assert(expectedPath2 === pathInCatalog2.getPath)
       }
 
       sql("DROP DATABASE db1")
@@ -201,17 +198,18 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     val catalog = spark.sessionState.catalog
     val databaseNames = Seq("db1", "`database`")
     withTempDir { tmpDir =>
-      val path = new File(tmpDir.toString).toURI.toString
+      val path = new Path(tmpDir.toString).toUri.toString
       databaseNames.foreach { dbName =>
         try {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
           sql(s"CREATE DATABASE $dbName Location '$path'")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
+          val expPath = new File(tmpDir.toString).toURI.toString
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
             // TODO: the original path was with trailing "/", but catalog returns it without it
-            if (path.endsWith("/")) path.substring(0, path.length() - 1) else path,
+            if (expPath.endsWith("/")) expPath.substring(0, expPath.length() - 1) else expPath,
             Map.empty))
           sql(s"DROP DATABASE $dbName CASCADE")
           assert(!catalog.databaseExists(dbNameWithoutBackTicks))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
index 5bf9cceefcd6..bf1f61ade998 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.sql.internal
 
-import java.io.File
+import org.apache.hadoop.fs.Path
 
 import org.apache.spark.sql.{QueryTest, Row, SparkSession, SQLContext}
 import org.apache.spark.sql.execution.WholeStageCodegenExec
@@ -216,7 +216,7 @@ class SQLConfSuite extends QueryTest with SharedSQLContext {
       // to get the default value, always unset it
       spark.conf.unset(SQLConf.WAREHOUSE_PATH.key)
      assert(spark.sessionState.conf.warehousePath
-        === new File(s"${System.getProperty("user.dir")}/spark-warehouse").toURI.toString)
+        === new Path(s"${System.getProperty("user.dir")}/spark-warehouse").toUri.toString)
     } finally {
       sql(s"set ${SQLConf.WAREHOUSE_PATH}=$original")
     }
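What the move from java.io.File to org.apache.hadoop.fs.Path in PATCH 04 buys: Path normalizes without consulting the local filesystem, so on a Windows host backslashes typically become forward slashes and duplicate or trailing separators are dropped; unlike File#toURI, though, it does not invent a "file" scheme for an unqualified path. A comparison sketch (the commented results are typical outputs on Windows, stated as assumptions):

    import org.apache.hadoop.fs.Path

    // Typically "C:/Users/alex/spark-warehouse": separators normalized,
    // no trailing slash, but also no scheme added.
    val p = new Path("C:\\Users\\alex\\spark-warehouse")
    val s = p.toString

    // The URI of an unqualified Path has no scheme; only a FileSystem
    // (see makeQualified later in the series) can attach one.
    val scheme = p.toUri.getScheme   // null here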
From 34ef3e1308725b631c92f17bdfd795b468657e98 Mon Sep 17 00:00:00 2001
From: avulanov
Date: Mon, 27 Jun 2016 19:05:47 -0700
Subject: [PATCH 05/10] Path construction with makeQualifiedPath from SessionCatalog

---
 .../sql/execution/command/DDLSuite.scala | 31 +++++++++++++------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 79ef4f61929e..c0549b194b74 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -17,8 +17,6 @@
 package org.apache.spark.sql.execution.command
 
-import java.io.File
-
 import org.apache.hadoop.fs.Path
 import org.scalatest.BeforeAndAfterEach
 
@@ -140,6 +138,18 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     }
   }
 
+  /**
+   * Returns a qualified path constructed by means of Hadoop Path and FileSystem.
+   * Copy-paste from SessionCatalog.scala
+   * @param path path
+   * @return qualified path
+   */
+  private def makeQualifiedPath(path: String): Path = {
+    val hadoopPath = new Path(path)
+    val fs = hadoopPath.getFileSystem(sparkContext.hadoopConfiguration)
+    fs.makeQualified(hadoopPath)
+  }
+
   test("Create/Drop Database") {
     withTempDir { tmpDir =>
       val path = tmpDir.toString
@@ -153,7 +163,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           sql(s"CREATE DATABASE $dbName")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
-          val expectedLocation = new File(path).toURI.toString + s"$dbNameWithoutBackTicks.db"
+          val expectedLocation = makeQualifiedPath(path + "/" + s"$dbNameWithoutBackTicks.db")
+            .toString
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
@@ -179,8 +190,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
         sql(s"CREATE DATABASE $dbName")
         val db1 = catalog.getDatabaseMetadata(dbName)
         val expectedLocation =
-          new File(s"${System.getProperty("user.dir")}/spark-warehouse").toURI.toString +
-            s"$dbName.db"
+          makeQualifiedPath(s"${System.getProperty("user.dir")}/spark-warehouse" +
+            "/" + s"$dbName.db").toString
         assert(db1 == CatalogDatabase(
           dbName,
           "",
@@ -204,12 +215,11 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
           sql(s"CREATE DATABASE $dbName Location '$path'")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
-          val expPath = new File(tmpDir.toString).toURI.toString
+          val expPath = makeQualifiedPath(tmpDir.toString).toString
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
-            // TODO: the original path was with trailing "/", but catalog returns it without it
-            if (expPath.endsWith("/")) expPath.substring(0, expPath.length() - 1) else expPath,
+            expPath,
             Map.empty))
           sql(s"DROP DATABASE $dbName CASCADE")
           assert(!catalog.databaseExists(dbNameWithoutBackTicks))
@@ -232,7 +242,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
           sql(s"CREATE DATABASE $dbName")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
-          val expectedLocation = new File(path).toURI.toString + s"$dbNameWithoutBackTicks.db"
+          val expectedLocation = makeQualifiedPath(path + "/" + s"$dbNameWithoutBackTicks.db")
+            .toString
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
@@ -472,7 +483,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       databaseNames.foreach { dbName =>
         try {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
-          val location = new File(path).toURI.toString + s"$dbNameWithoutBackTicks.db"
+          val location = makeQualifiedPath(path + "/" + s"$dbNameWithoutBackTicks.db").toString
 
           sql(s"CREATE DATABASE $dbName")

From 1be6499c42b23e15a33ae2f6d00ef78b5dda7820 Mon Sep 17 00:00:00 2001
From: avulanov
Date: Mon, 27 Jun 2016 19:16:51 -0700
Subject: [PATCH 06/10] Fix scalastyle comment in test

---
 .../org/apache/spark/sql/execution/command/DDLSuite.scala | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index c0549b194b74..6565a68de47c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -138,13 +138,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     }
   }
 
-  /**
-   * Returns a qualified path constructed by means of Hadoop Path and FileSystem.
-   * Copy-paste from SessionCatalog.scala
-   * @param path path
-   * @return qualified path
-   */
   private def makeQualifiedPath(path: String): Path = {
+    // copy-paste from SessionCatalog
     val hadoopPath = new Path(path)
     val fs = hadoopPath.getFileSystem(sparkContext.hadoopConfiguration)
     fs.makeQualified(hadoopPath)

From fb12118c4a692e4f4ec14cbc2754abac7059aab2 Mon Sep 17 00:00:00 2001
From: avulanov
Date: Wed, 3 Aug 2016 13:26:48 +0300
Subject: [PATCH 07/10] Remove toUri

---
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +-
 .../test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index c6e407cdd812..89083e7ce828 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -682,7 +682,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
 
   def warehousePath: String = {
     new Path(getConf(WAREHOUSE_PATH).replace("${system:user.dir}",
-      System.getProperty("user.dir"))).toUri.toString
+      System.getProperty("user.dir"))).toString
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
index bf1f61ade998..3c60b233c2b0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala
@@ -216,7 +216,7 @@ class SQLConfSuite extends QueryTest with SharedSQLContext {
       // to get the default value, always unset it
       spark.conf.unset(SQLConf.WAREHOUSE_PATH.key)
       assert(spark.sessionState.conf.warehousePath
-        === new Path(s"${System.getProperty("user.dir")}/spark-warehouse").toUri.toString)
+        === new Path(s"${System.getProperty("user.dir")}/spark-warehouse").toString)
     } finally {
       sql(s"set ${SQLConf.WAREHOUSE_PATH}=$original")
     }
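Why PATCH 07's removal of .toUri is more than cosmetic: Path#toString returns the decoded form of the path, while Path#toUri.toString percent-encodes it, so the two disagree as soon as a component contains a space or any other character that URI-encodes. Tests comparing against catalog strings must stick to one form. A small sketch:

    import org.apache.hadoop.fs.Path

    val p = new Path("/base/dir with space/db1.db")
    val plain = p.toString         // "/base/dir with space/db1.db"
    val encoded = p.toUri.toString // "/base/dir%20with%20space/db1.db"
    assert(plain != encoded)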
From 2b592b6a88e8e92800d245bf67a1c60f27abb1ed Mon Sep 17 00:00:00 2001
From: avulanov
Date: Wed, 3 Aug 2016 15:16:48 +0300
Subject: [PATCH 08/10] Rebase

---
 .../scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 6565a68de47c..303c6a71ca14 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -17,6 +17,8 @@
 package org.apache.spark.sql.execution.command
 
+import java.io.File
+
 import org.apache.hadoop.fs.Path
 import org.scalatest.BeforeAndAfterEach
 

From 1b5b035ce7ead23f0abef7a002c1afffabadf65c Mon Sep 17 00:00:00 2001
From: avulanov
Date: Thu, 4 Aug 2016 12:09:56 +0300
Subject: [PATCH 09/10] Addressing reviewers' comments. Escape Windows path in create table string

---
 .../apache/spark/sql/internal/SQLConf.scala | 5 +----
 .../sql/execution/command/DDLSuite.scala | 19 ++++++++++---------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 89083e7ce828..b867a6551feb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -680,10 +680,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
 
   def variableSubstituteDepth: Int = getConf(VARIABLE_SUBSTITUTE_DEPTH)
 
-  def warehousePath: String = {
-    new Path(getConf(WAREHOUSE_PATH).replace("${system:user.dir}",
-      System.getProperty("user.dir"))).toString
-  }
+  def warehousePath: String = new Path(getConf(WAREHOUSE_PATH)).toString
 
   override def orderByOrdinal: Boolean = getConf(ORDER_BY_ORDINAL)
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 303c6a71ca14..8b5d192b5cb8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -122,17 +122,15 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       sql(s"CREATE DATABASE db1 LOCATION '$pathWithForwardSlash'")
       val pathInCatalog = new Path(catalog.getDatabaseMetadata("db1").locationUri).toUri
       assert("file" === pathInCatalog.getScheme)
-      val expectedPath = new Path(path).toUri.toString
-      assert(expectedPath === pathInCatalog.getPath)
+      val expectedPath = new Path(path).toUri
+      assert(expectedPath.getPath === pathInCatalog.getPath)
 
       withSQLConf(SQLConf.WAREHOUSE_PATH.key -> path) {
         sql(s"CREATE DATABASE db2")
         val pathInCatalog2 = new Path(catalog.getDatabaseMetadata("db2").locationUri).toUri
         assert("file" === pathInCatalog2.getScheme)
-        val expectedPath2 = new Path(spark.sessionState.conf.warehousePath + "/" + "db2.db")
-          .toUri
-          .toString
-        assert(expectedPath2 === pathInCatalog2.getPath)
+        val expectedPath2 = new Path(spark.sessionState.conf.warehousePath + "/" + "db2.db").toUri
+        assert(expectedPath2.getPath === pathInCatalog2.getPath)
       }
 
       sql("DROP DATABASE db1")
@@ -269,12 +267,13 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     val partitionClause =
       userSpecifiedPartitionCols.map(p => s"PARTITIONED BY ($p)").getOrElse("")
     val schemaClause = userSpecifiedSchema.map(s => s"($s)").getOrElse("")
+    val uri = path.toURI.toString
     sql(
       s"""
         |CREATE TABLE $tabName $schemaClause
         |USING parquet
         |OPTIONS (
-        |  path '$path'
+        |  path '$uri'
        |)
        |$partitionClause
       """.stripMargin)
@@ -373,6 +372,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       val path = dir.getCanonicalPath
       val df = sparkContext.parallelize(1 to 10).map(i => (i, i.toString)).toDF("col1", "col2")
       df.write.format("json").save(path)
+      val uri = dir.toURI.toString
 
       withTable(tabName) {
         sql(
@@ -380,7 +380,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           |CREATE TABLE $tabName
           |USING json
           |OPTIONS (
-          |  path '$path'
+          |  path '$uri'
           |)
         """.stripMargin)
 
@@ -413,6 +413,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       .add("col2", StringType).add("col4", LongType)
       .add("col1", IntegerType).add("col3", IntegerType)
     val partitionCols = Seq("col1", "col3")
+    val uri = dir.toURI.toString
 
     withTable(tabName) {
       spark.sql(
@@ -420,7 +421,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
         |CREATE TABLE $tabName
        |USING json
        |OPTIONS (
-        |  path '$path'
+        |  path '$uri'
        |)
       """.stripMargin)
       val tableMetadata = catalog.getTableMetadata(TableIdentifier(tabName))
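The point of the '$uri' substitutions in PATCH 09: dir.getCanonicalPath carries backslashes on Windows, and interpolating those into a CREATE TABLE string risks them being read as escape sequences downstream, while a file URI contains only forward slashes. A sketch of the pattern (the directory is a hypothetical example):

    import java.io.File

    val dir = new File("C:\\Temp\\tab1")   // illustrative Windows path
    val uri = dir.toURI.toString           // e.g. "file:/C:/Temp/tab1/"
    val ddl =
      s"""
         |CREATE TABLE tab1
         |USING json
         |OPTIONS (
         |  path '$uri'
         |)
       """.stripMargin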
From ea24b59fe83c37dbab27579141b5c63cccee138d Mon Sep 17 00:00:00 2001
From: avulanov
Date: Tue, 9 Aug 2016 11:56:11 +0300
Subject: [PATCH 10/10] Addressing reviewers' comments

---
 .../sql/execution/command/DDLSuite.scala | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 8b5d192b5cb8..c4988a2980d8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -118,8 +118,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       val path = tmpDir.toString
       // The generated temp path is not qualified.
       assert(!path.startsWith("file:/"))
-      val pathWithForwardSlash = path.replace("\\", "/")
-      sql(s"CREATE DATABASE db1 LOCATION '$pathWithForwardSlash'")
+      val uri = tmpDir.toURI
+      sql(s"CREATE DATABASE db1 LOCATION '$uri'")
       val pathInCatalog = new Path(catalog.getDatabaseMetadata("db1").locationUri).toUri
       assert("file" === pathInCatalog.getScheme)
       val expectedPath = new Path(path).toUri
@@ -138,11 +138,11 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     }
   }
 
-  private def makeQualifiedPath(path: String): Path = {
+  private def makeQualifiedPath(path: String): String = {
     // copy-paste from SessionCatalog
     val hadoopPath = new Path(path)
     val fs = hadoopPath.getFileSystem(sparkContext.hadoopConfiguration)
-    fs.makeQualified(hadoopPath)
+    fs.makeQualified(hadoopPath).toString
   }
 
   test("Create/Drop Database") {
@@ -159,7 +159,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           sql(s"CREATE DATABASE $dbName")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
           val expectedLocation = makeQualifiedPath(path + "/" + s"$dbNameWithoutBackTicks.db")
-            .toString
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
@@ -186,7 +185,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
         val db1 = catalog.getDatabaseMetadata(dbName)
         val expectedLocation =
           makeQualifiedPath(s"${System.getProperty("user.dir")}/spark-warehouse" +
-            "/" + s"$dbName.db").toString
+            "/" + s"$dbName.db")
         assert(db1 == CatalogDatabase(
           dbName,
           "",
@@ -210,7 +209,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
           sql(s"CREATE DATABASE $dbName Location '$path'")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
-          val expPath = makeQualifiedPath(tmpDir.toString).toString
+          val expPath = makeQualifiedPath(tmpDir.toString)
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
@@ -238,7 +237,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
           sql(s"CREATE DATABASE $dbName")
           val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks)
           val expectedLocation = makeQualifiedPath(path + "/" + s"$dbNameWithoutBackTicks.db")
-            .toString
           assert(db1 == CatalogDatabase(
             dbNameWithoutBackTicks,
             "",
@@ -267,7 +265,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     val partitionClause =
       userSpecifiedPartitionCols.map(p => s"PARTITIONED BY ($p)").getOrElse("")
     val schemaClause = userSpecifiedSchema.map(s => s"($s)").getOrElse("")
-    val uri = path.toURI.toString
+    val uri = path.toURI
     sql(
       s"""
         |CREATE TABLE $tabName $schemaClause
@@ -372,7 +370,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       val path = dir.getCanonicalPath
       val df = sparkContext.parallelize(1 to 10).map(i => (i, i.toString)).toDF("col1", "col2")
       df.write.format("json").save(path)
-      val uri = dir.toURI.toString
+      val uri = dir.toURI
 
       withTable(tabName) {
         sql(
@@ -413,7 +411,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       .add("col2", StringType).add("col4", LongType)
       .add("col1", IntegerType).add("col3", IntegerType)
     val partitionCols = Seq("col1", "col3")
-    val uri = dir.toURI.toString
+    val uri = dir.toURI
 
     withTable(tabName) {
       spark.sql(
@@ -481,7 +479,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       databaseNames.foreach { dbName =>
         try {
           val dbNameWithoutBackTicks = cleanIdentifier(dbName)
-          val location = makeQualifiedPath(path + "/" + s"$dbNameWithoutBackTicks.db").toString
+          val location = makeQualifiedPath(path + "/" + s"$dbNameWithoutBackTicks.db")
 
           sql(s"CREATE DATABASE $dbName")
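For reference, the qualification step the final round of tests relies on: FileSystem#makeQualified attaches the filesystem's scheme (and resolves relative paths against its working directory), which is how the session catalog arrives at the locations these assertions compare against. A standalone sketch of the suite's helper (here with a plain default Configuration, whereas the suite passes sparkContext.hadoopConfiguration):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.Path

    def makeQualifiedPath(path: String): String = {
      val hadoopPath = new Path(path)
      val fs = hadoopPath.getFileSystem(new Configuration())
      // e.g. "/tmp/db1.db" -> "file:/tmp/db1.db" on the local filesystem
      fs.makeQualified(hadoopPath).toString
    }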