-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-11042] [SQL] Add a mechanism to ban creating multiple root SQLContexts/HiveContexts in a JVM #9058
[SPARK-11042] [SQL] Add a mechanism to ban creating multiple root SQLContexts/HiveContexts in a JVM #9058
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,7 +26,7 @@ import scala.collection.immutable | |
import scala.reflect.runtime.universe.TypeTag | ||
import scala.util.control.NonFatal | ||
|
||
import org.apache.spark.SparkContext | ||
import org.apache.spark.{SparkException, SparkContext} | ||
import org.apache.spark.annotation.{DeveloperApi, Experimental} | ||
import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} | ||
import org.apache.spark.rdd.RDD | ||
|
@@ -64,22 +64,37 @@ import org.apache.spark.util.Utils | |
*/ | ||
class SQLContext private[sql]( | ||
@transient val sparkContext: SparkContext, | ||
@transient protected[sql] val cacheManager: CacheManager) | ||
@transient protected[sql] val cacheManager: CacheManager, | ||
val isRootContext: Boolean) | ||
extends org.apache.spark.Logging with Serializable { | ||
|
||
self => | ||
|
||
// NOTE(review): the next two lines are the removed and added sides of the same diff line. | ||
// The added side passes `true` for isRootContext, so a context built directly from a | ||
// SparkContext is treated as a root context. | ||
def this(sparkContext: SparkContext) = this(sparkContext, new CacheManager) | ||
def this(sparkContext: SparkContext) = this(sparkContext, new CacheManager, true) | ||
// Java-friendly constructor: unwraps the JavaSparkContext and delegates to the one above. | ||
def this(sparkContext: JavaSparkContext) = this(sparkContext.sc) | ||
|
||
// If spark.sql.allowMultipleContexts is false, we will throw an exception if a user | ||
// wants to create a new root SQLContext (a SQLContext that is not created by newSession) | ||
// while another SQLContext has already been instantiated. | ||
// The flag is read from the SparkConf of the SparkContext, falling back to the default | ||
// declared on SQLConf.ALLOW_MULTIPLE_CONTEXTS. | ||
private val allowMultipleContexts = | ||
sparkContext.conf.getBoolean( | ||
SQLConf.ALLOW_MULTIPLE_CONTEXTS.key, | ||
SQLConf.ALLOW_MULTIPLE_CONTEXTS.defaultValue.get) | ||
|
||
// Runs as part of construction, so a disallowed root context fails with a SparkException. | ||
SQLContext.assertNoRootSQLContextIsRunning( | ||
isRootContext, | ||
allowMultipleContexts) | ||
|
||
/** | ||
* Returns a SQLContext as new session, with separated SQL configurations, temporary tables, | ||
* registered functions, but sharing the same SparkContext and CacheManager. | ||
* | ||
* @since 1.6.0 | ||
*/ | ||
def newSession(): SQLContext = { | ||
// NOTE(review): the two-argument call below is the removed side of the diff; the | ||
// three-argument call after it is its replacement. | ||
new SQLContext(sparkContext, cacheManager) | ||
// The session reuses this context's SparkContext and CacheManager and is created with | ||
// isRootContext = false, which makes assertNoRootSQLContextIsRunning skip its check. | ||
new SQLContext( | ||
sparkContext = sparkContext, | ||
cacheManager = cacheManager, | ||
isRootContext = false) | ||
} | ||
|
||
/** | ||
|
@@ -1239,6 +1254,24 @@ object SQLContext { | |
instantiatedContext.compareAndSet(null, sqlContext) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @davies Seems when we create a sql context for a new session, instantiatedContext will be set to the context representing that session. Do you think if it makes sense to use instantiatedContext hold the root sql context (the one created directly from user-facing constructor instead of newSession)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, right. |
||
} | ||
|
||
/**
 * Returns the SQLContext currently held by `instantiatedContext`, or None when it holds null.
 */
private[sql] def getInstantiatedContextOption(): Option[SQLContext] = {
  val current = instantiatedContext.get()
  Option(current)
}
|
||
// Throws a SparkException when all of the following hold: the context being created is a | ||
// root context, multiple root contexts are not allowed, and getInstantiatedContextOption() | ||
// already returns a context. In every other case it returns without doing anything. | ||
// | ||
// isRootContext: whether the context being constructed is a root context. | ||
// allowMultipleRootSQLContexts: when true the check is skipped entirely. | ||
// | ||
// NOTE(review): the message below says "SparkContext/HiveContext"; a reviewer asked for | ||
// "SQLContext". MultiSQLContextsSuite asserts on this exact text, so change both together. | ||
private[sql] def assertNoRootSQLContextIsRunning( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's better to inline this function, put the related logic together. |
||
isRootContext: Boolean, | ||
allowMultipleRootSQLContexts: Boolean): Unit = { | ||
if (!allowMultipleRootSQLContexts && isRootContext) { | ||
getInstantiatedContextOption() match { | ||
case Some(rootSQLContext) => | ||
val errMsg = "Only one SparkContext/HiveContext may be running in this JVM." + | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SparkContext -> SQLContext |
||
s" To ignore this error, set ${SQLConf.ALLOW_MULTIPLE_CONTEXTS.key} = true." | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could only be set in SparkConf |
||
throw new SparkException(errMsg) | ||
case None => // OK | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Changes the SQLContext that will be returned in this thread and its children when | ||
* SQLContext.getOrCreate() is called. This can be used to ensure that a given thread receives | ||
|
@@ -1260,6 +1293,10 @@ object SQLContext { | |
activeContext.remove() | ||
} | ||
|
||
/**
 * Returns the SQLContext obtained from `activeContext.get()`, or None when that value is null.
 */
private[sql] def getActiveContextOption(): Option[SQLContext] = {
  val current = activeContext.get()
  Option(current)
}
|
||
/** | ||
* Converts an iterator of Java Beans to InternalRow using the provided | ||
* bean info & schema. This is not related to the singleton, but is a static | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql | ||
|
||
import org.apache.spark._ | ||
import org.scalatest.BeforeAndAfterAll | ||
|
||
/**
 * Exercises the flag (SQLConf.ALLOW_MULTIPLE_CONTEXTS) that controls whether more than one
 * root SQLContext may be created, and checks that new sessions can always be created from a
 * root context regardless of the flag.
 *
 * The suite clears the active and instantiated SQLContext in beforeAll and puts the original
 * values back in afterAll.
 */
class MultiSQLContextsSuite extends SparkFunSuite with BeforeAndAfterAll {

  // State captured in beforeAll and restored in afterAll. Initialised to None (rather than
  // null) so that afterAll is a safe no-op if beforeAll fails before assigning them.
  private var originalActiveSQLContext: Option[SQLContext] = None
  private var originalInstantiatedSQLContext: Option[SQLContext] = None
  private var sparkConf: SparkConf = _

  override protected def beforeAll(): Unit = {
    super.beforeAll()
    originalActiveSQLContext = SQLContext.getActiveContextOption()
    originalInstantiatedSQLContext = SQLContext.getInstantiatedContextOption()

    // Start from a clean slate: no active context and no instantiated context.
    SQLContext.clearActive()
    originalInstantiatedSQLContext.foreach(ctx => SQLContext.clearInstantiatedContext(ctx))
    sparkConf =
      new SparkConf(false)
        .setMaster("local[*]")
        .setAppName("test")
        .set("spark.ui.enabled", "false")
        // The test creates a second SparkContext while the first one is still running.
        .set("spark.driver.allowMultipleContexts", "true")
  }

  override protected def afterAll(): Unit = {
    try {
      // Set these states back.
      originalActiveSQLContext.foreach(ctx => SQLContext.setActive(ctx))
      originalInstantiatedSQLContext.foreach(ctx => SQLContext.setInstantiatedContext(ctx))
    } finally {
      super.afterAll()
    }
  }

  /** Creates a SparkContext whose conf carries the given value for the multiple-contexts flag. */
  private def newSparkContext(allowMultipleContexts: Boolean): SparkContext = {
    val conf =
      sparkConf
        .clone
        .set(SQLConf.ALLOW_MULTIPLE_CONTEXTS.key, allowMultipleContexts.toString)
    new SparkContext(conf)
  }

  /** Verifies that a new session can be created from the given root context. */
  def testNewSession(rootSQLContext: SQLContext): Unit = {
    // Make sure we can successfully create new Session.
    rootSQLContext.newSession()

    // Reset the state. It is always safe to clear the active context.
    SQLContext.clearActive()
  }

  /**
   * Tries to create another root SQLContext on a fresh SparkContext. When the flag allows it
   * the construction must succeed; otherwise it must fail with a SparkException carrying the
   * expected message. The SparkContext created here is always stopped.
   */
  def testCreatingNewSQLContext(allowsMultipleContexts: Boolean): Unit = {
    val sparkContext = newSparkContext(allowsMultipleContexts)

    try {
      if (allowsMultipleContexts) {
        new SQLContext(sparkContext)
        SQLContext.clearActive()
      } else {
        // If allowsMultipleContexts is false, make sure we can get the error.
        val message = intercept[SparkException] {
          new SQLContext(sparkContext)
        }.getMessage
        assert(message.contains("Only one SparkContext/HiveContext may be running"))
      }
    } finally {
      sparkContext.stop()
    }
  }

  test("test the flag to disallow creating multiple root SQLContext") {
    Seq(false, true).foreach { allowMultipleSQLContexts =>
      val sc = newSparkContext(allowMultipleSQLContexts)
      try {
        val rootSQLContext = new SQLContext(sc)
        try {
          testNewSession(rootSQLContext)
          testNewSession(rootSQLContext)
          testCreatingNewSQLContext(allowMultipleSQLContexts)
        } finally {
          // Always drop the root context, even when an assertion above fails, so that it
          // does not leak into the next iteration or into other suites.
          SQLContext.clearInstantiatedContext(rootSQLContext)
        }
      } finally {
        sc.stop()
      }
    }
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can use `conf.getConf(SQLConf.ALLOW_MULTIPLE_CONTEXTS)` here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, at this point we have not populated SQLConf yet (the `conf` you are referring to here is SQLConf, right?).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah yes, SQLConf has not been populated here. nvm.