[SPARK-43952][CORE][CONNECT][SQL] Add SparkContext APIs for query cancellation by tag #41440
Changes from all commits
@@ -829,6 +829,55 @@ class SparkContext(config: SparkConf) extends Logging {
    setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, null)
  }

  /**
   * Set the behavior of job cancellation from jobs started in this thread.
   *
   * @param interruptOnCancel If true, then job cancellation will result in `Thread.interrupt()`
   * being called on the job's executor threads. This is useful to help ensure that the tasks
   * are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS

> Contributor: Someone should check if this is still a thing :)
>
> Contributor (Author): The HDFS-1208 bug is still open, but multiple places in Spark core have by now elected to just pass

   * may respond to Thread.interrupt() by marking nodes as dead.
   */
  def setInterruptOnCancel(interruptOnCancel: Boolean): Unit = {
    setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, interruptOnCancel.toString)
  }

  /**
   * Add a tag to be assigned to all the jobs started by this thread.
   *
   * @param tag The tag to be added. Cannot contain ',' (comma) character.
   */
  def addJobTag(tag: String): Unit = {
    SparkContext.throwIfInvalidTag(tag)
    val existingTags = getJobTags()
    val newTags = (existingTags + tag).mkString(SparkContext.SPARK_JOB_TAGS_SEP)
    setLocalProperty(SparkContext.SPARK_JOB_TAGS, newTags)
  }

  /**
   * Remove a tag previously added to be assigned to all the jobs started by this thread.
   * Noop if such a tag was not added earlier.
   *
   * @param tag The tag to be removed. Cannot contain ',' (comma) character.
   */
  def removeJobTag(tag: String): Unit = {
    SparkContext.throwIfInvalidTag(tag)
    val existingTags = getJobTags()
    val newTags = (existingTags - tag).mkString(SparkContext.SPARK_JOB_TAGS_SEP)
    setLocalProperty(SparkContext.SPARK_JOB_TAGS, newTags)
  }

  /** Get the tags that are currently set to be assigned to all the jobs started by this thread. */
  def getJobTags(): Set[String] = {
    Option(getLocalProperty(SparkContext.SPARK_JOB_TAGS))
      .map(_.split(SparkContext.SPARK_JOB_TAGS_SEP).toSet)
      .getOrElse(Set())
  }

  /** Clear the current thread's job tags. */
  def clearJobTags(): Unit = {
    setLocalProperty(SparkContext.SPARK_JOB_TAGS, null)
  }

  /**
   * Execute a block of code in a scope such that all new RDDs created in this body will
   * be part of the same scope. For more detail, see {{org.apache.spark.rdd.RDDOperationScope}}.

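For illustration (not part of the diff itself), a minimal sketch of how the new thread-local tag APIs could be used from a driver thread. It assumes a running `SparkContext` named `sc`; the tag names are arbitrary examples.

```scala
// Tags apply to all jobs subsequently submitted from this thread.
sc.addJobTag("etl")
sc.addJobTag("nightly")
assert(sc.getJobTags() == Set("etl", "nightly"))

// This job carries both tags and can later be cancelled by either of them.
sc.parallelize(1 to 100).count()

sc.removeJobTag("nightly")       // noop if the tag was never added
assert(sc.getJobTags() == Set("etl"))

sc.clearJobTags()                // drop all tags for this thread
assert(sc.getJobTags().isEmpty)
```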
@@ -2471,6 +2520,17 @@ class SparkContext(config: SparkConf) extends Logging {
    dagScheduler.cancelJobGroup(groupId)
  }

  /**
   * Cancel active jobs that have the specified tag. See `org.apache.spark.SparkContext.addJobTag`.
   *
   * @param tag The tag to be cancelled. Cannot contain ',' (comma) character.
   */
  def cancelJobsWithTag(tag: String): Unit = {
    SparkContext.throwIfInvalidTag(tag)
    assertNotStopped()
    dagScheduler.cancelJobsWithTag(tag)
  }

  /** Cancel all jobs that have been scheduled or are running. */
  def cancelAllJobs(): Unit = {
    assertNotStopped()

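Again for illustration only, a hedged sketch of the cancellation path this hunk enables: one thread tags its jobs, another thread cancels them by tag. It assumes a running `SparkContext` `sc`; the tag name, sleep durations, and dataset are arbitrary, and the cancelled action is expected to fail with an exception in the tagged thread.

```scala
// Optional: interrupt executor task threads on cancellation (see the HDFS-1208 caveat above).
sc.setInterruptOnCancel(true)

val runner = new Thread {
  override def run(): Unit = {
    sc.addJobTag("long-running")   // tags are thread-local, so set them in the submitting thread
    try {
      // A deliberately slow job; without cancellation this would take a long time.
      sc.parallelize(1 to 10000, 2).map { i => Thread.sleep(100); i }.count()
    } catch {
      case e: Exception => println(s"Job ended early: ${e.getMessage}")
    }
  }
}
runner.start()

Thread.sleep(2000)                      // give the job time to start
sc.cancelJobsWithTag("long-running")    // cancels every active job carrying this tag
runner.join()
```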
@@ -2840,6 +2900,7 @@ object SparkContext extends Logging {
  private[spark] val SPARK_JOB_DESCRIPTION = "spark.job.description"
  private[spark] val SPARK_JOB_GROUP_ID = "spark.jobGroup.id"
  private[spark] val SPARK_JOB_INTERRUPT_ON_CANCEL = "spark.job.interruptOnCancel"
  private[spark] val SPARK_JOB_TAGS = "spark.job.tags"
  private[spark] val SPARK_SCHEDULER_POOL = "spark.scheduler.pool"
  private[spark] val RDD_SCOPE_KEY = "spark.rdd.scope"
  private[spark] val RDD_SCOPE_NO_OVERRIDE_KEY = "spark.rdd.scope.noOverride"

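As a sketch of how the new `spark.job.tags` property is represented: based on the constant above and the `SPARK_JOB_TAGS_SEP = ","` separator defined in the next hunk, a thread's tags are stored as a single comma-separated string in its local properties. Assumes a running `SparkContext` `sc`; tag names are arbitrary.

```scala
sc.addJobTag("a")
sc.addJobTag("b")

// getJobTags() keeps the tags as a Set, so the order of the joined string is not guaranteed.
val raw = sc.getLocalProperty("spark.job.tags")   // e.g. "a,b" or "b,a"
assert(raw.split(",").toSet == Set("a", "b"))
```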
@@ -2851,6 +2912,22 @@
   */
  private[spark] val DRIVER_IDENTIFIER = "driver"

  /** Separator of tags in SPARK_JOB_TAGS property */
  private[spark] val SPARK_JOB_TAGS_SEP = ","

  private[spark] def throwIfInvalidTag(tag: String) = {
    if (tag == null) {
      throw new IllegalArgumentException("Spark job tag cannot be null.")
    }
    if (tag.contains(SPARK_JOB_TAGS_SEP)) {
      throw new IllegalArgumentException(
        s"Spark job tag cannot contain '$SPARK_JOB_TAGS_SEP'.")
    }
    if (tag.isEmpty) {
      throw new IllegalArgumentException(
        "Spark job tag cannot be an empty string.")
    }
  }

  private implicit def arrayToArrayWritable[T <: Writable : ClassTag](arr: Iterable[T])
    : ArrayWritable = {

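Finally, a small sketch of the validation behavior that `throwIfInvalidTag` gives the public API: `addJobTag` (which calls it first) rejects null, empty, and comma-containing tags with `IllegalArgumentException`. Assumes a running `SparkContext` `sc`; `expectInvalid` is a hypothetical helper written only for this example.

```scala
def expectInvalid(tag: String): Unit = {
  try {
    sc.addJobTag(tag)
    assert(false, s"Expected IllegalArgumentException for tag: $tag")
  } catch {
    case _: IllegalArgumentException => ()  // expected: tag was rejected
  }
}

expectInvalid(null)     // "Spark job tag cannot be null."
expectInvalid("")       // "Spark job tag cannot be an empty string."
expectInvalid("a,b")    // "Spark job tag cannot contain ','."
```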