-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-47577][SPARK-47579] Correct misleading usage of log key TASK_ID #46951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,9 @@ package org.apache.spark.scheduler | |
|
|
||
| import java.util.Properties | ||
|
|
||
| import org.apache.spark.internal.LogKeys.{STAGE_ATTEMPT, STAGE_ID} | ||
| import org.apache.spark.internal.MessageWithContext | ||
|
|
||
| /** | ||
| * A set of tasks submitted together to the low-level TaskScheduler, usually representing | ||
| * missing partitions of a particular stage. | ||
|
|
@@ -34,4 +37,12 @@ private[spark] class TaskSet( | |
| val id: String = s"$stageId.$stageAttemptId" | ||
|
|
||
| override def toString: String = "TaskSet " + id | ||
|
|
||
| // Identifier used in the structured logging framework. | ||
| lazy val logId: MessageWithContext = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So do we intentionally not treat it as a log parameter?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what do you mean by "log parameter"?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nvm, it's a |
||
| val hashMap = new java.util.HashMap[String, String]() | ||
| hashMap.put(STAGE_ID.name, stageId.toString) | ||
| hashMap.put(STAGE_ATTEMPT.name, stageAttemptId.toString) | ||
| MessageWithContext(id, hashMap) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -989,9 +989,11 @@ private[spark] class TaskSetManager( | |
| if (ef.className == classOf[TaskOutputFileAlreadyExistException].getName) { | ||
| // If we can not write to output file in the task, there's no point in trying to | ||
| // re-execute it. | ||
| logError(log"Task ${MDC(TASK_ID, info.id)} in stage ${MDC(STAGE_ID, taskSet.id)} " + | ||
| log"(TID ${MDC(TID, tid)}) can not write to output file: " + | ||
| log"${MDC(ERROR, ef.description)}; not retrying") | ||
| logError( | ||
| log"Task ${MDC(TASK_INDEX, info.index)}.${MDC(TASK_ATTEMPT_ID, info.attemptNumber)} " + | ||
| log"in stage ${MDC(STAGE_ID, taskSet.stageId)}." + | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one only shows once...I was thinking about not having a new method |
||
| log"${MDC(STAGE_ATTEMPT, taskSet.stageAttemptId)} (TID ${MDC(TASK_ID, tid)}) " + | ||
| log"can not write to output file: ${MDC(ERROR, ef.description)}; not retrying") | ||
| emptyTaskInfoAccumulablesAndNotifyDagScheduler(tid, tasks(index), reason, null, | ||
| accumUpdates, metricPeaks) | ||
| abort("Task %s in stage %s (TID %d) can not write to output file: %s".format( | ||
|
|
@@ -1057,8 +1059,8 @@ private[spark] class TaskSetManager( | |
| info.host, info.executorId, index, failureReasonString)) | ||
| numFailures(index) += 1 | ||
| if (numFailures(index) >= maxTaskFailures) { | ||
| logError(log"Task ${MDC(TASK_ID, index)} in stage ${MDC(STAGE_ID, taskSet.id)} failed " + | ||
| log"${MDC(MAX_ATTEMPTS, maxTaskFailures)} times; aborting job") | ||
| logError(log"Task ${MDC(TASK_INDEX, index)} in stage " + taskSet.logId + | ||
| log" failed ${MDC(MAX_ATTEMPTS, maxTaskFailures)} times; aborting job") | ||
| abort("Task %d in stage %s failed %d times, most recent failure: %s\nDriver stacktrace:" | ||
| .format(index, taskSet.id, maxTaskFailures, failureReasonString), failureException) | ||
| return | ||
|
|
@@ -1252,8 +1254,8 @@ private[spark] class TaskSetManager( | |
| if (speculated) { | ||
| addPendingTask(index, speculatable = true) | ||
| logInfo( | ||
| log"Marking task ${MDC(INDEX, index)} in stage ${MDC(STAGE_ID, taskSet.id)} (on " + | ||
| log"${MDC(HOST, info.host)}) as speculatable because it ran more than " + | ||
| log"Marking task ${MDC(TASK_INDEX, index)} in stage " + taskSet.logId + | ||
| log" (on ${MDC(HOST, info.host)}) as speculatable because it ran more than " + | ||
| log"${MDC(TIMEOUT, threshold)} ms(${MDC(NUM_TASKS, speculatableTasks.size + 1)}" + | ||
| log"speculatable tasks in this taskset now)") | ||
| speculatableTasks += index | ||
|
|
||

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really want to call this `TASK_NAME`? From the code below, it seems inconsistent:
spark/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
Line 839 in 33a9c5d
spark/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala
Lines 583 to 587 in 33a9c5d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
well, I can't find a better name here..