Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PAPI better error message when localization/delocalization fails #4718

Merged
merged 5 commits into from
Mar 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
version 1.0

# Test-fixture task: declares a File input so the backend has to localize it
# before the command runs, then prints a fixed message.
#
# Inputs:
#   input_file - never read by the command; it is declared only so that it
#                gets localized onto the task's disk.
# Outputs:
#   out        - the task's stdout, read back as a String.
task localize_file {
  input {
    File input_file
  }
  # `echo` prints the literal message. The previous `cat` treated the message
  # as a file name and would exit non-zero with "No such file or directory".
  command {
    echo "localizing file over 1 GB"
  }
  runtime {
    docker: "ubuntu:latest"
    # Requests the smallest local disk (size 1, presumably GB per the Cromwell
    # `disks` attribute format) so an input larger than that cannot fit.
    disks: "local-disk 1 HDD"
  }
  output {
    String out = read_string(stdout())
  }
}

# Workflow fixture: hands a single GCS file to the localize_file task and
# exposes the task's stdout as the workflow output `content`.
workflow localize_file_larger_than_disk_space {
# Hard-coded workflow-level declaration rather than a workflow input.
# The object name suggests the file is larger than 1 GB -- TODO confirm the
# bucket object still exists and has that size.
File wf_input = "gs://cromwell_test_bucket/file_over_1_gb.txt"

call localize_file { input: input_file = wf_input }

output {
String content = localize_file.out
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Test spec for the localize_file_larger_than_disk_space workflow.
# The workflow is expected to end in failure (testFormat: workflowfailure)
# and is only run against the Papiv2 backend.
name: localize_file_larger_than_disk_space
testFormat: workflowfailure
backends: [Papiv2]
workflowType: WDL
workflowTypeVersion: 1.0
tags: ["wdl_1.0"]

files {
workflow: input_localization/localize_file_larger_than_disk_space.wdl
}

# Expected workflow metadata. The causedBy message pins the full task-level
# error text, including the PAPI error code and the path of the task log.
# NOTE(review): <<UUID>> is presumably substituted with the workflow id by the
# test harness -- confirm.
metadata {
workflowName: localize_file_larger_than_disk_space
status: Failed
"failures.0.message": "Workflow failed"
"failures.0.causedBy.0.message": "Task localize_file_larger_than_disk_space.localize_file:NA:1 failed. The job was stopped before the command finished. PAPI error code 9. Please check the log file for more details: gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/travis/localize_file_larger_than_disk_space/<<UUID>>/call-localize_file/localize_file.log."
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ metadata {
workflowName: requester_pays_localization
status: Failed
"failures.0.message": "Workflow failed"
"failures.0.causedBy.0.message": ~~"does not have serviceusage.services.use access"
"failures.0.causedBy.0.message": "Task requester_pays_localization.localize:NA:1 failed. The job was stopped before the command finished. PAPI error code 9. Please check the log file for more details: gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/travis/requester_pays_localization/<<UUID>>/call-localize/localize.log."
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ object PipelinesApiAsyncBackendJobExecutionActor {
// Matched together with Status.ABORTED when a failed run status is routed to
// handleUnexpectedTermination.
val JesUnexpectedTermination = 13
val JesPreemption = 14

// Numeric status code that handleExecutionFailure compares against
// runStatus.errorCode.getCode.value when deciding whether to point the user
// at the job log. 9 is FAILED_PRECONDITION in the gRPC status code table.
val PapiFailedPreConditionErrorCode = 9

// If the JES code is 2 (UNKNOWN), this sub-string indicates preemption:
val FailedToStartDueToPreemptionSubstring = "failed to start due to preemption"

Expand Down Expand Up @@ -584,6 +586,14 @@ class PipelinesApiAsyncBackendJobExecutionActor(override val standardParams: Sta
override def handleExecutionFailure(runStatus: RunStatus,
returnCode: Option[Int]): Future[ExecutionHandle] = {

/**
  * Swaps the raw backend error text for a pointer to the job log when the
  * failure looks like a failed localization or delocalization step.
  *
  * The replacement is used only when all three conditions hold: the status
  * code equals `PapiFailedPreConditionErrorCode`, the message contains
  * "Execution failed", and the message mentions "Localization" or
  * "Delocalization". In every other case the message is returned untouched.
  *
  * @param runStatus the unsuccessful run status whose error code is inspected
  * @param errorMsg  the error text reported for the run
  * @return a message pointing at `jesLogPath`, or `errorMsg` unchanged
  */
def generateBetterErrorMsg(runStatus: RunStatus.UnsuccessfulRunStatus, errorMsg: String): String = {
  // Declared as defs so evaluation stays lazy and short-circuits left to right.
  def hasFailedPreconditionCode: Boolean =
    runStatus.errorCode.getCode.value == PapiFailedPreConditionErrorCode
  def reportsExecutionFailure: Boolean =
    errorMsg.contains("Execution failed")
  def mentionsFileTransferStep: Boolean =
    errorMsg.contains("Localization") || errorMsg.contains("Delocalization")

  val pointToLog = hasFailedPreconditionCode && reportsExecutionFailure && mentionsFileTransferStep

  if (pointToLog) s"Please check the log file for more details: $jesLogPath."
  else errorMsg
}

// Inner function: Handles a 'Failed' runStatus (or Preempted if preemptible was false)
def handleFailedRunStatus(runStatus: RunStatus.UnsuccessfulRunStatus,
returnCode: Option[Int]): Future[ExecutionHandle] = {
Expand All @@ -592,19 +602,22 @@ class PipelinesApiAsyncBackendJobExecutionActor(override val standardParams: Sta
def isDockerPullFailure: Boolean = prettyError.contains("not found: does not exist or no pull access")

(runStatus.errorCode, runStatus.jesCode) match {
case (Status.NOT_FOUND, Some(JesFailedToDelocalize)) => Future.successful(FailedNonRetryableExecutionHandle(FailedToDelocalizeFailure(runStatus.prettyPrintedError, jobTag, Option(standardPaths.error))))
case (Status.ABORTED, Some(JesUnexpectedTermination)) => handleUnexpectedTermination(runStatus.errorCode, runStatus.prettyPrintedError, returnCode)
case (Status.NOT_FOUND, Some(JesFailedToDelocalize)) => Future.successful(FailedNonRetryableExecutionHandle(FailedToDelocalizeFailure(prettyError, jobTag, Option(standardPaths.error))))
case (Status.ABORTED, Some(JesUnexpectedTermination)) => handleUnexpectedTermination(runStatus.errorCode, prettyError, returnCode)
case _ if isDockerPullFailure =>
val unable = s"Unable to pull Docker image '$jobDockerImage' "
val details = if (hasDockerCredentials)
"but Docker credentials are present; is this Docker account authorized to pull the image? " else
"and there are effectively no Docker credentials present (one or more of token, authorization, or Google KMS key may be missing). " +
"Please check your private Docker configuration and/or the pull access for this image. "
val message = unable + details + runStatus.prettyPrintedError
val message = unable + details + prettyError
Future.successful(FailedNonRetryableExecutionHandle(StandardException(
runStatus.errorCode, message, jobTag, returnCode, standardPaths.error), returnCode))
case _ => Future.successful(FailedNonRetryableExecutionHandle(StandardException(
runStatus.errorCode, runStatus.prettyPrintedError, jobTag, returnCode, standardPaths.error), returnCode))
case _ => {
val finalPrettyPrintedError = generateBetterErrorMsg(runStatus, prettyError)
Future.successful(FailedNonRetryableExecutionHandle(StandardException(
runStatus.errorCode, finalPrettyPrintedError, jobTag, returnCode, standardPaths.error), returnCode))
}
}
}

Expand Down