Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix unexecuted CWL jobs with dynamic requirements failing on missing inputs #5136

Merged
merged 7 commits into from
Nov 1, 2024
11 changes: 8 additions & 3 deletions src/toil/cwl/cwltoil.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ class Conditional:
"""
Object holding conditional expression until we are ready to evaluate it.

Evaluation occurs at the moment the encloses step is ready to run.
Evaluation occurs before the enclosing step's inputs are type-checked.
"""

def __init__(
Expand Down Expand Up @@ -2447,23 +2447,28 @@ def __init__(
self.cwltool = tool
self.cwljob = cwljob
self.runtime_context = runtime_context
self.conditional = conditional
self.conditional = conditional or Conditional()
self.parent_name = parent_name

def run(self, file_store: AbstractFileStore) -> Any:
"""Create a child job with the correct resource requirements set."""
cwljob = resolve_dict_w_promises(self.cwljob, file_store)

# Check confitional to license full evaluation of job inputs.
if self.conditional.is_false(cwljob):
return self.conditional.skipped_outputs()

fill_in_defaults(
self.cwltool.tool["inputs"],
cwljob,
self.runtime_context.make_fs_access(self.runtime_context.basedir or ""),
)
# Don't forward the conditional. We checked it already.
realjob = CWLJob(
tool=self.cwltool,
cwljob=cwljob,
runtime_context=self.runtime_context,
parent_name=self.parent_name,
conditional=self.conditional,
)
self.addChild(realjob)
return realjob.rv()
Expand Down
8 changes: 8 additions & 0 deletions src/toil/test/cwl/cwlTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,14 @@ def test_glob_dir_bypass_file_store(self) -> None:
except FileNotFoundError:
pass

def test_required_input_condition_protection(self) -> None:
# This doesn't run containerized
self._tester(
"src/toil/test/cwl/not_run_required_input.cwl",
"src/toil/test/cwl/empty.json",
{},
)

@needs_slurm
def test_slurm_node_memory(self) -> None:
pass
Expand Down
29 changes: 29 additions & 0 deletions src/toil/test/cwl/not_run_required_input.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# This workflow fills in a required int from an optional int, but only when the
# int is really present. But it also uses the value to compute the conditional
# task's resource requirements, so Toil can't just schedule the task and then
# check the condition.
# See <https://github.com/DataBiosphere/toil/issues/4930#issue-2297563321>
cwlVersion: v1.2
class: Workflow
requirements:
InlineJavascriptRequirement: {}
inputs:
optional_input: int?
steps:
the_step:
in:
required_input:
source: optional_input
when: $(inputs.required_input != null)
run:
cwlVersion: v1.2
class: CommandLineTool
inputs:
required_input: int
requirements:
ResourceRequirement:
coresMax: $(inputs.required_input)
baseCommand: "nproc"
outputs: []
out: []
outputs: []