-
Notifications
You must be signed in to change notification settings - Fork 94
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Batch spawn POC #5438
Draft
hjoliver
wants to merge
4
commits into
cylc:master
Choose a base branch
from
hjoliver:batch-spawn
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Batch spawn POC #5438
Changes from 1 commit
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,6 +133,8 @@ def __init__( | |
self.config.runtime['descendants'] | ||
) | ||
self.tasks_to_hold: Set[Tuple[str, 'PointBase']] = set() | ||
self.tasks_to_spawn = {} | ||
self.tasks_to_spawn_forced = {} | ||
|
||
def set_stop_task(self, task_id): | ||
"""Set stop after a task.""" | ||
|
@@ -1214,70 +1216,112 @@ def spawn_on_output(self, itask, output, forced=False): | |
and itask.identity not in self.expected_failed_tasks | ||
): | ||
self.abort_task_failed = True | ||
|
||
if not forced and output in [ | ||
TASK_OUTPUT_SUCCEEDED, | ||
TASK_OUTPUT_EXPIRED, | ||
TASK_OUTPUT_FAILED | ||
]: | ||
self.remove_if_complete(itask) | ||
|
||
try: | ||
children = itask.graph_children[output] | ||
if forced: | ||
self.tasks_to_spawn_forced[ | ||
(itask, output) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Possibly consider using a deque or list rather than dict unless there's a need to perform |
||
] = itask.graph_children[output] | ||
else: | ||
self.tasks_to_spawn[ | ||
(itask, output) | ||
] = itask.graph_children[output] | ||
except KeyError: | ||
# No children depend on this output | ||
children = [] | ||
pass | ||
else: | ||
self.spawn_children() | ||
|
||
suicide = [] | ||
for c_name, c_point, is_abs in children: | ||
if is_abs: | ||
self.abs_outputs_done.add( | ||
(str(itask.point), itask.tdef.name, output)) | ||
self.workflow_db_mgr.put_insert_abs_output( | ||
str(itask.point), itask.tdef.name, output) | ||
self.workflow_db_mgr.process_queued_ops() | ||
|
||
c_taskid = Tokens( | ||
cycle=str(c_point), | ||
task=c_name, | ||
).relative_id | ||
c_task = ( | ||
self._get_hidden_task_by_id(c_taskid) | ||
or self._get_main_task_by_id(c_taskid) | ||
) | ||
if c_task is not None and c_task != itask: | ||
# (Avoid self-suicide: A => !A) | ||
self.merge_flows(c_task, itask.flow_nums) | ||
elif ( | ||
c_task is None | ||
and (itask.flow_nums or forced) | ||
and not itask.flow_wait | ||
): | ||
# If child is not in the pool already, and parent belongs to a | ||
# flow (so it can spawn children), and parent is not waiting | ||
# for an upcoming flow merge before spawning ... then spawn it. | ||
c_task = self.spawn_task(c_name, c_point, itask.flow_nums) | ||
def spawn_children(self): | ||
self._spawn_children(self.tasks_to_spawn) | ||
self._spawn_children(self.tasks_to_spawn_forced, forced=True) | ||
|
||
if c_task is not None: | ||
# Have child task, update its prerequisites. | ||
def _spawn_children(self, children, forced=False): | ||
suicide = [] | ||
LIMIT = 10 | ||
COUNT = 0 | ||
keys_done = [] | ||
for key, value in children.items(): | ||
keys_done.append(key) | ||
itask, output = key | ||
for C_INNER, (c_name, c_point, is_abs) in enumerate(value): | ||
del value[C_INNER] | ||
C_INNER += 1 | ||
COUNT += 1 | ||
if COUNT > LIMIT: | ||
break | ||
if is_abs: | ||
tasks, *_ = self.filter_task_proxies( | ||
[f'*/{c_name}'], | ||
warn=False, | ||
) | ||
if c_task not in tasks: | ||
tasks.append(c_task) | ||
else: | ||
tasks = [c_task] | ||
for t in tasks: | ||
t.state.satisfy_me({ | ||
(str(itask.point), itask.tdef.name, output) | ||
}) | ||
self.data_store_mgr.delta_task_prerequisite(t) | ||
# Add it to the hidden pool or move it to the main pool. | ||
self.add_to_pool(t) | ||
self.abs_outputs_done.add( | ||
(str(itask.point), itask.tdef.name, output)) | ||
self.workflow_db_mgr.put_insert_abs_output( | ||
str(itask.point), itask.tdef.name, output) | ||
self.workflow_db_mgr.process_queued_ops() | ||
|
||
if t.point <= self.runahead_limit_point: | ||
self.rh_release_and_queue(t) | ||
c_taskid = Tokens( | ||
cycle=str(c_point), | ||
task=c_name, | ||
).relative_id | ||
c_task = ( | ||
self._get_hidden_task_by_id(c_taskid) | ||
or self._get_main_task_by_id(c_taskid) | ||
) | ||
if c_task is not None and c_task != itask: | ||
# (Avoid self-suicide: A => !A) | ||
self.merge_flows(c_task, itask.flow_nums) | ||
elif ( | ||
c_task is None | ||
and (itask.flow_nums or forced) | ||
and not itask.flow_wait | ||
): | ||
# If child is not in the pool already, and parent belongs | ||
# to a flow (so it can spawn children), and parent is not | ||
# waiting for an upcoming flow merge before spawning ... | ||
# then spawn it. | ||
c_task = self.spawn_task(c_name, c_point, itask.flow_nums) | ||
|
||
# Event-driven suicide. | ||
if ( | ||
t.state.suicide_prerequisites and | ||
t.state.suicide_prerequisites_all_satisfied() | ||
): | ||
suicide.append(t) | ||
if c_task is not None: | ||
# Have child task, update its prerequisites. | ||
if is_abs: | ||
tasks, *_ = self.filter_task_proxies( | ||
[f'*/{c_name}'], | ||
warn=False, | ||
) | ||
if c_task not in tasks: | ||
tasks.append(c_task) | ||
else: | ||
tasks = [c_task] | ||
for t in tasks: | ||
t.state.satisfy_me({ | ||
(str(itask.point), itask.tdef.name, output) | ||
}) | ||
self.data_store_mgr.delta_task_prerequisite(t) | ||
# Add it to the hidden pool or move it to the main | ||
# pool. | ||
self.add_to_pool(t) | ||
|
||
if t.point <= self.runahead_limit_point: | ||
self.rh_release_and_queue(t) | ||
|
||
# Event-driven suicide. | ||
if ( | ||
t.state.suicide_prerequisites and | ||
t.state.suicide_prerequisites_all_satisfied() | ||
): | ||
suicide.append(t) | ||
|
||
if COUNT > LIMIT: | ||
break | ||
|
||
for key in keys_done: | ||
if not children[key]: | ||
del children[key] | ||
|
||
for c_task in suicide: | ||
msg = self.__class__.SUICIDE_MSG | ||
|
@@ -1289,13 +1333,6 @@ def spawn_on_output(self, itask, output, forced=False): | |
msg += " suiciding while active" | ||
self.remove(c_task, msg) | ||
|
||
if not forced and output in [ | ||
TASK_OUTPUT_SUCCEEDED, | ||
TASK_OUTPUT_EXPIRED, | ||
TASK_OUTPUT_FAILED | ||
]: | ||
self.remove_if_complete(itask) | ||
|
||
def remove_if_complete(self, itask): | ||
"""Remove finished itask if required outputs are complete. | ||
|
||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think a lot of the functional tests can be sensitive to the order of main loop events. Maybe try to relocate this where spawning happened before, near process_queued_task_messages or whatever.