diff --git a/changes.d/6330.fix.md b/changes.d/6330.fix.md new file mode 100644 index 0000000000..190a9637a9 --- /dev/null +++ b/changes.d/6330.fix.md @@ -0,0 +1 @@ +Fix bug where a broadcast failed to change the platform selected for a task after a host selection failure. \ No newline at end of file diff --git a/cylc/flow/task_job_mgr.py b/cylc/flow/task_job_mgr.py index 185966ff12..9eec82970b 100644 --- a/cylc/flow/task_job_mgr.py +++ b/cylc/flow/task_job_mgr.py @@ -306,10 +306,12 @@ def submit_task_jobs(self, workflow, itasks, curve_auth, # Get another platform, if task config platform is a group use_next_platform_in_group = False - if itask.tdef.rtconfig['platform']: + bc_mgr = self.task_events_mgr.broadcast_mgr + rtconf = bc_mgr.get_updated_rtconfig(itask) + if rtconf['platform']: try: platform = get_platform( - itask.tdef.rtconfig['platform'], + rtconf['platform'], bad_hosts=self.bad_hosts ) except PlatformLookupError: diff --git a/tests/integration/test_task_job_mgr.py b/tests/integration/test_task_job_mgr.py index 9265f2198d..48a49eb30a 100644 --- a/tests/integration/test_task_job_mgr.py +++ b/tests/integration/test_task_job_mgr.py @@ -187,3 +187,52 @@ async def test__prep_submit_task_job_impl_handles_execution_time_limit( schd.task_job_mgr._prep_submit_task_job( schd.workflow, task_a) assert not task_a.summary.get('execution_time_limit', '') + + +async def test_broadcast_platform_change( + mock_glbl_cfg, + flow, + scheduler, + start, + log_filter, +): + """Broadcast can change task platform. + + Even after host selection failure. + + see https://github.com/cylc/cylc-flow/issues/6320 + """ + mock_glbl_cfg( + 'cylc.flow.platforms.glbl_cfg', + ''' + [platforms] + [[foo]] + hosts = food + ''') + + id_ = flow({ + "scheduling": {"graph": {"R1": "mytask"}}, + # Platform = None doesn't cause this issue! 
+ "runtime": {"mytask": {"platform": "localhost"}}}) + + schd = scheduler(id_, run_mode='live') + + async with start(schd) as log: + # Change the task platform with broadcast: + schd.broadcast_mgr.put_broadcast( + ['1'], ['mytask'], [{'platform': 'foo'}]) + + # Simulate prior failure to contact hosts: + schd.task_job_mgr.task_remote_mgr.bad_hosts = {'food'} + + # Attempt job submission: + schd.task_job_mgr.submit_task_jobs( + schd.workflow, + schd.pool.get_tasks(), + schd.server.curve_auth, + schd.server.client_pub_key_dir) + + # Check that task platform hasn't become "localhost": + assert schd.pool.get_tasks()[0].platform['name'] == 'foo' + # ... and that remote init failed because all hosts are bad: + assert log_filter(log, contains="(no hosts were reachable)")