Skip to content

Commit

Permalink
Issue #3320 Slurm autoscaler effective job start date parsing
Browse files (browse the repository at this point in the history)
  • Loading branch information
tcibinan committed Sep 22, 2023
1 parent a21e6c3 commit 5d39556
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
1 change: 1 addition & 0 deletions workflows/pipe-common/pipeline/hpc/autoscaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ def scale_down(self, child_host):
Logger.info('Disabling additional worker %s...' % child_host)
self.grid_engine.disable_host(child_host)
jobs = self.grid_engine.get_jobs()
# todo: Ignore canceled/completed job here and etc.
disabled_host_jobs = [job for job in jobs if child_host in job.hosts]
if disabled_host_jobs:
Logger.warn('Disabled additional worker %s has %s associated jobs. Scaling down is interrupted.'
Expand Down
7 changes: 6 additions & 1 deletion workflows/pipe-common/pipeline/hpc/engine/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,18 @@ def _parse_jobs(self, scontrol_jobs_output):
# this job to `num_node` nodes and provide portion of resources
# TODO maybe there is another way to achieve that?
for node_idx in range(num_node):
job_state = GridEngineJobState.from_letter_code(job_dict.get("JobState"))
if job_state == GridEngineJobState.PENDING:
# In certain cases pending job's start date can be estimated start date.
# It confuses autoscaler and therefore should be ignored.
job_dict["StartTime"] = "Unknown"
jobs.append(
GridEngineJob(
id=job_dict.get("JobId") + "_" + str(node_idx),
root_id=job_dict.get("JobId"),
name=job_dict.get("JobName"),
user=self._parse_user(job_dict.get("UserId")),
- state=GridEngineJobState.from_letter_code(job_dict.get("JobState")),
+ state=job_state,
datetime=self._parse_date(
job_dict.get("StartTime") if job_dict.get("StartTime") != "Unknown" else job_dict.get("SubmitTime")),
hosts=self._parse_nodelist(job_dict.get("NodeList")),
Expand Down

0 comments on commit 5d39556

Please sign in to comment.