Skip to content

Commit

Permalink
feat: enhance job memory request calculations (#59)
Browse files Browse the repository at this point in the history
Implement handling for memory requests 
per GPU and log errors for invalid memory requests.
  • Loading branch information
conradtchan authored Nov 8, 2024
1 parent 06072a9 commit a32c795
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions backend/backend_ozstar.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,19 +872,33 @@ def job_mem_max(self, job_id):
def job_mem_request(self, job_id):
    """Return the memory requested by the job ``job_id``.

    The request is derived from the job's pyslurm record, using the first
    of the following that is set (i.e. is not ``None``):

    1. ``min_memory_cpu`` (memory per CPU), scaled by
       ``ntasks_per_node * cpus_per_task`` when both are positive,
       otherwise by ``num_cpus / num_nodes`` when both are positive.
       Note that the second form uses true division, so it yields a float.
    2. ``mem_per_tres`` (memory per GPU, e.g. ``--mem-per-gpu=160GB`` is
       reported as ``'gres/gpu:163840'``), scaled by
       ``self.job_ngpus(job_id)``.
    3. ``min_memory_node``, returned unchanged.

    Args:
        job_id: Job identifier; translated through ``self.id_map`` to
            index ``self.pyslurm_job``.

    Returns:
        The computed memory request, in whatever unit the pyslurm record
        uses. Returns 0, after logging an error, when a per-CPU or per-GPU
        request is present but cannot be turned into a number.
    """
    job = self.pyslurm_job[self.id_map[job_id]]

    # Memory requested per CPU
    if job["min_memory_cpu"] is not None:
        # Preferred: tasks per node and CPUs per task are both specified
        if job["ntasks_per_node"] > 0 and job["cpus_per_task"] > 0:
            return (
                job["min_memory_cpu"]
                * job["ntasks_per_node"]
                * job["cpus_per_task"]
            )

        # Fallback: spread the job's CPU count evenly over its nodes
        if job["num_cpus"] > 0 and job["num_nodes"] > 0:
            return job["min_memory_cpu"] * job["num_cpus"] / job["num_nodes"]

        # A per-CPU request without any usable CPU count cannot be scaled
        self.log.error(f"Job {job_id} has no valid memory request")
        return 0

    # Memory requested per GPU (e.g. --mem-per-gpu=160GB)
    mem_per_tres = job["mem_per_tres"]
    if mem_per_tres is not None:
        # Example value: 'gres/gpu:163840'
        try:
            mem_per_gpu = int(mem_per_tres.split(":")[1])
        except (IndexError, ValueError):
            # Malformed value (no ':' separator or a non-numeric amount):
            # report it the same way as an unusable per-CPU request
            # instead of letting the exception escape to the caller.
            self.log.error(
                f"Job {job_id} has an invalid mem_per_tres value: "
                f"{mem_per_tres!r}"
            )
            return 0
        return mem_per_gpu * self.job_ngpus(job_id)

    # Neither per-CPU nor per-GPU memory was specified, so use the
    # minimum memory per node
    return job["min_memory_node"]

def job_lustre(self, job_id):
Expand Down

0 comments on commit a32c795

Please sign in to comment.